Repository: gaoxiaoliangz/epub-parser Branch: master Commit: 0949d4420ccb Files: 33 Total size: 28.4 KB Directory structure: gitextract_631fmifc/ ├── .github/ │ └── workflows/ │ ├── publish-npm.yml │ └── test.yml ├── .gitignore ├── .npmignore ├── .prettierrc ├── .vscode/ │ └── settings.json ├── CHANGELOG.md ├── README.md ├── _config.yml ├── examples/ │ └── simple/ │ ├── main.js │ └── package.json ├── fixtures/ │ ├── file-1-no-toc.epub │ ├── file-1.epub │ ├── file-2.epub │ ├── file-3.epub │ ├── file-4.epub │ ├── file-e.epub │ ├── wells.epub │ └── zhihu.epub ├── jest.config.js ├── package.json ├── src/ │ ├── index.ts │ ├── mdConverters.ts │ ├── parseEpub.spec.ts │ ├── parseEpub.ts │ ├── parseHTML.spec.ts │ ├── parseHTML.ts │ ├── parseLink.ts │ ├── parseSection.ts │ ├── types.ts │ └── utils.ts ├── tsconfig.json └── tslint.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/publish-npm.yml ================================================ name: NPM publish on: push: tags: - 'v*' jobs: publish-npm: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions/setup-node@v1 with: node-version: 12 registry-url: https://registry.npmjs.org/ - run: yarn - run: yarn build - run: yarn test - run: npm publish env: NODE_AUTH_TOKEN: ${{secrets.npm_token}} ================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: [push, pull_request] jobs: build: runs-on: ubuntu-latest strategy: matrix: node-version: [12.x] steps: - uses: actions/checkout@v1 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 with: node-version: ${{ matrix.node-version }} - name: install, build, and test run: | yarn yarn build yarn test env: CI: true ================================================ FILE: .gitignore ================================================ node_modules lib ================================================ FILE: .npmignore ================================================ node_modules fixtures examples .vscode ================================================ FILE: .prettierrc ================================================ { "trailingComma": "all", "tabWidth": 2, "semi": false, "singleQuote": true, "printWidth": 100, "arrowParens": "always" } ================================================ FILE: .vscode/settings.json ================================================ { "debug.node.autoAttach": "on" } ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. ## [2.0.4](https://github.com/gaoxiaoliangz/epub-parser/compare/v2.0.3...v2.0.4) (2021-07-26) ## [2.0.3](https://github.com/gaoxiaoliangz/epub-parser/compare/v2.0.2...v2.0.3) (2020-07-02) ### Bug Fixes * fix issue [#18](https://github.com/gaoxiaoliangz/epub-parser/issues/18) ([d979ff4](https://github.com/gaoxiaoliangz/epub-parser/commit/d979ff46b4dee8247af2e363f646690316505e43)) # Changelog ## v2.0.2 (2019-11-22) - fixed ================================================ FILE: README.md ================================================ # 📖 epub-parser > A powerful yet easy-to-use epub parser [![npm version](https://badge.fury.io/js/%40gxl%2Fepub-parser.svg)](https://badge.fury.io/js/%40gxl%2Fepub-parser) ![Test](https://github.com/gaoxiaoliangz/epub-parser/workflows/Test/badge.svg) The package exports a simple parser function which use epub file as input and output JavaScript object. As it is written in TypeScript, types are already included in the package. ## Install ```bash npm install @gxl/epub-parser --save ``` or if you prefer yarn ```bash yarn add @gxl/epub-parser ``` ## Usage ```js import { parseEpub } from '@gxl/epub-parser' const epubObj = await parseEpub('/path/to/file.epub', { type: 'path', }) console.log('epub content:', epubObj) ``` ### parseEpub(target: string | buffer, options?: object): EpubObject #### target type: `string` or `buffer` It can be the path to the file or file's binary string or buffer #### options type: `object` ##### type(optional): 'binaryString' | 'path' | 'buffer' It forces the parser to treat supplied target as the defined type, if not defined the parser itself will decide how to treat the file (useful when you are not sure if the path is valid). #### EpubObject The output is an object which contains `structure`, `sections`, `info`(private property names start with `_`. I don't recommend using them, since they are subscribed to change). `structure` is the parsed `toc` of epub file, they contain information about how the book is constructed. `sections` is an array of chapters or sections under chapters, they are referred in `structure`. Each section object contains the raw html string and a few handy methods. - `Section.prototype.toMarkdown`: convert to markdown object. - `Section.prototype.toHtmlObjects`: convert to html object. And a note about `src` and `href`, the `src` and `href` in raw html stay untouched, but the `toHtmlObjects` method resolves `src` to base64 string, and alters `href` so that they make sense in the parsed epub. And the parsed `href` is something like `#{sectionId},{hash}`. ## How to contribute - Raise an issue in the issue section. - PRs are the best. ❤️ ================================================ FILE: _config.yml ================================================ theme: jekyll-theme-merlot ================================================ FILE: examples/simple/main.js ================================================ // @ts-check const { parseEpub } = require('../../lib') parseEpub('../../fixtures/zhihu.epub').then(result => { console.log('result object has keys: ', Object.keys(result)) console.log('book info', result.info) console.log('book structure', result.structure) console.log('the book has', result.sections.length, 'sections') console.log('here is first section') const showSection = idx => { console.log(`-------- section index ${idx} --------`) console.log(result.sections[idx]) console.log('toMarkdown') console.log(result.sections[idx].toMarkdown()) console.log('toHtmlObjects') const htmlObjects = result.sections[idx].toHtmlObjects() console.log(htmlObjects) } showSection(2) // this section contains images which are converted to base64 // showSection(4) }) ================================================ FILE: examples/simple/package.json ================================================ { "name": "simple", "version": "1.0.0", "description": "", "scripts": { "start": "node main.js", "debug": "node --inspect main.js" }, "author": "gaoxiaoliangz", "license": "ISC" } ================================================ FILE: fixtures/file-e.epub ================================================ pretend to be one hehe ================================================ FILE: jest.config.js ================================================ module.exports = { preset: 'ts-jest', testEnvironment: 'node', modulePathIgnorePatterns: ['lib'], } ================================================ FILE: package.json ================================================ { "name": "@gxl/epub-parser", "version": "2.0.4", "description": "A powerful yet easy-to-use epub parser", "main": "lib/index.js", "scripts": { "prebuild": "yarn clean", "build": "tsc", "watch": "tsc --watch", "clean": "rimraf lib", "format": "prettier --write \"src/**/*.{js,jsx,ts,tsx,json,md,css,scss}\"", "test": "jest", "test-debug": "node --inspect-brk -r ts-node/register node_modules/.bin/jest --runInBand", "v": "standard-version --preset angular", "postv": "git push --follow-tags origin master", "prepare": "yarn build" }, "repository": { "type": "git", "url": "git+https://github.com/gaoxiaoliangz/epub-parser.git" }, "keywords": [ "epub-parser", "parser", "epub", "easy", "book", "file" ], "author": "gaoxiaoliangz", "license": "MIT", "bugs": { "url": "https://github.com/gaoxiaoliangz/epub-parser/issues" }, "homepage": "https://github.com/gaoxiaoliangz/epub-parser#readme", "dependencies": { "jsdom": "^15.1.1", "lodash": "^4.17.15", "node-zip": "^1.1.1", "to-markdown": "^3.1.1", "xml2js": "^0.4.19" }, "devDependencies": { "@types/express": "^4.17.1", "@types/jest": "^24.0.18", "@types/jsdom": "^12.2.4", "@types/lodash": "^4.14.137", "@types/node": "^12.7.2", "@types/xml2js": "^0.4.4", "cross-env": "^5.2.0", "dotenv": "^8.1.0", "express": "^4.17.1", "jest": "^24.9.0", "prettier": "^2.0.5", "rimraf": "^3.0.0", "source-map-support": "^0.5.13", "standard-version": "^8.0.0", "ts-jest": "^24.0.2", "ts-node": "^8.3.0", "tslint": "^5.19.0", "typescript": "^3.9.6", "vrsource-tslint-rules": "^6.0.0" } } ================================================ FILE: src/index.ts ================================================ import parseEpub from './parseEpub' import parseLink from './parseLink' import parseHTML from './parseHTML' export { parseLink, parseHTML, parseEpub } ================================================ FILE: src/mdConverters.ts ================================================ import parseLink from './parseLink' export const resolveInlineNavHref = (href: string) => { if (href && href.indexOf('http://') === -1) { const parsed = parseLink(href) if (parsed.hash) { return `#${parsed.name}$${parsed.hash}` } return `#${parsed.name}` } return href } export const h = { filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], replacement: function (innerHTML: string, node: HTMLElement) { let hLevel = node.tagName.charAt(1) as any let hPrefix = '' for (let i = 0; i < hLevel; i++) { hPrefix += '#' } // return `\n${hPrefix} ${innerHTML.trim()}\n\n` const hTag = node.tagName.toLowerCase() const id = node.getAttribute('id') if (!id) { return `\n${hPrefix} ${innerHTML}\n\n` } // 块级元素若保留原标签需添加换行符,否则临近元素渲染会出现问题 return `\n<${hTag} id="${id}">${innerHTML.trim().split('\n').join(' ')}\n\n` }, } export const span = { filter: ['span'], replacement: function (innerHTML: string) { return innerHTML }, } export const a = { filter: ['a'], replacement: function (innerHTML: string, node: HTMLEmbedElement) { const href = node.getAttribute('href') return `\n[${innerHTML}](${resolveInlineNavHref(href!)})\n\n` }, } export const div = { filter: ['div'], replacement: function (innerHTML: string) { return `\n${innerHTML}\n\n` }, } export const img = { filter: ['img'], replacement: function (innerHTML: string) { return `\n[PIC]\n\n` }, } ================================================ FILE: src/parseEpub.spec.ts ================================================ import parser from './parseEpub' import _ from 'lodash' import * as path from 'path' const baseDir = process.cwd() const filesToBeTested = ['file-1', 'file-2', 'file-3', 'file-4', 'file-1-no-toc', 'wells'] const testFile = (filename: string) => { describe(`parser 测试 ${filename}.epub`, () => { const fileContent = parser(path.join(baseDir, `fixtures/${filename}.epub`), { type: 'path', expand: true, }) test('Result should have keys', async () => { const keys = _.keys(await fileContent) expect(keys.length).not.toBe(0) }) test('toc', async () => { const result = await fileContent if (filename === 'file-1-no-toc') { expect(result.structure).toBe(undefined) } else { expect(fileContent && typeof fileContent).toBe('object') } }) // it('key 分别为: flesh, nav, meta', done => { // const expectedKeys = ['flesh', 'nav', 'meta'] // fileContent.then(result => { // const keys = _.keys(result) // keys.forEach(key => { // expect(expectedKeys.indexOf(key)).to.not.be(-1) // }) // done() // }) // }) }) } filesToBeTested.forEach((filename) => { testFile(filename) }) ================================================ FILE: src/parseEpub.ts ================================================ import fs from 'fs' import xml2js from 'xml2js' import _ from 'lodash' // @ts-ignore import nodeZip from 'node-zip' import parseLink from './parseLink' import parseSection, { Section } from './parseSection' import { GeneralObject } from './types' const xmlParser = new xml2js.Parser() const xmlToJs = (xml: string) => { return new Promise((resolve, reject) => { xmlParser.parseString(xml, (err: Error, object: GeneralObject) => { if (err) { reject(err) } else { resolve(object) } }) }) } const determineRoot = (opfPath: string) => { let root = '' // set the opsRoot for resolving paths if (opfPath.match(/\//)) { // not at top level root = opfPath.replace(/\/([^\/]+)\.opf/i, '') if (!root.match(/\/$/)) { // 以 '/' 结尾,下面的 zip 路径写法会简单很多 root += '/' } if (root.match(/^\//)) { root = root.replace(/^\//, '') } } return root } const parseMetadata = (metadata: GeneralObject[]) => { const title = _.get(metadata[0], ['dc:title', 0]) as string let author = _.get(metadata[0], ['dc:creator', 0]) as string if (typeof author === 'object') { author = _.get(author, ['_']) as string } const publisher = _.get(metadata[0], ['dc:publisher', 0]) as string const meta = { title, author, publisher, } return meta } export class Epub { private _zip: any // nodeZip instance private _opfPath?: string private _root?: string private _content?: GeneralObject private _manifest?: any[] private _spine?: string[] // array of ids defined in manifest private _toc?: GeneralObject private _metadata?: GeneralObject structure?: GeneralObject info?: { title: string author: string publisher: string } sections?: Section[] constructor(buffer: Buffer) { this._zip = new nodeZip(buffer, { binary: true, base64: false, checkCRC32: true }) } resolve( path: string, ): { asText: () => string } { let _path if (path[0] === '/') { // use absolute path, root is zip root _path = path.substr(1) } else { _path = this._root + path } const file = this._zip.file(decodeURI(_path)) if (file) { return file } else { throw new Error(`${_path} not found!`) } } async _resolveXMLAsJsObject(path: string) { const xml = this.resolve(path).asText() return xmlToJs(xml) } private async _getOpfPath() { const container = await this._resolveXMLAsJsObject('/META-INF/container.xml') const opfPath = container.container.rootfiles[0].rootfile[0]['$']['full-path'] return opfPath } _getManifest(content: GeneralObject) { return _.get(content, ['package', 'manifest', 0, 'item'], []).map( (item: any) => item.$, ) as any[] } _resolveIdFromLink(href: string) { const { name: tarName } = parseLink(href) const tarItem = _.find(this._manifest, (item) => { const { name } = parseLink(item.href) return name === tarName }) return _.get(tarItem, 'id') } _getSpine() { return _.get(this._content, ['package', 'spine', 0, 'itemref'], []).map( (item: GeneralObject) => { return item.$.idref }, ) } _genStructureForHTML(tocObj: GeneralObject) { const tocRoot = tocObj.html.body[0].nav[0]['ol'][0].li; let runningIndex = 1; const parseHTMLNavPoints = (navPoint: GeneralObject) => { const element = navPoint.a[0] || {}; const path = element['$'].href; let name = element['_']; const prefix = element.span; if (prefix) { name = `${prefix.map((p: GeneralObject) => p['_']).join('')}${name}`; } const sectionId = this._resolveIdFromLink(path); const { hash: nodeId } = parseLink(path) const playOrder = runningIndex; let children = navPoint?.ol?.[0]?.li; if (children) { children = parseOuterHTML(children); } runningIndex++; return { name, sectionId, nodeId, path, playOrder, children, }; }; const parseOuterHTML = (collection: GeneralObject[]) => { return collection.map((point) => { return parseHTMLNavPoints(point); }); } return parseOuterHTML(tocRoot); } _genStructure(tocObj: GeneralObject, resolveNodeId = false) { if (tocObj.html) { return this._genStructureForHTML(tocObj); } const rootNavPoints = _.get(tocObj, ['ncx', 'navMap', '0', 'navPoint'], []) const parseNavPoint = (navPoint: GeneralObject) => { // link to section const path = _.get(navPoint, ['content', '0', '$', 'src'], '') const name = _.get(navPoint, ['navLabel', '0', 'text', '0']) const playOrder = _.get(navPoint, ['$', 'playOrder']) as string const { hash: nodeId } = parseLink(path) let children = navPoint.navPoint if (children) { // tslint:disable-next-line:no-use-before-declare children = parseNavPoints(children) } const sectionId = this._resolveIdFromLink(path) return { name, sectionId, nodeId, path, playOrder, children, } } const parseNavPoints = (navPoints: GeneralObject[]) => { return navPoints.map((point) => { return parseNavPoint(point) }) } return parseNavPoints(rootNavPoints) } _resolveSectionsFromSpine(expand = false) { // no chain return _.map(_.union(this._spine), (id) => { const path = _.find(this._manifest, { id }).href const html = this.resolve(path).asText() return parseSection({ id, htmlString: html, resourceResolver: this.resolve.bind(this), idResolver: this._resolveIdFromLink.bind(this), expand, }) }) } async parse(expand = false) { const opfPath = await this._getOpfPath() this._root = determineRoot(opfPath) const content = await this._resolveXMLAsJsObject('/' + opfPath) const manifest = this._getManifest(content) const metadata = _.get(content, ['package', 'metadata'], []) const tocID = _.get(content, ['package', 'spine', 0, '$', 'toc'], 'toc.xhtml'); // https://github.com/gaoxiaoliangz/epub-parser/issues/13 // https://www.w3.org/publishing/epub32/epub-packages.html#sec-spine-elem const tocPath = (_.find(manifest, { id: tocID }) || {}).href if (tocPath) { const toc = await this._resolveXMLAsJsObject(tocPath) this._toc = toc this.structure = this._genStructure(toc) } this._manifest = manifest this._content = content this._opfPath = opfPath this._spine = this._getSpine() this._metadata = metadata this.info = parseMetadata(metadata) this.sections = this._resolveSectionsFromSpine(expand) return this } } export interface ParserOptions { type?: 'binaryString' | 'path' | 'buffer' expand?: boolean } export default function parserWrapper(target: string | Buffer, options: ParserOptions = {}) { // seems 260 is the length limit of old windows standard // so path length is not used to determine whether it's path or binary string // the downside here is that if the filepath is incorrect, it will be treated as binary string by default // but it can use options to define the target type const { type, expand } = options let _target = target if (type === 'path' || (typeof target === 'string' && fs.existsSync(target))) { _target = fs.readFileSync(target as string, 'binary') } return new Epub(_target as Buffer).parse(expand) } ================================================ FILE: src/parseHTML.spec.ts ================================================ import parseHTML from './parseHTML' import _ from 'lodash' describe('parseHTML1', () => { it('unwrap tag in unwrap tag situation', () => { const result = parseHTML(`

李剑波[21]用他的创业经历告诉你:你的创业方向离不开你决定创业那一刻之前的人生积累,尤其是你的职业生涯的积累。

如果你的积累是工程师,我觉得你选择从解决问题的角度去创业是比较合适的。这个问题也应该是你自己本身需要解决的。更重要的是,你要多跟那些已经在创业的、创业小有所成的、创业失败的人去聊天。聊他们的项目,他们的产品,他们从0到1是怎么过来的。我创业之前聊过的朋友有:做手机做到上亿规模的,代理火控雷达做到千万规模的,做互联网品牌做到百万规模的,做二维码的,做电子商务做失败的,也有做到一年几十万规模的,还有做传统生意的。如果你足够有悟性,相信你能够从中找到你的创业方向的。

`) expect(JSON.stringify(result)).toBe( `[{"tag":"p","type":1,"children":[{"type":3,"text":"李剑波"},{"tag":"sup","type":1,"children":[{"tag":"a","type":1,"children":[{"type":3,"text":"[21]"}],"attrs":{"href":"../Text/part0006_split_001.html#note21n","id":"note21"}}],"attrs":{}},{"type":3,"text":"用他的创业经历告诉你:"},{"type":3,"text":"你的创业方向离不开你决定创业那一刻之前的人生积累,尤其是你的职业生涯的积累。"}],"attrs":{}},{"tag":"p","type":1,"children":[{"type":3,"text":"如果你的积累是工程师,我觉得你选择从解决问题的角度去创业是比较合适的。这个问题也应该是你自己本身需要解决的。更重要的是,你要多跟那些已经在创业的、创业小有所成的、创业失败的人去聊天。聊他们的项目,他们的产品,他们从0到1是怎么过来的。我创业之前聊过的朋友有:做手机做到上亿规模的,代理火控雷达做到千万规模的,做互联网品牌做到百万规模的,做二维码的,做电子商务做失败的,也有做到一年几十万规模的,还有做传统生意的。如果你足够有悟性,相信你能够从中找到你的创业方向的。"}],"attrs":{}}]`, ) }) }) ================================================ FILE: src/parseHTML.ts ================================================ import { JSDOM } from 'jsdom' import _ from 'lodash' import { traverseNestedObject } from './utils' import { HtmlNodeObject, GeneralObject } from './types' const OMITTED_TAGS = ['head', 'input', 'textarea', 'script', 'style', 'svg'] const UNWRAP_TAGS = ['body', 'html', 'div', 'span'] const PICKED_ATTRS = ['href', 'src', 'id'] /** * recursivelyReadParent * @param node * @param callback invoke every time a parent node is read, return truthy value to stop the reading process * @param final callback when reaching the root */ const recursivelyReadParent = ( node: GeneralObject, callback: (node: GeneralObject) => GeneralObject | null, final?: () => GeneralObject, ) => { const _read = (_node: GeneralObject): GeneralObject => { const parent = _node.parentNode if (parent) { const newNode = callback(parent) if (!newNode) { return _read(parent) } return newNode } else { if (final) { return final() } return node } } return _read(node) } export interface ParseHTMLConfig { resolveSrc?: (src: string) => string resolveHref?: (href: string) => string } const parseHTML = (HTMLString: string, config: ParseHTMLConfig = {}) => { const rootNode = new JSDOM(HTMLString).window.document.documentElement const { resolveHref, resolveSrc } = config // initial parse return traverseNestedObject(rootNode, { childrenKey: 'childNodes', preFilter(node) { return node.nodeType === 1 || node.nodeType === 3 }, transformer(node, children) { if (node.nodeType === 1) { const tag = node.tagName.toLowerCase() const attrs: GeneralObject = {} if (OMITTED_TAGS.indexOf(tag) !== -1) { return null } if (UNWRAP_TAGS.indexOf(tag) !== -1 && children) { return children.length === 1 ? children[0] : children } PICKED_ATTRS.forEach((attr) => { let attrVal = node.getAttribute(attr) || undefined if (attrVal && attr === 'href' && resolveHref) { attrVal = resolveHref(attrVal) } if (attrVal && attr === 'src' && resolveSrc) { attrVal = resolveSrc(attrVal) } attrs[attr] = attrVal }) return { tag, type: 1, children, attrs } } else { const text = node.textContent.trim() if (!text) { return null } const makeTextObject = () => { return { type: 3, text, } } // find the closest parent which is not in UNWRAP_TAGS // if failed then wrap with p tag return recursivelyReadParent( node, (parent) => { const tag = parent.tagName && parent.tagName.toLowerCase() if (!tag || UNWRAP_TAGS.indexOf(tag) !== -1) { return null } return makeTextObject() }, () => { return { tag: 'p', children: [makeTextObject()], } }, ) } }, postFilter(node) { return !_.isEmpty(node) }, }) as HtmlNodeObject[] } export default parseHTML ================================================ FILE: src/parseLink.ts ================================================ import _ from 'lodash' export default function parseHref(href: string) { const hash = href.split('#')[1] const url = href.split('#')[0] const prefix = url.split('/').slice(0, -1).join('/') const filename = _.last(url.split('/')) as string const name = filename.split('.').slice(0, -1).join('.') let ext = _.last(filename.split('.')) if (filename.indexOf('.') === -1) { ext = '' } return { hash, name, ext, prefix, url } } ================================================ FILE: src/parseSection.ts ================================================ import path from 'path' // @ts-ignore import toMarkdown from 'to-markdown' import parseLink from './parseLink' import parseHTML from './parseHTML' import * as mdConverters from './mdConverters' import { HtmlNodeObject } from './types' const isInternalUri = (uri: string) => { return uri.indexOf('http://') === -1 && uri.indexOf('https://') === -1 } export type ParseSectionConfig = { id: string htmlString: string resourceResolver: (path: string) => any idResolver: (link: string) => string expand: boolean } export class Section { id: string htmlString: string htmlObjects?: HtmlNodeObject[] private _resourceResolver?: (path: string) => any private _idResolver?: (link: string) => string constructor({ id, htmlString, resourceResolver, idResolver, expand }: ParseSectionConfig) { this.id = id this.htmlString = htmlString this._resourceResolver = resourceResolver this._idResolver = idResolver if (expand) { this.htmlObjects = this.toHtmlObjects?.() } } toMarkdown?() { return toMarkdown(this.htmlString, { converters: [ mdConverters.h, mdConverters.span, mdConverters.div, mdConverters.img, mdConverters.a, ], }) } toHtmlObjects?() { return parseHTML(this.htmlString, { resolveHref: (href) => { if (isInternalUri(href)) { const { hash } = parseLink(href) // todo: what if a link only contains hash part? const sectionId = this._idResolver?.(href) if (hash) { return `#${sectionId},${hash}` } return `#${sectionId}` } return href }, resolveSrc: (src) => { if (isInternalUri(src)) { // todo: may have bugs const absolutePath = path.resolve('/', src).substr(1) const buffer = this._resourceResolver?.(absolutePath)?.asNodeBuffer() const base64 = buffer.toString('base64') return `data:image/png;base64,${base64}` } return src }, }) } } const parseSection = (config: ParseSectionConfig) => { return new Section(config) } export default parseSection ================================================ FILE: src/types.ts ================================================ export interface GeneralObject { [key: string]: any } export interface HtmlNodeObject { tag?: string type: 1 | 3 text?: string children?: HtmlNodeObject[] attrs: { id: string href: string src: string } } ================================================ FILE: src/utils.ts ================================================ import _ from 'lodash' import { GeneralObject } from './types' export interface TraverseNestedObject { preFilter?: (node: GeneralObject) => boolean postFilter?: (node: GeneralObject) => boolean // children must be returned from transformer // or it may not work as expected transformer?: (node: GeneralObject, children?: GeneralObject[]) => any finalTransformer?: (node: GeneralObject) => any childrenKey: string } /** * traverseNestedObject * a note about config.transformer * `children` is a recursively transformed object and should be returned for transformer to take effect * objects without `children` will be transformed by finalTransformer * @param _rootObject * @param config */ export const traverseNestedObject = ( _rootObject: Object | Object[], config: TraverseNestedObject, ) => { const { childrenKey, transformer, preFilter, postFilter, finalTransformer } = config if (!_rootObject) { return [] } const traverse = (rootObject: any | any[]): any[] => { const makeArray = () => { if ( Array.isArray(rootObject) || _.isArrayLikeObject(rootObject) || _.isArrayLike(rootObject) ) { return rootObject } return [rootObject] } const rootArray = makeArray() let result = rootArray if (preFilter) { result = _.filter(result, preFilter) } result = _.map(result, (object, index) => { if (object[childrenKey]) { const transformedChildren = traverse(object[childrenKey]) // in parseHTML, if a tag is in unwrap list, like aaabbb // the result needs to be flatten const children = _.isEmpty(transformedChildren) ? undefined : _.flattenDeep(transformedChildren) if (transformer) { return transformer(object, children) } return { ...object, ...{ [childrenKey]: children, }, } } if (finalTransformer) { return finalTransformer(object) } return object }) if (postFilter) { result = _.filter(result, postFilter) } return result } return _.flattenDeep(traverse(_rootObject)) } ================================================ FILE: tsconfig.json ================================================ { "compilerOptions": { "target": "es5", "lib": [ "es6", "dom" ], "module": "commonjs", "moduleResolution": "node", "experimentalDecorators": true, "emitDecoratorMetadata": true, "outDir": "lib", "sourceMap": true, "declaration": true, "allowJs": false, "jsx": "react", "allowSyntheticDefaultImports": true, "esModuleInterop": true, "preserveWatchOutput": true, "strict": true }, "include": [ "src" ] } ================================================ FILE: tslint.json ================================================ { "rulesDirectory": ["node_modules/vrsource-tslint-rules/rules"], "rules": { "class-name": false, "comment-format": [ true, "check-space" ], "indent": [ true, "spaces" ], "no-duplicate-variable": true, "no-eval": true, "no-internal-module": true, "no-trailing-whitespace": false, "no-var-keyword": true, "one-line": [ true, "check-open-brace", "check-whitespace" ], "quotemark": [ true, "single", "jsx-double" ], "semicolon": [ true, "never" ], "triple-equals": [ true, "allow-null-check" ], "typedef-whitespace": [ true, { "call-signature": "nospace", "index-signature": "nospace", "parameter": "nospace", "property-declaration": "nospace", "variable-declaration": "nospace" } ], "variable-name": [ true, "ban-keywords" ], "whitespace": [ true, "check-branch", "check-decl", "check-operator", "check-separator", "check-type" ], "no-shadowed-variable": true, "no-unused-expression": true, "no-use-before-declare": true, "no-unused-variable": [ true, { "ignore-pattern": ["^_|React"] } ], "one-variable-per-declaration": [true, "ignore-for-loop"], "no-console": [true, "log"], // from plugin "no-param-reassign": true } }