Repository: ChanceYu/front-end-rss Branch: master Commit: 0c0f6de45d5b Files: 112 Total size: 11.5 MB Directory structure: gitextract_01fb6mri/ ├── .github/ │ └── workflows/ │ └── server.yml ├── .gitignore ├── README.md ├── TAGS.md ├── article-to-md/ │ ├── .gitignore │ ├── README.md │ ├── package.json │ └── src/ │ ├── check-cloud.js │ ├── images.js │ ├── index.js │ ├── once.js │ ├── processor.js │ ├── rules/ │ │ ├── fenghen.js │ │ ├── index.js │ │ ├── javascriptweekly.js │ │ ├── nodeweekly.js │ │ ├── ruanyifeng.js │ │ ├── weixin.js │ │ └── zhangxinxu.js │ ├── server.js │ ├── stealth.js │ ├── upload.js │ └── utils.js ├── data/ │ ├── atom.xml │ ├── deleted.json │ ├── hotwords.json │ ├── links.json │ ├── processed.json │ ├── rss.json │ └── tags.json ├── details/ │ ├── JavaScript-Weekly.md │ ├── Node-Weekly.md │ ├── Nodejs技术栈.md │ ├── iCSS前端趣闻.md │ ├── tags/ │ │ ├── ai.md │ │ ├── audio-video.md │ │ ├── browser.md │ │ ├── canvas-image.md │ │ ├── css.md │ │ ├── dev-desktop.md │ │ ├── dev-game.md │ │ ├── dev-mobile.md │ │ ├── front-end-advanced.md │ │ ├── git-svn.md │ │ ├── html.md │ │ ├── javascript.md │ │ ├── job-interview.md │ │ ├── miniprogram.md │ │ ├── nodejs.md │ │ ├── optimization.md │ │ ├── other.md │ │ ├── pack-build.md │ │ ├── react.md │ │ ├── server.md │ │ ├── typescript.md │ │ └── vue.md │ ├── 凹凸实验室.md │ ├── 前端之巅.md │ ├── 前端从进阶到入院.md │ ├── 前端侦探.md │ ├── 前端大全.md │ ├── 前端技术优选.md │ ├── 前端早读课.md │ ├── 前端精读评论.md │ ├── 字节前端-ByteFE.md │ ├── 张鑫旭-鑫空间-鑫生活.md │ ├── 淘系前端团队.md │ ├── 程序员成长指北.md │ ├── 阮一峰的网络日志.md │ └── 风痕·術&思.md ├── server/ │ ├── app.js │ ├── dedupe-links.js │ ├── feed.js │ ├── fetch.js │ ├── once.js │ ├── package.json │ ├── update.js │ ├── utils.js │ └── writemd.js ├── site/ │ ├── .babelrc │ ├── .editorconfig │ ├── .eslintignore │ ├── .eslintrc.js │ ├── .gitignore │ ├── .npmrc │ ├── .postcssrc.js │ ├── README.md │ ├── build/ │ │ ├── build.js │ │ ├── check-versions.js │ │ ├── createFiles.js │ │ ├── data.js │ │ ├── template-parameters.js │ │ ├── upload.js │ 
│ ├── utils.js │ │ ├── vue-loader.conf.js │ │ ├── webpack.base.conf.js │ │ ├── webpack.dev.conf.js │ │ └── webpack.prod.conf.js │ ├── config/ │ │ ├── dev.env.js │ │ ├── index.js │ │ └── prod.env.js │ ├── index.html │ ├── package.json │ ├── src/ │ │ ├── App.vue │ │ ├── components/ │ │ │ ├── Index.vue │ │ │ └── MarkdownViewer.vue │ │ ├── main.js │ │ └── router/ │ │ └── index.js │ └── static/ │ └── .gitkeep └── templates/ ├── DETAILS.md ├── README.md └── TAGS.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/server.yml ================================================ name: RSS Runner run-name: 'Runner #${{ github.run_number }} ${{ inputs.suffix }}' on: workflow_dispatch: inputs: suffix: description: 'Input suffix' required: false type: string schedule: - cron: '0 */2 * * *' jobs: build: if: github.repository == 'ChanceYu/front-end-rss' runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v4 - name: Setup pnpm uses: pnpm/action-setup@v2 with: version: 9 run_install: false - name: Use node.js uses: actions/setup-node@v4 with: node-version: 24.x cache: pnpm cache-dependency-path: server/pnpm-lock.yaml - name: Get pnpm cache directory id: pnpm-store-path shell: bash run: echo "dir=$(pnpm store path)" >> ${GITHUB_OUTPUT} - name: Cache modules id: pnpm-cache uses: actions/cache@v3 with: path: | ${{ steps.pnpm-store-path.outputs.dir }} ~/.cache/ms-playwright key: ${{ runner.os }}-pnpm-20260303-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | ${{ runner.os }}-pnpm-20260303- - name: Install and run server working-directory: ./server env: WORKFLOW: true RSS_CONFIG: ${{ secrets.RSS_CONFIG }} run: | pnpm install pnpm run once - name: Process new articles to Markdown continue-on-error: true working-directory: ./article-to-md env: QINIU_ACCESS_KEY: ${{ secrets.QINIU_ACCESS_KEY }} QINIU_SECRET_KEY: ${{ 
secrets.QINIU_SECRET_KEY }} run: | if [ -f "../server/node_modules/new-articles.json" ]; then pnpm install pnpm exec playwright install chromium pnpm run once else echo "No new articles to process, skipping" fi - name: Commit all changed files id: auto-commit-action uses: stefanzweifel/git-auto-commit-action@v5 with: commit_message: update by action runner https://github.com/ChanceYu/front-end-rss/actions/runs/${{ github.run_id }} - name: Publish site if: steps.auto-commit-action.outputs.changes_detected == 'true' working-directory: ./site run: | pnpm install pnpm run build pnpm add -g surge surge ./dist front-end-rss.surge.sh --token ${{ secrets.SURGE_TOKEN }} ================================================ FILE: .gitignore ================================================ .DS_Store .AppleDouble .LSOverride .svn ._* .Spotlight-V100 .Trashes Thumbs.db ehthumbs.db Desktop.ini $RECYCLE.BIN/ .idea *.sublime-* .vscode node_modules *.js.map .vercel dist server/.env data/articles ================================================ FILE: README.md ================================================

// 1. <br> → real newline text node
// Swap every <br> found inside a <pre> or <code> element for a text node
// holding a literal "\n", so the line break survives text extraction.
for (const block of document.querySelectorAll('pre, code')) {
  for (const br of block.querySelectorAll('br')) {
    const newline = document.createTextNode('\n')
    br.replaceWith(newline)
  }
}
// 2. Rebuild line breaks for code blocks that use one element per line.
// This covers WeChat's own code block and other editors that wrap every
// line in its own child element (<span>, <p>, <div> or <li>).
// A <pre> or <code> that already carries an hljs class is left alone: in that
// case the child elements are syntax-highlight tokens, not line containers.
for (const pre of document.querySelectorAll('pre')) {
  if (pre.classList.contains('hljs')) continue
  const target = pre.querySelector('code') ?? pre
  if (target.classList.contains('hljs')) continue
  const lineEls = Array.from(
    target.querySelectorAll(':scope > span, :scope > p, :scope > div, :scope > li'),
  )
  // Rewrite only collapsed content: several line elements, and no real
  // newline anywhere in the current text.
  const isCollapsed = lineEls.length > 1 && !target.textContent.includes('\n')
  if (!isCollapsed) continue
  target.textContent = lineEls.map((lineEl) => lineEl.textContent).join('\n')
}
})
// Extract the inner HTML of the content element(s) and collect the src of
// every image inside them (contentImgs — presumably used for logging; confirm
// against the code that follows).
// Each selector contributes its FIRST match only (document.querySelector);
// the matched elements' innerHTML are concatenated in order, joined by a
// single newline. When no selector matches, document.body is used instead.
// If an img's displayed width >= 30% of content container width, mark data-rss-block-img so it renders on its own line in Markdown.
const { contentHtml, contentImgs } = await page.evaluate((selectors) => {
const els = selectors
.map((sel) => document.querySelector(sel))
.filter(Boolean)
// Elements to scan for images: the matched content elements, or the whole body as a fallback.
const imgSource = els.length ? els : [document.body]
// If img is inside strong/b, and every node from img up to that strong has only one child (max 4 levels), unwrap so we don't get **![]()** in Markdown
imgSource.forEach((el) => {
// Snapshot the images first: the DOM is mutated while iterating.
const imgs = [...el.querySelectorAll('img')]
imgs.forEach((img) => {
let p = img.parentNode
for (let level = 0; level < 4 && p; level++) {
// `children` counts element children only, so sibling text nodes do not stop the walk.
if (p.children.length !== 1) break
if (p.nodeType === 1 && (p.tagName === 'STRONG' || p.tagName === 'B')) {
const parent = p.parentNode
if (parent) {
// Hoist every child of the <strong>/<b> in front of it, then drop the now-empty wrapper.
while (p.firstChild) {
parent.insertBefore(p.firstChild, p)
}
parent.removeChild(p)
}
// Only the nearest <strong>/<b> ancestor is unwrapped.
break
}
p = p.parentNode
}
})
})
// Minimum fraction of the container width for an image to count as a block image.
const blockImageRatio = 0.3
imgSource.forEach((el) => {
const contentWidth = el.getBoundingClientRect().width
el.querySelectorAll('img').forEach((img) => {
const displayWidth = img.getBoundingClientRect().width
// A zero-width container (e.g. not rendered) never marks any image.
if (contentWidth > 0 && displayWidth >= contentWidth * blockImageRatio) {
img.setAttribute('data-rss-block-img', '1')
}
})
})
// Serialise after the mutations above so the unwrapping and the marker attribute are part of the HTML.
const html = els.length
? els.map((el) => el.innerHTML).join('\n')
: document.body.innerHTML
// Raw src attribute of each image ('' when the attribute is missing).
const imgs = imgSource.flatMap((el) =>
[...el.querySelectorAll('img')].map((img) => ({
src: img.getAttribute('src') ?? '',
}))
)
return { contentHtml: html, contentImgs: imgs }
}, contentSelectors)
// Load the extracted HTML into cheerio for server-side clean-up.
const $ = cheerio.load(contentHtml)
// Drop <script>/<style> plus any selectors the site rule asks to exclude.
const removalSelectors = ['script', 'style', ...(rule.excludeSelectors || [])]
for (const selector of removalSelectors) {
  $(selector).remove()
}
// Make every image src absolute, resolving it against the article URL.
// This covers protocol-relative (//cdn…) and relative (/path or ../path) srcs.
$('img[src]').each((_, imgEl) => {
  const $img = $(imgEl)
  const src = $img.attr('src')
  if (!src || src.startsWith('http')) return
  try {
    const absolute = new URL(src, link).href
    $img.attr('src', absolute)
  } catch {}
})
// Convert <table> elements before Turndown runs (turndown-plugin-gfm uses
// browser-only table.rows / tr.cells which don't exist in Node.js DOM).
//
// Strategy:
// - Data table (has <th> or <thead>, or is at least 2 rows × 2 columns)
//   → convert to a GFM Markdown table, stored in `tableMd` under a
//   placeholder key that replaces the table in the DOM
// - Layout table (anything else) → unwrap, keep cell content as-is
//
// Process from deepest nesting level first so inner tables are resolved
// before their parent tables are evaluated.
const tableMd = new Map() // placeholder → markdown string
let tableIdx = 0
const sortedTables = $('table').toArray().sort(
  (a, b) => $(b).parents('table').length - $(a).parents('table').length
)
for (const table of sortedTables) {
  const $table = $(table)
  const hasExplicitHeader = $table.find('> * th, > * > * th, thead').length > 0
  // Collect all rows first so we can check dimensions for implicit data tables.
  // `rowEls[i]` is the <tr> that produced `rows[i]`; rows without any cell are
  // skipped in BOTH arrays so header lookup below stays index-aligned.
  const rows = []
  const rowEls = []
  $table.find('tr').each((_, tr) => {
    const cells = $(tr).children('th, td').map((_, cell) => {
      return $(cell)
        .text()
        .replace(/[\n\r]+/g, ' ')
        .replace(/\s{2,}/g, ' ')
        .replace(/\|/g, '\\|')
        .trim() || ' '
    }).get()
    if (cells.length) {
      rows.push(cells)
      rowEls.push(tr)
    }
  })
  // A table with 2+ columns and 2+ rows is treated as a data table even
  // when it has no explicit <th>/<thead> markup (first row becomes header).
  // Single-column or single-row tables without headers are layout tables.
  const maxCols = rows.length ? Math.max(...rows.map((r) => r.length)) : 0
  const isDataTable = hasExplicitHeader || (rows.length >= 2 && maxCols >= 2)
  if (!isDataTable) {
    // Layout table: unwrap each <td>/<th> content, discard table chrome
    $table.find('tr').each((_, tr) => {
      // Separate cells with a newline so content doesn't run together
      const cellsHtml = $(tr).children('td, th').map((_, cell) => $(cell).html()).get().join('\n')
      $(tr).replaceWith(cellsHtml + '\n')
    })
    $table.replaceWith($table.html() ?? '')
    continue
  }
  if (!rows.length) { $table.remove(); continue }
  const cols = maxCols
  // Pad short rows with blank cells so every row has `cols` columns
  // (builds new arrays instead of mutating the collected rows).
  const normalised = rows.map((r) => [...r, ...Array(cols - r.length).fill(' ')])
  // Determine header row:
  // 1. the first <thead> row, located by element identity among the
  //    collected (non-empty) rows
  // 2. otherwise the first row (explicit <th> row or implicit header)
  // A <thead> row that has no cells is not in `rows`; fall back to row 0
  // rather than indexing past the collected rows.
  const theadRow = $table.find('thead tr').first()[0]
  const theadIdx = theadRow ? rowEls.indexOf(theadRow) : -1
  const headRowIdx = theadIdx >= 0 ? theadIdx : 0
  const header = normalised[headRowIdx]
  const body = normalised.filter((_, i) => i !== headRowIdx)
  const mdLines = [
    '| ' + header.join(' | ') + ' |',
    '| ' + header.map(() => '---').join(' | ') + ' |',
    ...body.map((row) => '| ' + row.join(' | ') + ' |'),
  ]
  // Control characters delimit the placeholder so it cannot collide with
  // article text.
  const key = `\x02MDTABLE${tableIdx++}\x03`
  tableMd.set(key, mdLines.join('\n'))
  $table.replaceWith(`${key}\n`)
}
// Unwrap a <blockquote> whose only element child is a <pre>
// (code blocks that the source site wrapped in a quote).
$('blockquote').each((_, quoteEl) => {
  const $quote = $(quoteEl)
  const $kids = $quote.children()
  const $firstKid = $kids.first()
  if ($kids.length !== 1 || !$firstKid.is('pre')) return
  $quote.replaceWith($firstKid)
})
// Remove direct children of <pre> that are not <code> (e.g. copy-button
// toolbars, line-number gutter spans injected by syntax highlighters).
// Elements carrying an hljs class are kept, because highlight.js sometimes
// places its markup directly under <pre>.
$('pre').each((_, preEl) => {
  $(preEl).children(':not(code)').each((_, childEl) => {
    if (childEl.type !== 'tag') return
    const $child = $(childEl)
    const classNames = $child.attr('class') ?? ''
    if (/hljs/.test(classNames)) return
    $child.remove()
  })
})
// Merge multiple <code> siblings inside one <pre> into a single <code>.
// Some sites emit one <code> per line / section inside the same <pre>.
$('pre').each((_, el) => {
  const $pre = $(el)
  const $codes = $pre.children('code')
  if ($codes.length <= 1) return
  // Take the language class from the first <code> that has one.
  let langClass = ''
  $codes.each((_, code) => {
    const match = ($(code).attr('class') ?? '').match(/language-\S+/)
    if (match) {
      langClass = match[0]
      return false // stop iterating: first hit wins
    }
  })
  // Join the inner HTML of every <code> with a newline and put the result in
  // ONE new <code> element. The wrapper is built through cheerio (not string
  // interpolation) so the class value needs no manual escaping, and the
  // collected language class is actually carried over to the merged element.
  const merged = $codes.map((_, code) => $(code).html()).get().join('\n')
  const $merged = $('<code></code>')
  if (langClass) $merged.attr('class', langClass)
  $merged.html(merged)
  $pre.empty().append($merged)
})
// Ensure every <pre> that lacks a direct <code> child gets its content
// wrapped in one, so Turndown always produces a fenced code block.
$('pre').each((_, el) => {
  const $pre = $(el)
  if ($pre.children('code').length) return
  // Re-setting the HTML without the wrapper would be a no-op; the <code>
  // tags are what makes this step do anything.
  $pre.html(`<code>${$pre.html() ?? ''}</code>`)
})
// Rewrite whatever language hint is present into a single `language-xxx`
// class on the <code> element, so Turndown's fence rule picks up the
// identifier. The class attributes of both <pre> and <code> are handed to
// detectLang (defined elsewhere), which is expected to recognise forms such
// as language-js, lang-js, hljs javascript, brush: js, etc.
$('pre').each((_, preEl) => {
  const $preEl = $(preEl)
  const $codeEl = $preEl.children('code').first()
  if ($codeEl.length === 0) return
  const classHints = `${$preEl.attr('class') ?? ''} ${$codeEl.attr('class') ?? ''}`
  const detected = detectLang(classHints)
  if (!detected) return
  $codeEl.attr('class', `language-${detected}`)
})
// Normalise invalid list nesting: ul/ol as direct child of ul/ol (sibling of li)
// e.g. <ul><li>A</li><li>B</li><ul><li>B-1</li></ul></ul>
//      → move inner ul into previous li (B)
// Process deepest first so inner lists are re-parented before outer.
const orphanLists = $('ul > ul, ul > ol, ol > ul, ol > ol').toArray()
  .sort((a, b) => $(b).parents('ul,ol').length - $(a).parents('ul,ol').length)
orphanLists.forEach((el) => {
  const $el = $(el)
  const prev = $el.prev()[0]
  // A list with no preceding element sibling has no item to attach to; leave it.
  if (!prev || prev.type !== 'tag') return
  const prevTag = prev.name.toUpperCase()
  if (prevTag === 'LI') {
    $(prev).append($el)
  } else if (prevTag === 'UL' || prevTag === 'OL') {
    // Previous sibling is itself a list: attach to its last item, if any.
    const lastLi = $(prev).children('li').last()[0]
    if (lastLi) $(lastLi).append($el)
  }
})
// Serialise the cleaned DOM; if cheerio yields no <body> markup, fall back
// to the HTML exactly as it was extracted from the page.
const cleanedHtml = $('body').html() ?? contentHtml
// HTML → Markdown: ATX headings, fenced code blocks, "-" bullets, "---" rules.
const turndownOptions = {
  hr: '---',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
  headingStyle: 'atx',
}
const td = new TurndownService(turndownOptions)
// Override list item rule:
// - single space after the bullet marker (Turndown default is 3)
// - strip leading/trailing blank lines from each item
// - use 2-space indent per nesting level for continuation lines (ul/ol nested in ul/ol)
// - do not strip leading bullet/ordinal when content is multi-line (nested list)
td.addRule('listItem', {
  filter: 'li',
  /**
   * @param {string} content - Markdown already produced for the <li>'s children
   * @param {Node} node - the <li> element
   * @param {object} options - Turndown options (bulletListMarker is read)
   * @returns {string} the Markdown line(s) for this list item
   */
  replacement(content, node, options) {
    const parent = node.parentNode
    const isOrdered = parent.nodeName === 'OL'
    // Count list nesting depth (ul/ol ancestors, including the direct parent)
    // for the indent of continuation lines.
    let depth = 0
    for (let p = parent; p && p.nodeType === 1; p = p.parentNode) {
      if (p.nodeName === 'UL' || p.nodeName === 'OL') depth++
    }
    // Two spaces per level: a single space is not enough for a following
    // "- " line to be parsed as nested under this item.
    const indent = '  '.repeat(depth)
    content = content
      .replace(/^\n+/, '')
      .replace(/\n+$/, '')
      .replace(/\n/gm, `\n${indent}`)
    // Only strip leading bullet/ordinal on single-line content (site-injected noise).
    // Multi-line content may start with a nested list marker; leave it intact.
    const isSingleLine = !content.includes('\n')
    if (isSingleLine) {
      if (isOrdered) {
        content = content.replace(/^\d+\\?[.、\)]\s*/, '').replace(/^[①②③④⑤⑥⑦⑧⑨⑩]\s*/, '')
      } else {
        content = content.replace(/^\\?[•·\-–—]\s*/, '')
      }
    }
    // Ordered items are numbered by their position among the parent's element children.
    const prefix = isOrdered
      ? `${Array.prototype.indexOf.call(parent.children, node) + 1}. `
      : `${options.bulletListMarker} `
    return prefix + content + (node.nextSibling ? '\n' : '')
  },
})
// Preserve