Repository: trkbt10/mikan.js Branch: master Commit: 77a04ff0f71a Files: 10 Total size: 17.4 KB Directory structure: gitextract_k2l19ac3/ ├── .gitignore ├── LICENSE ├── README.md ├── docs/ │ ├── index.html │ └── mikan.js ├── package.json ├── src/ │ ├── mikan.d.ts │ └── mikan.js └── test/ ├── mikan-test.js └── numbers-and-units-test.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ ### https://raw.github.com/github/gitignore/9f8e411bba17223b5a7744ace8f41cede6601c59/Global/OSX.gitignore .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### https://raw.github.com/github/gitignore/9f8e411bba17223b5a7744ace8f41cede6601c59/node.gitignore # Logs logs *.log npm-debug.log* # Runtime data pids *.pid *.seed # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) .grunt # node-waf configuration .lock-wscript # Compiled binary addons (http://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules jspm_packages # Optional npm cache directory .npm # Optional REPL history .node_repl_history ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 trkbt10 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Mikan.js - 機械学習を用いていない日本語改行問題へのソリューション mikan.jsは、正規表現を用いた簡易形態素解析による、単語の改行問題への解決策を提供します。 [demo](https://trkbt10.github.io/mikan.js/) ## インストール `npm install mikanjs` ## 使い方 nodeで用いる場合 ```javascript const mikan = require('mikanjs'); console.log(mikan('常に最新、最高のモバイル。Androidを開発した同じチームから。')); /* 常に 最新、 最高の モバイル。 Androidを 開発した 同じ チームから。 */ console.log(mikan.split('常に最新、最高のモバイル。Androidを開発した同じチームから。')); // ['常に', '最新、', '最高の', 'モバイル。', 'Androidを', '開発した', '同じ', 'チームから。'] ``` Webで用いる場合 ```html
``` Reactで用いる場合 ```javascript
``` もしくは ```javascript {Mikan.split('常に最新、最高のモバイル。Androidを開発した同じチームから。').map((text) => {text})} ``` ================================================ FILE: docs/index.html ================================================ Demo
================================================ FILE: docs/mikan.js ================================================ "use strict"; (function(root, factory) { if (typeof define === 'function' && define.amd) { // AMD define([], factory); } else if (typeof exports === 'object') { // commonjs module.exports = factory(); } else { // Browser globals root.Mikan = factory(); } })(this, function() { var joshi = /(でなければ|について|かしら|くらい|けれど|なのか|ばかり|ながら|ことよ|こそ|こと|さえ|しか|した|たり|だけ|だに|だの|つつ|ても|てよ|でも|とも|から|など|なり|ので|のに|ほど|まで|もの|やら|より|って|で|と|な|に|ね|の|も|は|ば|へ|や|わ|を|か|が|さ|し|ぞ|て)/g; var keywords = /(\ |[a-zA-Z0-9]+\.[a-z]{2,}|[一-龠々〆ヵヶゝ]+|[ぁ-んゝ]+|[ァ-ヴー]+|[a-zA-Z0-9]+|[a-zA-Z0-9]+)/g; var periods = /([\.\,。、!\!?\?]+)$/g var bracketsBegin = /([〈《「『「((\[【〔〚〖〘❮❬❪❨(<{❲❰{❴])/g var bracketsEnd = /([〉》」』」))\]】〕〗〙〛}>\)❩❫❭❯❱❳❵}])/g function SimpleAnalyze(str) { var words = str.split(keywords).reduce(function(prev, word) { return [].concat(prev, word.split(joshi)); }).reduce(function(prev, word) { return [].concat(prev, word.split(bracketsBegin)); }).reduce(function(prev, word) { return [].concat(prev, word.split(bracketsEnd)); }).filter(function(word) { return word; }); var result = []; var prevType = ''; var prevWord = ''; words.forEach(function(word) { var token = word.match(periods) || word.match(joshi) if (word.match(bracketsBegin)) { prevType = 'bracketBegin'; prevWord = word; return } if (word.match(bracketsEnd)) { result[result.length - 1] += word; prevType = 'bracketEnd'; prevWord = word; return } if (prevType === 'bracketBegin') { word = prevWord + word prevWord = '' prevType = '' } // すでに文字が入っている上で助詞が続く場合は結合する if (result.length > 0 && token && prevType === '') { result[result.length - 1] += word; prevType = 'keyword'; prevWord = word; return } // 単語のあとの文字がひらがななら結合する if (result.length > 1 && token || (prevType === 'keyword' && word.match(/[ぁ-んゝ]+/g))) { result[result.length - 1] += word; prevType = '' prevWord = word; return; } result.push(word); prevType = 'keyword'; prevWord = word; }); return result; } function Mikan() { var text = arguments.length <= 0 || arguments[0] === undefined ? '' : arguments[0]; var userOption = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1]; var defaultOption = { style: 'display:inline-block', role: 'presentation', className: '' }; var option = {}; Object.keys(defaultOption).forEach(function(key) { option[key] = (typeof userOption[key] === 'undefined') ? defaultOption[key] : userOption[key]; }); var attr = ''; option.style && (attr += " style=\"" + option.style + "\""); option.role && (attr += " role=\"" + option.role + "\""); option.className && (attr += " class=\"" + option.className + "\""); var words = SimpleAnalyze(text); var html = words.map(function(word) { return '' + word + '' }).join(''); return html; } Mikan.split = SimpleAnalyze; return Mikan; }); ================================================ FILE: package.json ================================================ { "name": "mikan.js", "version": "1.0.14", "description": "Japanese Line Break Organizer", "main": "src/mikan.js", "types": "src/mikan.d.ts", "files": [ "src" ], "scripts": { "test": "ava" }, "keywords": [], "author": "", "license": "MIT", "devDependencies": { "assert": "^1.4.1", "ava": "^0.19.1" } } ================================================ FILE: src/mikan.d.ts ================================================ declare function SimpleAnalyze(str: string): string[]; declare type IUserOption = { style?: string; role?: string; className?: string; }; declare function Mikan(text?: string, userOption?: IUserOption): string; declare namespace Mikan { var split: typeof SimpleAnalyze; } export default Mikan; ================================================ FILE: src/mikan.js ================================================ /*! mikan.js v1.0.13 | MIT License | https://github.com/trkbt10/mikan.js/blob/master/LICENSE */ "use strict"; (function (root, factory) { if (typeof define === 'function' && define.amd) { // AMD define([], factory); } else if (typeof exports === 'object') { // commonjs module.exports = factory(); } else { // Browser globals root.Mikan = factory(); } })(this, function () { var joshi = /(でなければ|について|かしら|くらい|けれど|なのか|ばかり|ながら|ことよ|こそ|こと|さえ|しか|した|たり|だけ|だに|だの|つつ|ても|てよ|でも|とも|から|など|なり|ので|のに|ほど|まで|もの|やら|より|って|で|と|な|に|ね|の|も|は|ば|へ|や|わ|を|か|が|さ|し|ぞ|て)/g; var numbers = /([0-90-9零一二三四五六七八九十]+)/; var keywords = /(\ |[a-zA-Z0-9]+\.[a-z]{2,}|[一-龠々〆ヵヶゝ]+|[ぁ-んゝ]+|[ァ-ヴー]+|[a-zA-Z0-9]+|[a-zA-Z0-9]+)/g; var periods = /([\.\,。、!\!?\?]+)$/g var units = /(px|point|$|\$|€|¥|ノット|ユーロ|ドル|円|里|百|千|万|億|兆|京|㌫|%|\%|cm|m|km|㌢|㍍|㌖|センチメートル|メートル|キロ|キロメートル|°|度|ℓ|リットル|mℓ|ミリリットル|マイル|フィート)/i; var bracketsBegin = /([〈《「『「((\[【〔〚〖〘❮❬❪❨(<{❲❰{❴])/g var bracketsEnd = /([〉》」』」))\]】〕〗〙〛}>\)❩❫❭❯❱❳❵}])/g function SimpleAnalyze(str) { if (!str) { return ['']; } var words = str.split(keywords).reduce(function (prev, word) { return [].concat(prev, word.split(joshi)); }, []).reduce(function (prev, word) { return [].concat(prev, word.split(numbers)); }, []).reduce(function (prev, word) { return [].concat(prev, word.split(bracketsBegin)); }, []).reduce(function (prev, word) { return [].concat(prev, word.split(bracketsEnd)); }, []).filter(function (word) { return word; }); var result = []; var prevType = ''; var prevWord = ''; words.forEach(function (word) { var periodToken = word.match(periods); var joshiToken = word.match(joshi); var token = periodToken || joshiToken; if (word.match(numbers)) { result.push(word); prevType = 'number'; prevWord = word; return } // 前が数字で、後ろが単位であれば数字と単位を結合する if (word.match(units) && prevType === 'number') { result[result.length - 1] += word; prevType = 'unit'; prevWord = word; return } if (word.match(bracketsBegin)) { prevType = 'bracketBegin'; prevWord = word; return } if (word.match(bracketsEnd)) { result[result.length - 1] += word; prevType = 'bracketEnd'; prevWord = word; return } if (prevType === 'bracketBegin') { word = prevWord + word prevWord = '' prevType = '' } // すでに文字が入っている上で助詞が続く場合は結合する([単語][て|を|に|は|など]の形にする) if (result.length > 0 && token && prevType === '') { result[result.length - 1] += word; prevType = 'keyword'; prevWord = word; return } // 単語のあとの文字がひらがななら結合する if (result.length > 1 && token || (prevType === 'keyword' && !prevWord.match(/^[とのに]$/g) && !prevWord.match(periods) && word.match(/[ぁ-んゝ]+/g))) { result[result.length - 1] += word; if (!joshiToken) prevType = '' prevWord = word; return; } result.push(word); prevType = 'keyword'; prevWord = word; }); return result; } function Mikan() { var text = arguments.length <= 0 || arguments[0] === undefined ? '' : arguments[0]; var userOption = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1]; var defaultOption = { style: 'display:inline-block', role: 'presentation', className: '' }; var option = {}; Object.keys(defaultOption).forEach(function (key) { option[key] = (typeof userOption[key] === 'undefined') ? defaultOption[key] : userOption[key]; }); var attr = ''; option.style && (attr += " style=\"" + option.style + "\""); option.role && (attr += " role=\"" + option.role + "\""); option.className && (attr += " class=\"" + option.className + "\""); var words = SimpleAnalyze(text); var html = words.map(function (word) { return '' + word + '' }).join(''); return html; } Mikan.split = SimpleAnalyze; return Mikan; }); ================================================ FILE: test/mikan-test.js ================================================ const mikan = require('../src/mikan.js') const test = require('ava') function tagToArray(text = "") { return text.split(/<.*?>(.*?)<\/.*?>/g).filter((word) => word) } test(t => { const source = '常に最新、最高のモバイル。Androidを開発した同じチームから。' const expected = ['常に', '最新、', '最高の', 'モバイル。', 'Androidを', '開発した', '同じ', 'チームから。'] const queue = mikan.split(source) const result = mikan(source) t.deepEqual(queue, expected) t.deepEqual(tagToArray(result), expected) t.truthy(result.indexOf('') > -1) }) test(t => { const source = '私は好きにした。君たちも好きにしろ。' const result = mikan(source, { className: 'wbr', style: 'font-weight:bold', role: 'debag' }) t.truthy(result.indexOf('class="wbr"') > -1) t.truthy(result.indexOf('style="font-weight:bold"') > -1) t.truthy(result.indexOf('role="debag"') > -1) }) test(t => { const source = 'え、蒲田に!?' const result = mikan(source, { className: '', style: '', role: '' }) t.truthy(result.indexOf('class=') <= -1) t.truthy(result.indexOf('style=') <= -1) t.truthy(result.indexOf('role=') <= -1) }) test(t => { const source = '原稿と防災服を用意してくれ' const expected = ['原稿と', '防災服を', '用意してくれ'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '1192' const expected = ['1192'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = 'やりたいことのそばにいる' const expected = ["やりたいことの", "そばに", "いる"] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = 'このmikan.jsというライブラリは、スマートな文字区切りを可能にします。' const expected = ['この', 'mikan.jsと', 'いう', 'ライブラリは、', 'スマートな', '文字区切りを', '可能にします。'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = 'テンプレートを使用しますか、それとも空白の調査から始めますか?' const expected = ['テンプレートを', '使用しますか、', 'それとも', '空白の', '調査から', '始めますか?'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '「あれ」でもない、「これ」でもない。' const expected = ['「あれ」', 'でもない、', '「これ」', 'でもない。'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '半角スペース 対応' const expected = ['半角', 'スペース', ' ', '対応'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '' const expected = [''] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '\n' const expected = ['\n'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = ' ' const expected = [' '] const result = mikan.split(source) t.deepEqual(result, expected) }) ================================================ FILE: test/numbers-and-units-test.js ================================================ const mikan = require('../src/mikan.js') const test = require('ava') test(t => { const source = '桜の花の落ちるスピード。秒速5センチメートル' const expected = [ "桜の", "花の", "落ちる", "スピード。", "秒速", "5センチメートル" ] const queue = mikan.split(source) const result = mikan(source) t.deepEqual(queue, expected) t.truthy(result.indexOf('') > -1) }) test(t => { const source = 'ページの読み込みが 50%加速' const expected = ['ページの', '読み', '込みが', ' ', '50%', '加速'] const result = mikan.split(source) t.deepEqual(result, expected) }) test(t => { const source = '赤道を抜け、嵐を抜け、氷を割り、日本から1万4000キロ' const expected = [ "赤道を", "抜け、", "嵐を", "抜け、", "氷を", "割り、", "日本から", "1万", "4000キロ", ] const queue = mikan.split(source) const result = mikan(source) t.deepEqual(queue, expected) t.truthy(result.indexOf('') > -1) }) test(t => { const source = '母をたずねて三千里' const expected = [ "母をたずねて", "三千里", ] const queue = mikan.split(source) const result = mikan(source) t.deepEqual(queue, expected) t.truthy(result.indexOf('') > -1) }) test(t => { const source = 'ヘディング190、高度32000、速度720ノット、なお南下中' const expected = [ "ヘディング", "190、", "高度", "32000、", "速度", "720ノット、なお", "南下中", ] const queue = mikan.split(source) const result = mikan(source) t.deepEqual(queue, expected) t.truthy(result.indexOf('') > -1) })