Repository: ddddxxx/SwiftyOpenCC Branch: master Commit: 1d8105a0f719 Files: 33 Total size: 24.4 KB Directory structure: gitextract_b6utl32b/ ├── .github/ │ └── workflows/ │ ├── ci.yml │ └── jazzy.yml ├── .gitmodules ├── LICENSE ├── Package.swift ├── README.md ├── Sources/ │ ├── OpenCC/ │ │ ├── ChineseConverter.swift │ │ ├── ConversionDictionary.swift │ │ ├── ConversionError.swift │ │ ├── Dictionary/ │ │ │ ├── HKVariants.ocd2 │ │ │ ├── HKVariantsRev.ocd2 │ │ │ ├── HKVariantsRevPhrases.ocd2 │ │ │ ├── JPShinjitaiCharacters.ocd2 │ │ │ ├── JPShinjitaiPhrases.ocd2 │ │ │ ├── JPVariants.ocd2 │ │ │ ├── JPVariantsRev.ocd2 │ │ │ ├── STCharacters.ocd2 │ │ │ ├── STPhrases.ocd2 │ │ │ ├── TSCharacters.ocd2 │ │ │ ├── TSPhrases.ocd2 │ │ │ ├── TWPhrases.ocd2 │ │ │ ├── TWPhrasesRev.ocd2 │ │ │ ├── TWVariants.ocd2 │ │ │ ├── TWVariantsRev.ocd2 │ │ │ └── TWVariantsRevPhrases.ocd2 │ │ ├── DictionaryLoader.swift │ │ ├── DictionaryName.swift │ │ └── WeakValueCache.swift │ └── copencc/ │ ├── include/ │ │ ├── header.h │ │ └── module.modulemap │ └── source.cpp └── Tests/ ├── LinuxMain.swift └── OpenCCTests/ └── OpenCCTests.swift ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: pull_request: jobs: Xcode: strategy: matrix: env: - platform: macOS destination: "platform=macOS" action: test - platform: iOS destination: "platform=iOS Simulator,name=iPhone 8" action: test - platform: tvOS destination: "platform=tvOS Simulator,name=Apple TV 4K" action: test # - platform: watchOS # destination: "platform=watchOS Simulator,name=Apple Watch Series 4 - 44mm" # action: build runs-on: macOS-latest env: ACTION: ${{ matrix.env.action }} DESTINATION: ${{ matrix.env.destination }} steps: - uses: actions/checkout@v2 with: submodules: true - name: Xcode Version run: xcodebuild -version - name: Build and 
Test run: | set -o pipefail xcodebuild "$ACTION" \ -scheme SwiftyOpenCC \ -destination "$DESTINATION" | xcpretty # Mac: # runs-on: macOS-latest # steps: # - uses: actions/checkout@v2 # with: # submodules: true # - name: Swift Version # run: swift -version # - name: Build and Test # run: swift test # Linux: # strategy: # matrix: # tag: ['5.4'] # runs-on: ubuntu-latest # container: # image: swift:${{ matrix.tag }} # steps: # - uses: actions/checkout@v1 # - run: git submodule update --init # - name: Swift Version # run: swift -version # - name: Build and Test # run: swift test --enable-test-discovery ================================================ FILE: .github/workflows/jazzy.yml ================================================ name: Jazzy on: push: tags: - v* jobs: build: runs-on: macOS-latest steps: - name: Get Version id: get_version run: echo ::set-output name=VERSION::${GITHUB_REF#refs/tags/v} - name: Checkout uses: actions/checkout@v1 - name: Install Jazzy run: gem install jazzy --user-install - name: Run Jazzy run: | jazzy \ --clean \ --author ddddxxx \ --github_url https://github.com/$GITHUB_REPOSITORY \ --module-version $VERSION \ --module OpenCC \ --output docs env: VERSION: ${{ steps.get_version.outputs.VERSION }} - name: Deploy run: | cd docs git init git config user.name "CI" git config user.email "jazzy-ci@github.com" git remote add secure-origin https://${{ secrets.ACCESS_TOKEN }}@github.com/$GITHUB_REPOSITORY.git git checkout -b gh-pages git add . 
git commit -m "Updated docs" git push --force secure-origin gh-pages ================================================ FILE: .gitmodules ================================================ [submodule "OpenCC"] path = OpenCC url = https://github.com/BYVoid/OpenCC ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2017 DengXiang Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: Package.swift ================================================ // swift-tools-version:5.3 import PackageDescription let package = Package( name: "SwiftyOpenCC", products: [ .library( name: "OpenCC", targets: ["OpenCC"]), ], targets: [ .target( name: "OpenCC", dependencies: ["copencc"], resources: [ .copy("Dictionary") ]), .testTarget( name: "OpenCCTests", dependencies: ["OpenCC"], resources: [ .copy("benchmark"), .copy("testcases"), ]), .target( name: "copencc", exclude: [ "src/benchmark", "src/tools", "src/BinaryDictTest.cpp", "src/Config.cpp", "src/ConfigTest.cpp", "src/ConversionChainTest.cpp", "src/ConversionTest.cpp", "src/DartsDictTest.cpp", "src/DictGroupTest.cpp", "src/MarisaDictTest.cpp", "src/MaxMatchSegmentationTest.cpp", "src/PhraseExtractTest.cpp", "src/SerializedValuesTest.cpp", "src/SimpleConverter.cpp", "src/SimpleConverterTest.cpp", "src/TextDictTest.cpp", "src/UTF8StringSliceTest.cpp", "src/UTF8UtilTest.cpp", "deps/google-benchmark", "deps/gtest-1.11.0", "deps/pybind11-2.5.0", "deps/rapidjson-1.1.0", "deps/tclap-1.2.2", "src/CmdLineOutput.hpp", "src/Config.hpp", "src/ConfigTestBase.hpp", "src/DictGroupTestBase.hpp", "src/SimpleConverter.hpp", "src/TestUtils.hpp", "src/TestUtilsUTF8.hpp", "src/TextDictTestBase.hpp", "src/py_opencc.cpp", // ??? 
"src/README.md", "src/CMakeLists.txt", "deps/marisa-0.2.6/AUTHORS", "deps/marisa-0.2.6/CMakeLists.txt", "deps/marisa-0.2.6/COPYING.md", "deps/marisa-0.2.6/README.md", ], sources: [ "source.cpp", "src", "deps/marisa-0.2.6", ], cxxSettings: [ .headerSearchPath("src"), .headerSearchPath("deps/darts-clone"), .headerSearchPath("deps/marisa-0.2.6/include"), .headerSearchPath("deps/marisa-0.2.6/lib"), .define("ENABLE_DARTS"), ]), ], cxxLanguageStandard: .cxx14 ) ================================================ FILE: README.md ================================================ # Swifty Open Chinese Convert [![Github CI Status](https://github.com/ddddxxx/SwiftyOpenCC/workflows/CI/badge.svg)](https://github.com/ddddxxx/SwiftyOpenCC/actions) ![platforms](https://img.shields.io/badge/platforms-Linux%20%7C%20macOS%20%7C%20iOS%20%7C%20tvOS%20%7C%20watchOS-lightgrey.svg) [![codebeat badge](https://codebeat.co/badges/39f17620-4f1c-4a46-b3f9-8f5b248ac28f)](https://codebeat.co/projects/github-com-ddddxxx-swiftyopencc-master) Swift port of [Open Chinese Convert](https://github.com/BYVoid/OpenCC) ## Requirements - macOS 10.10+ / iOS 8.0+ / tvOS 9.0+ / watchOS 2.0+ - Swift 5.3+ ## Usage ### Quick Start ```swift import OpenCC let str = "鼠标里面的硅二极管坏了,导致光标分辨率降低。" let converter = try! ChineseConverter(options: [.traditionalize, .twStandard, .twIdiom]) converter.convert(str) // 滑鼠裡面的矽二極體壞了,導致游標解析度降低。 ``` ## Documentation [GitHub Pages](http://ddddxxx.github.io/SwiftyOpenCC) (100% Documented) ## License SwiftyOpenCC is available under the MIT license. See the [LICENSE file](LICENSE). ================================================ FILE: Sources/OpenCC/ChineseConverter.swift ================================================ // // ChineseConverter.swift // OpenCC // // Created by ddddxxx on 2017/3/9. // import Foundation import copencc /// The `ChineseConverter` class is used to represent and apply conversion /// between Traditional Chinese and Simplified Chinese to Unicode strings. 
/// An instance of this class is an immutable representation of a compiled
/// conversion pattern.
///
/// `ChineseConverter` supports character-level conversion, phrase-level
/// conversion, variant conversion and regional idioms among Mainland China,
/// Taiwan and Hong Kong.
///
/// `ChineseConverter` is designed to be immutable and threadsafe, so that
/// a single instance can be used in conversion on multiple threads at once.
/// However, the string on which it is operating should not be mutated
/// during the course of a conversion.
public class ChineseConverter {

    /// These constants define the ChineseConverter options.
    public struct Options: OptionSet {

        public let rawValue: Int

        public init(rawValue: Int) {
            self.rawValue = rawValue
        }

        /// Convert to Traditional Chinese. (default)
        public static let traditionalize = Options(rawValue: 1 << 0)

        /// Convert to Simplified Chinese.
        public static let simplify = Options(rawValue: 1 << 1)

        /// Use Taiwan standard.
        public static let twStandard = Options(rawValue: 1 << 5)

        /// Use Hong Kong standard.
        public static let hkStandard = Options(rawValue: 1 << 6)

        /// Taiwanese idiom conversion.
        public static let twIdiom = Options(rawValue: 1 << 10)
    }

    // `seg` and `chain` are retained for the lifetime of the converter so the
    // Swift-side dictionary wrappers (and their native handles) stay alive
    // while the native converter exists.
    private let seg: ConversionDictionary
    private let chain: [ConversionDictionary]

    /// Native converter handle returned by `CCConverterCreate`; released in `deinit`.
    private let converter: CCConverterRef

    private init(loader: DictionaryLoader, options: Options) throws {
        seg = try loader.segmentation(options: options)
        chain = try loader.conversionChain(options: options)
        var rawChain = chain.map { $0.dict }
        converter = CCConverterCreate("SwiftyOpenCC", seg.dict, &rawChain, rawChain.count)
    }

    deinit {
        // The handle is heap-allocated by `CCConverterCreate`; without this
        // call every converter instance would leak its native counterpart.
        CCConverterDestroy(converter)
    }

    /// Returns an initialized `ChineseConverter` instance with the specified
    /// conversion options.
    ///
    /// - Parameter options: The converter's options.
    /// - Throws: Throws `ConversionError` if failed.
    public convenience init(options: Options) throws {
        let loader = DictionaryLoader(bundle: .module)
        try self.init(loader: loader, options: options)
    }

    /// Returns a converted string using the converter's current options.
    ///
    /// - Parameter text: The string to convert.
    /// - Returns: A converted string using the converter's current options.
    public func convert(_ text: String) -> String {
        // The native call is declared `_Nullable`; a nil result is treated as
        // a programmer error here, hence the force unwrap.
        let stlStr = CCConverterCreateConvertedStringFromString(converter, text)!
        // The returned string object is owned by the caller and must be freed.
        defer { STLStringDestroy(stlStr) }
        return String(utf8String: STLStringGetUTF8String(stlStr))!
    }
}

================================================
FILE: Sources/OpenCC/ConversionDictionary.swift
================================================
//
//  ConversionDictionary.swift
//  OpenCC
//
//  Created by ddddxxx on 2020/1/3.
//

import Foundation
import copencc

/// Owns a native dictionary handle (`CCDictRef`), either loaded from a single
/// file or composed from a group of other dictionaries.
class ConversionDictionary {

    /// The member dictionaries when this instance was created from a group;
    /// empty for a file-backed dictionary. Holding them keeps the members
    /// alive for as long as the group wrapper exists.
    let group: [ConversionDictionary]

    /// The native dictionary handle; released in `deinit`.
    let dict: CCDictRef

    /// Loads a dictionary from the file at `path`.
    ///
    /// - Throws: `ConversionError` built from `ccErrorno` when the native
    ///   loader returns nil.
    init(path: String) throws {
        guard let dict = CCDictCreateMarisaWithPath(path) else {
            throw ConversionError(ccErrorno)
        }
        self.group = []
        self.dict = dict
    }

    /// Creates a dictionary that combines the given dictionaries.
    init(group: [ConversionDictionary]) {
        var rawGroup = group.map { $0.dict }
        self.group = group
        self.dict = CCDictCreateWithGroup(&rawGroup, rawGroup.count)
    }

    deinit {
        // Release the native handle; without this the dictionary created by
        // the `CCDictCreate*` functions is never given back.
        CCDictDestroy(dict)
    }
}

================================================
FILE: Sources/OpenCC/ConversionError.swift
================================================
//
//  ConversionError.swift
//  OpenCC
//
//  Created by ddddxxx on 2020/1/3.
//

import Foundation
import copencc

/// Errors surfaced by the conversion layer, mirroring `CCErrorCode`.
public enum ConversionError: Error {

    case fileNotFound

    case invalidFormat

    case invalidTextDictionary

    case invalidUTF8

    case unknown

    /// Maps a native error code to its Swift counterpart; any code without a
    /// dedicated case becomes `.unknown`.
    init(_ code: CCErrorCode) {
        switch code {
        case .fileNotFound:
            self = .fileNotFound
        case .invalidFormat:
            self = .invalidFormat
        case .invalidTextDictionary:
            self = .invalidTextDictionary
        case .invalidUTF8:
            self = .invalidUTF8
        case .unknown, _:
            self = .unknown
        }
    }
}

================================================
FILE: Sources/OpenCC/DictionaryLoader.swift
================================================
//
//  DictionaryLoader.swift
//  OpenCC
//
//  Created by ddddxxx on 2018/5/5.
//

import Foundation
import copencc

extension ChineseConverter {

    /// Locates `.ocd2` dictionaries inside a bundle and loads them, sharing
    /// already-loaded dictionaries through a weak cache keyed by file path.
    struct DictionaryLoader {

        private static let subdirectory = "Dictionary"
        private static let dictCache = WeakValueCache<String, ConversionDictionary>()

        private let bundle: Bundle

        init(bundle: Bundle) {
            self.bundle = bundle
        }

        /// Returns the dictionary with the given name.
        ///
        /// - Throws: `ConversionError.fileNotFound` when the resource is not
        ///   in the bundle, or whatever `ConversionDictionary(path:)` throws.
        func dict(_ name: ChineseConverter.DictionaryName) throws -> ConversionDictionary {
            guard let path = bundle.path(forResource: name.description, ofType: "ocd2", inDirectory: DictionaryLoader.subdirectory) else {
                throw ConversionError.fileNotFound
            }
            return try DictionaryLoader.dictCache.value(for: path) {
                return try ConversionDictionary(path: path)
            }
        }
    }
}

extension ChineseConverter.DictionaryLoader {

    /// Loads the dictionary used for segmentation under `options`.
    func segmentation(options: ChineseConverter.Options) throws -> ConversionDictionary {
        let dictName = options.segmentationDictName
        return try dict(dictName)
    }

    /// Loads the conversion chain for `options`. Each step with one name
    /// becomes a plain dictionary, each step with several names becomes a
    /// group dictionary, and empty steps are dropped.
    func conversionChain(options: ChineseConverter.Options) throws -> [ConversionDictionary] {
        return try options.conversionChain.compactMap { names in
            switch names.count {
            case 0:
                return nil
            case 1:
                return try dict(names.first!)
            case _:
                let dicts = try names.map(dict)
                return ConversionDictionary(group: dicts)
            }
        }
    }
}

================================================
FILE: Sources/OpenCC/DictionaryName.swift
================================================
//
//  DictionaryName.swift
//  OpenCC
//
//  Created by ddddxxx on 2019/9/16.
//

import Foundation

extension ChineseConverter {

    /// Names of the bundled dictionaries; `description` is the resource file
    /// name without its extension.
    enum DictionaryName: CustomStringConvertible {

        case hkVariants
        case hkVariantsRev
        case hkVariantsRevPhrases
        case jpVariants
        case stCharacters
        case stPhrases
        case tsCharacters
        case tsPhrases
        case twPhrases
        case twPhrasesRev
        case twVariants
        case twVariantsRev
        case twVariantsRevPhrases

        var description: String {
            switch self {
            case .hkVariants: return "HKVariants"
            case .hkVariantsRev: return "HKVariantsRev"
            case .hkVariantsRevPhrases: return "HKVariantsRevPhrases"
            case .jpVariants: return "JPVariants"
            case .stCharacters: return "STCharacters"
            case .stPhrases: return "STPhrases"
            case .tsCharacters: return "TSCharacters"
            case .tsPhrases: return "TSPhrases"
            case .twPhrases: return "TWPhrases"
            case .twPhrasesRev: return "TWPhrasesRev"
            case .twVariants: return "TWVariants"
            case .twVariantsRev: return "TWVariantsRev"
            case .twVariantsRevPhrases: return "TWVariantsRevPhrases"
            }
        }
    }
}

extension ChineseConverter.Options {

    /// The dictionary used for segmentation; the first matching option wins,
    /// and `.stPhrases` is the fallback.
    var segmentationDictName: ChineseConverter.DictionaryName {
        if contains(.traditionalize) {
            return .stPhrases
        } else if contains(.simplify) {
            return .tsPhrases
        } else if contains(.hkStandard) {
            return .hkVariants
        } else if contains(.twStandard) {
            return .twVariants
        } else {
            return .stPhrases
        }
    }

    /// The ordered conversion steps for these options. Each inner array is
    /// one step; a step with several names is applied as a dictionary group.
    /// Falls back to plain Simplified-to-Traditional when nothing matches.
    var conversionChain: [[ChineseConverter.DictionaryName]] {
        var result: [[ChineseConverter.DictionaryName]] = []
        if contains(.traditionalize) {
            result.append([.stPhrases, .stCharacters])
            if contains(.twIdiom) {
                result.append([.twPhrases])
            }
            if contains(.hkStandard) {
                result.append([.hkVariants])
            } else if contains(.twStandard) {
                result.append([.twVariants])
            }
        } else if contains(.simplify) {
            if contains(.hkStandard) {
                result.append([.hkVariantsRevPhrases, .hkVariantsRev])
            } else if contains(.twStandard) {
                result.append([.twVariantsRevPhrases, .twVariantsRev])
            }
            if contains(.twIdiom) {
                result.append([.twPhrasesRev])
            }
            result.append([.tsPhrases, .tsCharacters])
        } else {
            if contains(.hkStandard) {
                result.append([.hkVariants])
            } else if contains(.twStandard) {
                result.append([.twVariants])
            }
        }
        if result.isEmpty {
            return [[.stPhrases, .stCharacters]]
        }
        return result
    }
}

================================================
FILE: Sources/OpenCC/WeakValueCache.swift
================================================
//
//  WeakValueCache.swift
//  OpenCC
//
//  Created by ddddxxx on 2020/1/3.
//

import Foundation

/// Holds a weak reference to a class instance so it can be stored in a
/// collection without keeping the instance alive.
class WeakBox<Value: AnyObject> {

    private(set) weak var value: Value?

    init(_ value: Value) {
        self.value = value
    }
}

/// A cache whose values are held weakly: an entry yields its value only while
/// something else still retains that value.
///
/// Every access to `storage` happens while holding `lock`. Reading the
/// dictionary without the lock while another thread inserts into it is a data
/// race, so there is deliberately no unlocked fast path.
class WeakValueCache<Key: Hashable, Value: AnyObject> {

    private var storage: [Key: WeakBox<Value>] = [:]
    private let lock = NSLock()

    /// Returns the cached value for `key`, or nil when there is no entry or
    /// the value has already been deallocated.
    func value(for key: Key) -> Value? {
        lock.lock()
        defer { lock.unlock() }
        return storage[key]?.value
    }

    /// Returns the cached value for `key`, creating and caching it with
    /// `make` when it is missing or has been deallocated.
    ///
    /// `make` runs while the lock is held, so it must not call back into the
    /// same cache.
    ///
    /// - Throws: Rethrows whatever `make` throws; nothing is cached then.
    func value(for key: Key, make: () throws -> Value) rethrows -> Value {
        lock.lock()
        defer { lock.unlock() }
        if let cached = storage[key]?.value {
            return cached
        }
        let created = try make()
        storage[key] = WeakBox(created)
        return created
    }
}

================================================
FILE: Sources/copencc/include/header.h
================================================
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// MARK: Error

enum CCErrorCode {
    CCErrorCodeFileNotFound = 1,
    CCErrorCodeInvalidFormat,
    CCErrorCodeInvalidTextDictionary,
    CCErrorCodeInvalidUTF8,
    CCErrorCodeUnknown,
} __attribute__((enum_extensibility(open)));
typedef enum CCErrorCode CCErrorCode;

CCErrorCode ccErrorno;

// MARK: CCDict

typedef void* CCDictRef;

CCDictRef _Nullable CCDictCreateDartsWithPath(const char * _Nonnull path);
CCDictRef _Nullable CCDictCreateMarisaWithPath(const char * _Nonnull path);
CCDictRef _Nonnull CCDictCreateWithGroup(CCDictRef _Nonnull * const _Nonnull dictGroup, intptr_t count);
void
CCDictDestroy(CCDictRef _Nonnull dict); // MARK: CCConverter typedef void* CCConverterRef; CCConverterRef _Nonnull CCConverterCreate(const char * _Nonnull name, CCDictRef _Nonnull segmentation, CCDictRef _Nonnull * const _Nonnull conversionChain, intptr_t chainCount); void CCConverterDestroy(CCConverterRef _Nonnull dict); typedef void* STLString; STLString _Nullable CCConverterCreateConvertedStringFromString(CCConverterRef _Nonnull converter, const char * _Nonnull str); const char* _Nonnull STLStringGetUTF8String(STLString _Nonnull str); void STLStringDestroy(STLString _Nonnull str); #ifdef __cplusplus } #endif ================================================ FILE: Sources/copencc/include/module.modulemap ================================================ module copencc { header "header.h" export * } ================================================ FILE: Sources/copencc/source.cpp ================================================ #include "DartsDict.hpp" #include "DictGroup.hpp" #include "Converter.hpp" #include "MarisaDict.hpp" #include "MaxMatchSegmentation.hpp" #include "Conversion.hpp" #include "ConversionChain.hpp" #include "header.h" // MARK: Error void* catchOpenCCException(void* (^block)()) { try { return block(); } catch (opencc::FileNotFound& ex) { ccErrorno = CCErrorCodeFileNotFound; return NULL; } catch (opencc::InvalidFormat& ex) { ccErrorno = CCErrorCodeInvalidFormat; return NULL; } catch (opencc::InvalidTextDictionary& ex) { ccErrorno = CCErrorCodeInvalidTextDictionary; return NULL; } catch (opencc::InvalidUTF8& ex) { ccErrorno = CCErrorCodeInvalidUTF8; return NULL; } catch (opencc::Exception& ex) { ccErrorno = CCErrorCodeUnknown; return NULL; } } // MARK: CCDict CCDictRef _Nullable CCDictCreateDartsWithPath(const char * _Nonnull path) { return catchOpenCCException(^{ auto dict = opencc::SerializableDict::NewFromFile(std::string(path)); auto dictPtr = new opencc::DictPtr(dict); return static_cast(dictPtr); }); } CCDictRef _Nullable 
CCDictCreateMarisaWithPath(const char * _Nonnull path) { return catchOpenCCException(^{ auto dict = opencc::SerializableDict::NewFromFile(std::string(path)); auto dictPtr = new opencc::DictPtr(dict); return static_cast(dictPtr); }); } CCDictRef _Nonnull CCDictCreateWithGroup(CCDictRef _Nonnull * const _Nonnull dictGroup, intptr_t count) { std::list list; for (int i=0; i(dictGroup[i]); list.push_back(*dictPtr); } auto dict = new opencc::DictGroupPtr(new opencc::DictGroup(list)); return static_cast(dict); } void CCDictDestroy(CCDictRef _Nonnull dict) { auto *dictPtr = static_cast(dict); dictPtr->reset(); } // MARK: CCConverter CCConverterRef _Nonnull CCConverterCreate(const char * _Nonnull name, CCDictRef _Nonnull segmentation, CCDictRef _Nonnull * const _Nonnull conversionChain, intptr_t chainCount) { auto *segmentationPtr = static_cast(segmentation); std::list conversions; for (int i=0; i(conversionChain[i]); auto conversion = opencc::ConversionPtr(new opencc::Conversion(*dictPtr)); conversions.push_back(conversion); } auto covName = std::string(name); auto covSeg = opencc::SegmentationPtr(new opencc::MaxMatchSegmentation(*segmentationPtr)); auto covChain = opencc::ConversionChainPtr(new opencc::ConversionChain(conversions)); auto converter = new opencc::Converter(covName, covSeg, covChain); return static_cast(converter); } void CCConverterDestroy(CCConverterRef _Nonnull dict) { auto converter = static_cast(dict); delete converter; } STLString _Nullable CCConverterCreateConvertedStringFromString(CCConverterRef _Nonnull converter, const char * _Nonnull str) { return catchOpenCCException(^{ auto converterPtr = static_cast(converter); auto string = new std::string(converterPtr->Convert(str)); return static_cast(string); }); } const char* _Nonnull STLStringGetUTF8String(STLString _Nonnull str) { auto string = static_cast(str); return string->c_str(); } void STLStringDestroy(STLString _Nonnull str) { auto string = static_cast(str); delete string; } 
================================================
FILE: Tests/LinuxMain.swift
================================================
fatalError("Run the tests with `swift test --enable-test-discovery`.")

================================================
FILE: Tests/OpenCCTests/OpenCCTests.swift
================================================
import XCTest
@testable import OpenCC

/// Fixture base name paired with the converter options it exercises. Each
/// name maps to `<name>.in` / `<name>.ans` in the `testcases` resources.
let testCases: [(String, ChineseConverter.Options)] = [
    ("s2t", [.traditionalize]),
    ("t2s", [.simplify]),
    ("s2hk", [.traditionalize, .hkStandard]),
    ("hk2s", [.simplify, .hkStandard]),
    ("s2tw", [.traditionalize, .twStandard]),
    ("tw2s", [.simplify, .twStandard]),
    ("s2twp", [.traditionalize, .twStandard, .twIdiom]),
    ("tw2sp", [.simplify, .twStandard, .twIdiom]),
]

class OpenCCTests: XCTestCase {

    func converter(option: ChineseConverter.Options) throws -> ChineseConverter {
        return try ChineseConverter(options: option)
    }

    /// Converts every `.in` fixture and compares it with its `.ans` fixture.
    func testConversion() throws {
        // A missing or unreadable fixture fails the test with a message
        // instead of trapping and taking the whole test run down.
        func testCase(name: String, ext: String) throws -> String {
            let url = try XCTUnwrap(
                Bundle.module.url(forResource: name, withExtension: ext, subdirectory: "testcases"),
                "Missing test resource \(name).\(ext)")
            return try String(contentsOf: url)
        }
        for (name, opt) in testCases {
            let chineseConverter = try ChineseConverter(options: opt)
            let input = try testCase(name: name, ext: "in")
            let converted = chineseConverter.convert(input)
            let output = try testCase(name: name, ext: "ans")
            XCTAssertEqual(converted, output, "Conversion \(name) fails")
        }
    }

    /// Measures creating converters when nothing else keeps them alive.
    func testConverterCreationPerformance() {
        let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
        measure {
            for _ in 0..<10 {
                // `measure` takes a non-throwing closure, hence `try!`.
                _ = try! ChineseConverter(options: options)
            }
        }
    }

    /// Measures creating converters while `holder` keeps one alive for the
    /// same options.
    func testDictionaryCache() throws {
        let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
        let holder = try ChineseConverter(options: options)
        measure {
            for _ in 0..<1_000 {
                _ = try! ChineseConverter(options: options)
            }
        }
        // Use `holder` after the measurement so it stays alive throughout.
        _ = holder.convert("foo")
    }

    /// Measures converting a large text with a single converter.
    func testConversionPerformance() throws {
        let cov = try converter(option: [.traditionalize, .twStandard, .twIdiom])
        let url = try XCTUnwrap(
            Bundle.module.url(forResource: "zuozhuan", withExtension: "txt", subdirectory: "benchmark"),
            "Missing benchmark resource zuozhuan.txt")
        // 1.9 MB, 624k word
        let str = try String(contentsOf: url)
        measure {
            _ = cov.convert(str)
        }
    }
}