Open Chinese Convert 1.1.5
A project for conversion between Traditional and Simplified Chinese
Segments.hpp
1/*
2 * Open Chinese Convert
3 *
4 * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#pragma once
20
#include <cstddef>
#include <initializer_list>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "Common.hpp"
24
25namespace opencc {
/**
 * Segmented text.
 *
 * An ordered sequence of NUL-terminated string segments. A segment is either
 * "unmanaged" (only the caller's pointer is stored) or "managed" (the text is
 * copied into a std::string owned by this object). The order in which
 * segments were added is recorded separately so both kinds can be iterated
 * as a single sequence.
 */
class OPENCC_EXPORT Segments {
public:
  /// Creates an empty sequence.
  Segments() {}

  /**
   * Builds a sequence from C strings.
   * Each item is copied into a managed std::string, so the pointers in
   * initList do not have to outlive this object.
   */
  Segments(std::initializer_list<const char*> initList) {
    for (const char* item : initList) {
      // Explicit copy: goes through the managed (owning) overload.
      AddSegment(std::string(item));
    }
  }

  /// Builds a sequence from strings; each one is copied (managed).
  Segments(std::initializer_list<std::string> initList) {
    for (const std::string& item : initList) {
      AddSegment(item);
    }
  }

  /**
   * Appends an unmanaged segment.
   * Only the pointer is stored; the pointed-to characters are not copied, so
   * they must remain valid for as long as this segment is read.
   */
  void AddSegment(const char* unmanagedString) {
    indexes.push_back(std::make_pair(unmanaged.size(), false));
    unmanaged.push_back(unmanagedString);
  }

  /// Appends a managed segment: a copy of str is stored in this object.
  void AddSegment(const std::string& str) {
    indexes.push_back(std::make_pair(managed.size(), true));
    managed.push_back(str);
  }

  /**
   * Read-only input iterator over the segments, in insertion order.
   * Dereferencing yields the segment as a const char* (by value).
   */
  class iterator {
  public:
    // Spelled out by hand: std::iterator is deprecated since C++17.
    using iterator_category = std::input_iterator_tag;
    using value_type = const char*;
    using difference_type = std::ptrdiff_t;
    using pointer = const char* const*;
    using reference = const char*;  // operator* returns by value

    iterator(const Segments* const _segments, size_t _cursor)
        : segments(_segments), cursor(_cursor) {}

    /// Pre-increment: advances to the next segment.
    iterator& operator++() {
      ++cursor;
      return *this;
    }

    /// Post-increment: advances and returns the previous position.
    iterator operator++(int) {
      iterator previous(*this);
      ++cursor;
      return previous;
    }

    /// Equal when both refer to the same position of the same Segments.
    bool operator==(const iterator& that) const {
      return cursor == that.cursor && segments == that.segments;
    }

    bool operator!=(const iterator& that) const { return !(*this == that); }

    /// Returns the current segment; see Segments::At.
    const char* operator*() const { return segments->At(cursor); }

  private:
    // Not a const pointer: iterators have to be copy-assignable.
    const Segments* segments;
    size_t cursor;
  };

  /**
   * Returns the segment at position cursor (0-based, insertion order).
   * No bounds check is performed; cursor must be less than Length().
   * For a managed segment the pointer refers to storage owned by this
   * object and may be invalidated by a later AddSegment call.
   */
  const char* At(size_t cursor) const {
    const auto& index = indexes[cursor];
    if (index.second) {
      return managed[index.first].c_str();
    } else {
      return unmanaged[index.first];
    }
  }

  /// Number of segments added so far.
  size_t Length() const { return indexes.size(); }

  iterator begin() const { return iterator(this, 0); }

  iterator end() const { return iterator(this, indexes.size()); }

  /// Concatenates all segments, in order, into a single string.
  std::string ToString() const {
    // TODO implement a nested structure to reduce concatenation,
    // like a purely functional differential list
    std::string joined;
    for (const char* segment : *this) {
      joined += segment;
    }
    return joined;
  }

private:
  // Copy construction is disabled. Copy assignment is left as it was
  // (implicitly declared) so existing callers keep compiling.
  Segments(const Segments&) = delete;

  // Borrowed pointers, in the order they were added.
  std::vector<const char*> unmanaged;
  // Owned copies, in the order they were added.
  std::vector<std::string> managed;
  // One entry per segment: (index into managed/unmanaged, is managed)
  std::vector<std::pair<size_t, bool>> indexes;
};
114} // namespace opencc
Definition: Segments.hpp:56
Segmented text.
Definition: Segments.hpp:30