Repository: createmomo/CRF-Layer-on-the-Top-of-BiLSTM
Branch: master
Commit: 562b2ea744d3
Files: 3
Total size: 11.0 KB
Directory structure:
gitextract_7n5x16cj/
├── MyCRFLayer.py
├── README.md
└── demo.py
================================================
FILE CONTENTS
================================================
================================================
FILE: MyCRFLayer.py
================================================
import chainer.links as L
import chainer.functions as F
from chainer import variable
import numpy as np
from chainer.functions.math import sum as _sum
from chainer.functions.math import exponential as _exponential
# More details: https://github.com/createmomo/CRF-Layer-on-the-Top-of-BiLSTM
# This code is modified based on https://github.com/glample/tagger
class My_CRF(L.CRF1d):
    '''
    CRF layer placed on top of BiLSTM emission scores.

    Two extra labels are appended to the ``n_label`` real labels:
    START (id ``n_label``) and END (id ``n_label + 1``). The learnable
    transition matrix ``cost`` therefore has shape
    ``(n_label + 2, n_label + 2)``.
    '''

    def __init__(self, n_label):
        '''
        :param n_label: The number of unique labels in training data set
                        (e.g B-Person, I-Person, O)
        '''
        super(My_CRF, self).__init__(n_label)

        # The number of unique labels in training data set (e.g B-Person, I-Person, O)
        self.n_label = n_label
        # The small value will fill the expanded emission score matrix as described in 3.2
        self.small = -1000

        # Generate random values for transition matrix.
        # The shape of transition matrix is (n_label+2, n_label+2).
        # "2" means the extra added labels, START and END. (see 3.2)
        n_states = n_label + 2
        drange = np.sqrt(6. / (n_states + n_states))
        value = drange * np.random.uniform(low=-1.0, high=1.0, size=(n_states, n_states))
        with self.init_scope():
            # Assigned inside init_scope so the matrix is registered as a
            # parameter of this link.
            self.cost = variable.Parameter(np.array(value, dtype=np.float32))

    def __call__(self, xs, ys):
        '''
        Compute the CRF loss summed over all sentences.

        :param xs: the outputs of BiLSTM layer (the emission score matrix);
                   one matrix of shape (sentence length, n_label) per sentence
        :param ys: the ground truth labels; one integer sequence per sentence
        :return: CRF loss (sum over sentences of
                 "score of all paths" - "score of the real path").
                 If ``xs`` is empty the Python float ``0.0`` is returned.
        '''
        # Assign new id for extra added labels (START and END)
        b_id = np.array([self.n_label], dtype='i')
        e_id = np.array([self.n_label + 1], dtype='i')

        total_loss = 0.0
        # Compute crf loss for each sentence
        for xs_i, ys_i in zip(xs, ys):
            s_len = len(xs_i)  # how many words does the sentence have
            observations = self._expand_emissions(xs_i)

            # Compute the real path score according the ground truth labels (see 2.4)
            # Emission score of the real path
            real_path_score = _sum.sum(xs_i[list(range(s_len)), ys_i])

            # Transition score of the real path: START -> y_1 -> ... -> y_n -> END.
            # Pair every tag with its successor.
            padded_tags_ids = F.concat((b_id, ys_i, e_id), axis=0).data
            real_path_score += _sum.sum(
                self.cost[padded_tags_ids[:-1], padded_tags_ids[1:]])

            # Compute the score of all the possible paths of current sentence (see 2.5)
            all_paths_scores = self.forward(observations, self.cost)

            # The crf cost of current sentence (see 2.5)
            total_loss += all_paths_scores - real_path_score
        return total_loss

    def _expand_emissions(self, xs_i):
        '''
        Expand the emission score matrix of one sentence by adding two extra
        labels (START and END) and two extra rows (see the example in 3.2).

        :param xs_i: emission scores of one sentence, shape (s_len, n_label)
        :return: matrix of shape (s_len + 2, n_label + 2). The first row
                 scores 0 for START, the last row scores 0 for END, and every
                 other added cell holds ``self.small``.
        '''
        small = self.small
        s_len = len(xs_i)
        b_s = np.array([[small] * self.n_label + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * self.n_label + [small, 0]]).astype(np.float32)
        observations = F.concat((xs_i, small * np.ones((s_len, 2), dtype='f')), axis=1)
        return F.concat((b_s, observations, e_s), axis=0)

    def log_sum_exp(self, x, axis=None):
        '''
        Compute the sum of scores in log space (see 2.5).
        This function is used in forward.

        The maximum along ``axis`` is subtracted before exponentiating and
        added back afterwards.
        '''
        xmax = F.max(x, axis=axis, keepdims=True)
        xmax_ = F.max(x, axis=axis)
        second_item = _exponential.log(
            _sum.sum(_exponential.exp(x - F.broadcast_to(xmax, x.shape)), axis=axis))
        return xmax_ + second_item

    def _transition_scores(self, previous, obs, transitions):
        '''
        Build the score matrix of one recurrence step:
        ``scores[i, j] = previous[i] + obs[j] + transitions[i, j]``
        where ``i`` is the previous label and ``j`` the current label.
        '''
        n_states = self.n_label + 2
        previous = previous.reshape((previous.shape[0], 1))
        obs = obs.reshape((1, obs.shape[0]))
        return (F.broadcast_to(previous, (n_states, n_states))
                + F.broadcast_to(obs, (n_states, n_states))
                + transitions)

    def _log_sum_all_paths(self, observations, transitions):
        '''
        Return the sum of the scores of all paths in log space (see 2.5).
        '''
        alpha = observations[0]
        for obs in observations[1:]:
            alpha = self.log_sum_exp(
                self._transition_scores(alpha, obs, transitions), axis=0)
        return self.log_sum_exp(alpha, axis=0)

    def _viterbi_pass(self, observations, transitions, return_best_sequence):
        '''
        Run the max-score recurrence (see 2.6).

        Scores are read through ``.data``, so no gradient flows through
        this pass. ``observations`` is expected to contain at least the
        START and END rows.

        :return: the best label sequence (START/END removed) as an int32
                 Variable if ``return_best_sequence`` is true, otherwise the
                 score of the best path.
        '''
        previous = observations[0]
        back_pointers = []
        for obs in observations[1:]:
            scores = self._transition_scores(previous, obs, transitions).data
            previous = scores.max(axis=0)
            # For every current label, the previous label that led to the max.
            back_pointers.append(scores.argmax(axis=0))

        if not return_best_sequence:
            return F.max(previous, axis=0)

        # Follow the back pointers from the best final label to the first row.
        tag = int(np.argmax(previous))
        path = [tag]
        for pointers in reversed(back_pointers):
            tag = int(pointers[tag])
            path.append(tag)
        path.reverse()
        # path[0] belongs to the START row and path[-1] to the END row.
        return variable.Variable(np.array(path[1:-1], dtype='i'))

    def forward(self, observations, transitions,
                viterbi=False,
                return_best_sequence=False
                ):
        '''
        This function is described in detail in 2.5 and 2.6.

        :param observations: (see 2.5) In 2.5, 'obs' are the observations here.
        :param transitions: Transition score matrix
        :param viterbi: If false, this function returns the sum of scores in
                        log space. If true, the max-score recurrence is used.
        :param return_best_sequence: Together with ``viterbi=True``, return
                        the predicted best path. With ``viterbi=True`` and
                        ``return_best_sequence=False`` the score of the best
                        path is returned.
        :return: The sum of scores in log space, the best path score, or the
                 predicted best sequence
        :raises ValueError: if ``return_best_sequence`` is requested without
                        ``viterbi``
        '''
        if return_best_sequence and not viterbi:
            raise ValueError('return_best_sequence=True requires viterbi=True')
        if viterbi:
            return self._viterbi_pass(observations, transitions, return_best_sequence)
        return self._log_sum_all_paths(observations, transitions)

    def argmax(self, xs):
        '''
        Predict the labels for new sentences (Please see 2.6).

        :param xs: The list of new sentences (one emission score matrix each)
        :return: Predicted labels for the new sentences (one int32 array
                 per sentence)
        '''
        best_sequence = []
        for xs_i in xs:
            observations = self._expand_emissions(xs_i)
            current_best_sequence = self.forward(
                observations, self.cost, viterbi=True, return_best_sequence=True)
            best_sequence.append(current_best_sequence.data)
        return best_sequence
================================================
FILE: README.md
================================================
# CRF-Layer-on-the-Top-of-BiLSTM (BiLSTM-CRF)
The article series includes:
- **Introduction** - the general idea of the CRF layer on the top of BiLSTM for named entity recognition tasks
- **A Detailed Example** - a toy example to explain how CRF layer works step-by-step
- **Chainer Implementation** - a chainer implementation of the CRF Layer
Links:
* [CRF Layer on the Top of BiLSTM - 1](https://createmomo.github.io/2017/09/12/CRF_Layer_on_the_Top_of_BiLSTM_1/) Outline and Introduction
* [CRF Layer on the Top of BiLSTM - 2](https://createmomo.github.io/2017/09/23/CRF_Layer_on_the_Top_of_BiLSTM_2/) CRF Layer (Emission and Transition Score)
* [CRF Layer on the Top of BiLSTM - 3](https://createmomo.github.io/2017/10/08/CRF-Layer-on-the-Top-of-BiLSTM-3/) CRF Loss Function
* [CRF Layer on the Top of BiLSTM - 4](https://createmomo.github.io/2017/10/17/CRF-Layer-on-the-Top-of-BiLSTM-4/) Real Path Score
* [CRF Layer on the Top of BiLSTM - 5](https://createmomo.github.io/2017/11/11/CRF-Layer-on-the-Top-of-BiLSTM-5/) The Total Score of All the Paths
* [CRF Layer on the Top of BiLSTM - 6](https://createmomo.github.io/2017/11/24/CRF-Layer-on-the-Top-of-BiLSTM-6/) Infer the Labels for a New Sentence
* [CRF Layer on the Top of BiLSTM - 7](https://createmomo.github.io/2017/12/06/CRF-Layer-on-the-Top-of-BiLSTM-7/) Chainer Implementation Warm Up
* [CRF Layer on the Top of BiLSTM - 8](https://createmomo.github.io/2017/12/07/CRF-Layer-on-the-Top-of-BiLSTM-8/) Demo Code
GitHub: https://github.com/createmomo/CRF-Layer-on-the-Top-of-BiLSTM
# Wechat Public Account
Please note: the **WeChat Public Account** is available now! If you find this article useful and would like to find more information about this series, please subscribe to the public account on WeChat! **(2020-04-03)**
<img src="/qr_code.jpg" alt="QR Code" title="QR Code" width="393" height="127" />
================================================
FILE: demo.py
================================================
import numpy as np
import chainer
import MyCRFLayer
# Toy demo: train the CRF layer on two random "sentences" and print the
# predicted label sequences every 50 epochs.
n_label = 2

# Two random emission vectors, reused to build a 2-word and a 1-word sentence.
a = np.random.uniform(-1, 1, n_label).astype('f')
b = np.random.uniform(-1, 1, n_label).astype('f')

x1 = np.stack([b, a])
x2 = np.stack([a])
xs = [x1, x2]

# One random ground-truth label per word.
ys = [np.random.randint(n_label, size=x.shape[0], dtype='i') for x in xs]

my_crf = MyCRFLayer.My_CRF(n_label)

print('Ground Truth:')
for i, y in enumerate(ys):
    print('\tsentence {0}: [{1}]'.format(str(i), ' '.join([str(label) for label in y])))

from chainer import optimizers
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(my_crf)
optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))

print('Predictions:')
for epoch_i in range(201):
    with chainer.using_config('train', True):
        loss = my_crf(xs, ys)

        # update parameters
        # cleargrads() replaces the deprecated zerograds().
        optimizer.target.cleargrads()
        loss.backward()
        optimizer.update()

    with chainer.using_config('train', False):
        if epoch_i % 50 == 0:
            print('\tEpoch {0}: (loss={1})'.format(str(epoch_i), str(loss.data)))
            for i, prediction in enumerate(my_crf.argmax(xs)):
                print('\t\tsentence {0}: [{1}]'.format(str(i), ' '.join([str(label) for label in prediction])))
gitextract_7n5x16cj/ ├── MyCRFLayer.py ├── README.md └── demo.py
SYMBOL INDEX (6 symbols across 1 files)
FILE: MyCRFLayer.py
class My_CRF (line 12) | class My_CRF(L.CRF1d):
method __init__ (line 13) | def __init__(self, n_label):
method __call__ (line 33) | def __call__(self, xs, ys):
method log_sum_exp (line 83) | def log_sum_exp(self, x, axis=None):
method forward (line 95) | def forward(self, observations, transitions,
method argmax (line 176) | def argmax(self, xs):
Condensed preview — 3 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (12K chars).
[
{
"path": "MyCRFLayer.py",
"chars": 8119,
"preview": "import chainer.links as L\nimport chainer.functions as F\nfrom chainer import variable\n\nimport numpy as np\nfrom chainer.fu"
},
{
"path": "README.md",
"chars": 1901,
"preview": "# CRF-Layer-on-the-Top-of-BiLSTM (BiLSTM-CRF)\nThe article series include:\n- **Introduction** - the general idea of the C"
},
{
"path": "demo.py",
"chars": 1224,
"preview": "import numpy as np\nimport chainer\n\nimport MyCRFLayer\n\nn_label = 2\n\na = np.random.uniform(-1, 1, n_label).astype('f')\nb ="
}
]
About this extraction
This page contains the full source code of the createmomo/CRF-Layer-on-the-Top-of-BiLSTM GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 3 files (11.0 KB), approximately 3.1k tokens, and a symbol index with 6 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.