master 562b2ea744d3 cached
3 files
11.0 KB
3.1k tokens
6 symbols
1 requests
Download .txt
Repository: createmomo/CRF-Layer-on-the-Top-of-BiLSTM
Branch: master
Commit: 562b2ea744d3
Files: 3
Total size: 11.0 KB

Directory structure:
gitextract_7n5x16cj/

├── MyCRFLayer.py
├── README.md
└── demo.py

================================================
FILE CONTENTS
================================================

================================================
FILE: MyCRFLayer.py
================================================
import chainer.links as L
import chainer.functions as F
from chainer import variable

import numpy as np
from chainer.functions.math import sum as _sum
from chainer.functions.math import exponential as _exponential

# More details: https://github.com/createmomo/CRF-Layer-on-the-Top-of-BiLSTM
# This code is modified based on https://github.com/glample/tagger

class My_CRF(L.CRF1d):
    """CRF layer computing a sentence-level loss and Viterbi predictions.

    The layer owns a transition score matrix ``self.cost`` of shape
    ``(n_label + 2, n_label + 2)``; the two extra labels are START
    (id ``n_label``) and END (id ``n_label + 1``). Emission scores are
    supplied by the caller (e.g. the outputs of a BiLSTM layer).
    """

    def __init__(self, n_label):
        """
        :param n_label: the number of unique labels in the training data set
        (e.g B-Person, I-Person, O), not counting START and END
        """
        super(My_CRF, self).__init__(n_label)
        with self.init_scope():
            # [Initialization]
            # Generate random values for transition matrix.
            # The shape of transition matrix is (n_label+2, n_label+2).
            # "2" means the extra added labels, START and END. (see 3.2)
            # Values are uniform in [-drange, drange] with
            # drange = sqrt(6 / (rows + cols)).
            drange = np.sqrt(6. / (np.sum((n_label + 2, n_label + 2))))
            value = drange * np.random.uniform(low=-1.0, high=1.0, size=(n_label + 2, n_label + 2))
            transitions = np.array(value, dtype=np.float32)
            self.cost = variable.Parameter(transitions)

            # The number of unique labels in training data set (e.g B-Person, I-Person, O)
            self.n_label = n_label

            # The small value will fill the expanded emission score matrix as described in 3.2
            self.small = -1000

    def _expand_observations(self, xs_i):
        """
        Expand the emission score matrix of one sentence by adding the two
        extra labels (START and END) and two extra rows (see 3.2).

        :param xs_i: emission scores of one sentence, one row per word and
        one column per label
        :return: matrix with ``len(xs_i) + 2`` rows and ``n_label + 2`` columns
        """
        small = self.small
        s_len = len(xs_i)  # how many words does the sentence have

        # First row: only START scores 0. Last row: only END scores 0.
        b_s = np.array([[small] * self.n_label + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * self.n_label + [small, 0]]).astype(np.float32)

        # Real words get the small score for the START and END columns.
        observations = F.concat((xs_i, small * np.ones((s_len, 2), dtype='f')), axis=1)
        return F.concat((b_s, observations, e_s), axis=0)

    def __call__(self, xs, ys):
        '''
        Loss Function

        :param xs: the outputs of BiLSTM layer (the emission score matrix), one matrix per sentence
        :param ys: the ground truth labels, one label sequence per sentence
        :return: CRF loss summed over all the sentences (the float 0.0 when xs is empty)
        '''
        # Assign new id for extra added labels (START and END)
        b_id = np.array([self.n_label], dtype='i')
        e_id = np.array([self.n_label + 1], dtype='i')

        total_loss = 0.0

        # Compute crf loss for each sentence
        for xs_i, ys_i in zip(xs, ys):
            s_len = len(xs_i)  # how many words does the sentence have

            # Expand the emission score matrix by adding two extra labels (START and END).
            # For more details, please see the example in 3.2
            observations = self._expand_observations(xs_i)

            # Compute the real path score according the ground truth labels (see 2.4)
            # Emission score of the real path
            real_path_score = _sum.sum(xs_i[list(range(s_len)), ys_i])

            # Transition score of the real path: START -> y_1 -> ... -> y_n -> END.
            # Consecutive label pairs are (ids[:-1], ids[1:]).
            padded_tags_ids = F.concat((b_id, ys_i, e_id), axis=0).data
            real_path_score += _sum.sum(self.cost[padded_tags_ids[:-1], padded_tags_ids[1:]])

            # Compute the score of all the possible paths of current sentence (see 2.5)
            all_paths_scores = self.forward(observations, self.cost)

            # The crf cost of current sentence (see 2.5)
            current_cost = - (real_path_score - all_paths_scores)

            total_loss += current_cost

        return total_loss

    def log_sum_exp(self, x, axis=None):
        '''
        Compute the sum of scores in log space (see 2.5).
        This function is used in forward.

        :param x: the scores
        :param axis: the axis to reduce over (None reduces over all axes)
        :return: log(sum(exp(x))) along the axis
        '''
        # The maximum is subtracted before exp() and added back afterwards.
        xmax = F.max(x, axis=axis, keepdims=True)
        xmax_ = F.max(x, axis=axis)

        second_item = _exponential.log(_sum.sum(_exponential.exp(x - F.broadcast_to(xmax, x.shape)), axis=axis))

        return xmax_ + second_item

    def forward(self, observations, transitions,
                viterbi=False,
                return_best_sequence=False
                ):
        '''
        This function is described in detail in 2.5 and 2.6.

        :param observations: (see 2.5) In 2.5, 'obs' are the observations here.
        :param transitions: Transition score matrix
        :param viterbi: If true, the scores of the previous step are combined with max
        instead of log-sum-exp (see 2.6).
        :param return_best_sequence: When the viterbi and return_best_sequence are true,
        this method will return the predicted best path. Requires viterbi to be true.
        :return: The predicted best sequence when return_best_sequence is true.
        Otherwise the score of the best path when viterbi is true, or the sum of
        scores of all the paths in log space when viterbi is false.
        :raises ValueError: if return_best_sequence is true while viterbi is false
        '''
        if return_best_sequence and not viterbi:
            # Back pointers are only produced by the viterbi recurrence.
            raise ValueError('return_best_sequence=True requires viterbi=True')

        n_states = self.n_label + 2

        def recurrence(obs, previous, transitions):
            # scores[i, j] = previous[i] + obs[j] + transitions[i, j]
            previous = previous.reshape((previous.shape[0], 1))
            obs = obs.reshape((1, obs.shape[0]))
            previous = F.broadcast_to(previous, (n_states, n_states))
            obs = F.broadcast_to(obs, (n_states, n_states))
            scores = previous + obs + transitions

            if not viterbi:  # Please see 2.5 (Return the sum of scores in log space)
                return self.log_sum_exp(scores, axis=0)

            # Please see 2.6
            scores = scores.data
            out = scores.max(axis=0)
            if return_best_sequence:
                out2 = np.array(scores.argmax(axis=0), dtype='i')
                return out, out2
            # Without back pointers only the best scores are propagated.
            return out

        def mini_function_for_best_sequence(beta_i, previous):
            return beta_i[previous]

        if return_best_sequence:  # Return the best predicted path for one sentence (see 2.6)
            initial_0 = observations[0]
            alpha_0 = np.array(initial_0.data, dtype='f')
            alpha_0 = F.expand_dims(alpha_0, axis=0)

            alpha_1 = None

            for obs in observations[1:]:
                initial_0, initial_1 = recurrence(obs, initial_0, transitions)
                alpha_0 = F.vstack((alpha_0, F.expand_dims(initial_0, axis=0)))

                if alpha_1 is None:
                    # The first row of back pointers is stored twice (here and
                    # just below); the final [2:-1] slice accounts for it.
                    alpha_1 = np.array(initial_1, dtype='i')
                    alpha_1 = F.expand_dims(alpha_1, axis=0)

                alpha_1 = F.vstack((alpha_1, F.expand_dims(initial_1, axis=0)))

            alpha_0 = alpha_0.data[1:]

            # Start from the best label of the last step and follow the
            # back pointers from the last step to the first one.
            initial_beta = np.argmax(alpha_0[-1])
            initial_beta = initial_beta.astype('i')
            sequence = np.array(initial_beta, dtype='i')
            sequence = F.expand_dims(sequence, axis=0)

            for item in alpha_1.data[::-1].astype('i'):
                initial_beta = mini_function_for_best_sequence(item, initial_beta)
                sequence = F.concat((sequence, F.expand_dims(np.array(initial_beta), axis=0)), axis=0)

            # Put the path in sentence order and drop the leading two entries
            # and the trailing one, keeping one label per word.
            sequence = sequence[::-1][2:-1]
            sequence = sequence.reshape(1, sequence.shape[0])
            return sequence[0]  # Return best path

        # Only the scores of the latest step are needed.
        alpha = observations[0]
        for obs in observations[1:]:
            alpha = recurrence(obs, alpha, transitions)

        if viterbi:  # Score of the best path
            return F.max(alpha, axis=0)

        # Please see 2.5 (Return the sum of scores in log space)
        return self.log_sum_exp(alpha, axis=0)

    def argmax(self, xs):
        '''
        :param xs: The list of new sentences
        :return: Predicted labels for the new sentences
        '''
        best_sequence = []

        # Predict the labels for new sentences (Please see 2.6)
        for xs_i in xs:
            observations = self._expand_observations(xs_i)

            current_best_sequence = self.forward(observations, self.cost, viterbi=True, return_best_sequence=True)
            best_sequence.append(current_best_sequence.data)

        return best_sequence



================================================
FILE: README.md
================================================
# CRF-Layer-on-the-Top-of-BiLSTM (BiLSTM-CRF)
The article series includes:
- **Introduction** - the general idea of the CRF layer on the top of BiLSTM for named entity recognition tasks
- **A Detailed Example** -  a toy example to explain how CRF layer works step-by-step
- **Chainer Implementation** - a chainer implementation of the CRF Layer

Links:
  * [CRF Layer on the Top of BiLSTM - 1](https://createmomo.github.io/2017/09/12/CRF_Layer_on_the_Top_of_BiLSTM_1/) Outline and Introduction
  * [CRF Layer on the Top of BiLSTM - 2](https://createmomo.github.io/2017/09/23/CRF_Layer_on_the_Top_of_BiLSTM_2/) CRF Layer (Emission and Transition Score)
  * [CRF Layer on the Top of BiLSTM - 3](https://createmomo.github.io/2017/10/08/CRF-Layer-on-the-Top-of-BiLSTM-3/) CRF Loss Function
  * [CRF Layer on the Top of BiLSTM - 4](https://createmomo.github.io/2017/10/17/CRF-Layer-on-the-Top-of-BiLSTM-4/) Real Path Score
  * [CRF Layer on the Top of BiLSTM - 5](https://createmomo.github.io/2017/11/11/CRF-Layer-on-the-Top-of-BiLSTM-5/) The Total Score of All the Paths
  * [CRF Layer on the Top of BiLSTM - 6](https://createmomo.github.io/2017/11/24/CRF-Layer-on-the-Top-of-BiLSTM-6/) Infer the Labels for a New Sentence
  * [CRF Layer on the Top of BiLSTM - 7](https://createmomo.github.io/2017/12/06/CRF-Layer-on-the-Top-of-BiLSTM-7/) Chainer Implementation Warm Up
  * [CRF Layer on the Top of BiLSTM - 8](https://createmomo.github.io/2017/12/07/CRF-Layer-on-the-Top-of-BiLSTM-8/) Demo Code

GitHub: https://github.com/createmomo/CRF-Layer-on-the-Top-of-BiLSTM

# Wechat Public Account
Please note: The **Wechat Public Account** is available now! If you found this article useful and would like to find more information about this series, please subscribe to the public account with your Wechat! **(2020-04-03)**
<img src="/qr_code.jpg" alt="QR Code" title="QR Code" width="393" height="127" />


================================================
FILE: demo.py
================================================
import numpy as np
import chainer

import MyCRFLayer

# Toy data: two labels and two sentences (two words and one word).
n_label = 2

a = np.random.uniform(-1, 1, n_label).astype('f')
b = np.random.uniform(-1, 1, n_label).astype('f')

# Emission score matrices, one row per word and one column per label.
x1 = np.stack([b, a])
x2 = np.stack([a])

xs = [x1, x2]

# Random ground truth labels, one per word.
ys = [np.random.randint(n_label, size=x.shape[0], dtype='i') for x in xs]

my_crf = MyCRFLayer.My_CRF(n_label)

print('Ground Truth:')
for i, y in enumerate(ys):
    print('\tsentence {0}: [{1}]'.format(str(i), ' '.join([str(label) for label in y])))

from chainer import optimizers
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(my_crf)
optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))

print('Predictions:')
for epoch_i in range(201):
    with chainer.using_config('train', True):
        loss = my_crf(xs, ys)

        # update parameters
        # cleargrads() replaces the deprecated zerograds().
        optimizer.target.cleargrads()
        loss.backward()
        optimizer.update()

    with chainer.using_config('train', False):
        # Report the loss and the predictions every 50 epochs.
        if epoch_i % 50 == 0:
            print('\tEpoch {0}: (loss={1})'.format(str(epoch_i), str(loss.data)))
            for i, prediction in enumerate(my_crf.argmax(xs)):
                print('\t\tsentence {0}: [{1}]'.format(str(i), ' '.join([str(label) for label in prediction])))
Download .txt
gitextract_7n5x16cj/

├── MyCRFLayer.py
├── README.md
└── demo.py
Download .txt
SYMBOL INDEX (6 symbols across 1 files)

FILE: MyCRFLayer.py
  class My_CRF (line 12) | class My_CRF(L.CRF1d):
    method __init__ (line 13) | def __init__(self, n_label):
    method __call__ (line 33) | def __call__(self, xs, ys):
    method log_sum_exp (line 83) | def log_sum_exp(self, x, axis=None):
    method forward (line 95) | def forward(self, observations, transitions,
    method argmax (line 176) | def argmax(self, xs):
Condensed preview — 3 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (12K chars).
[
  {
    "path": "MyCRFLayer.py",
    "chars": 8119,
    "preview": "import chainer.links as L\nimport chainer.functions as F\nfrom chainer import variable\n\nimport numpy as np\nfrom chainer.fu"
  },
  {
    "path": "README.md",
    "chars": 1901,
    "preview": "# CRF-Layer-on-the-Top-of-BiLSTM (BiLSTM-CRF)\nThe article series include:\n- **Introduction** - the general idea of the C"
  },
  {
    "path": "demo.py",
    "chars": 1224,
    "preview": "import numpy as np\nimport chainer\n\nimport MyCRFLayer\n\nn_label = 2\n\na = np.random.uniform(-1, 1, n_label).astype('f')\nb ="
  }
]

About this extraction

This page contains the full source code of the createmomo/CRF-Layer-on-the-Top-of-BiLSTM GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 3 files (11.0 KB), approximately 3.1k tokens, and a symbol index with 6 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!