This repository was archived by the owner on Mar 30, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplitting_reader.go
More file actions
58 lines (47 loc) · 1.42 KB
/
splitting_reader.go
File metadata and controls
58 lines (47 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Copyright 2015 The conllx Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package conllx
import "errors"
// A FoldSet contains fold numbers. This type is used with a SplittingReader
// to indicate from which folds sentences should be returned.
//
// Only the keys matter: SplittingReader.ReadSentence checks whether a fold
// number is present as a key and ignores the value stored under it.
type FoldSet map[int]interface{}
// Compile-time check that *SplittingReader satisfies SentenceReader.
var _ SentenceReader = (*SplittingReader)(nil)
// SplittingReader is a wrapper around a (CoNLL-X) Reader that splits the
// corpus into folds. Sentences are assigned to folds in round-robin order as
// they are read, and only sentences from the requested folds are returned.
type SplittingReader struct {
// reader is the wrapped Reader that sentences are read from.
reader *Reader
// nFolds is the number of folds; the fold counter wraps to 0 on reaching it.
nFolds int
// folds holds the fold numbers whose sentences are returned.
folds FoldSet
// count is the fold number of the most recently read sentence.
// NewSplittingReader initializes it to -1, i.e. before the first sentence.
count int
}
// NewSplittingReader creates a SplittingReader that splits the data read
// from 'reader' into 'nFolds' folds. The resulting reader only returns the
// sentences that fall in one of the folds listed in 'folds'.
//
// An error is returned when 'nFolds' is smaller than 1.
func NewSplittingReader(reader *Reader, nFolds int, folds FoldSet) (*SplittingReader, error) {
	if nFolds < 1 {
		return nil, errors.New("the data should be split in at least 1 fold")
	}

	// The fold counter starts at -1 because ReadSentence increments it
	// before checking the fold of a sentence, so the first sentence lands
	// in fold 0.
	return &SplittingReader{
		reader: reader,
		nFolds: nFolds,
		folds:  folds,
		count:  -1,
	}, nil
}
// ReadSentence returns the next sentence that is in one of the folds
// requested from the SplittingReader.
//
// Sentences are read from the wrapped reader one at a time and assigned to
// folds round-robin: the fold counter is advanced for every sentence read
// and wraps back to 0 when it reaches nFolds. Sentences whose fold number is
// not a key of the requested FoldSet are skipped.
//
// Any error from the wrapped reader is returned unchanged, together with the
// sentence value that reader produced; the fold counter is not advanced in
// that case.
func (r *SplittingReader) ReadSentence() (sentence Sentence, err error) {
	for {
		// Assign to the named results instead of redeclaring them with :=,
		// so the loop body does not shadow the function's return values.
		sentence, err = r.reader.ReadSentence()
		if err != nil {
			return sentence, err
		}

		// Advance the round-robin fold counter, wrapping at nFolds.
		r.count++
		if r.count == r.nFolds {
			r.count = 0
		}

		// Only the presence of the fold number as a key is checked.
		if _, ok := r.folds[r.count]; ok {
			return sentence, nil
		}
	}
}