Skip to content

Commit fd764ad

Browse files
authored
20200501 fbsync (#750)
1 parent 85be3b7 commit fd764ad

File tree

6 files changed

+9
-16
lines changed

6 files changed

+9
-16
lines changed

.python3

Whitespace-only changes.

test/test_vocab.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def test_vocab_download_fasttext_vectors(self):
132132
def test_vocab_extend(self):
133133
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
134134
# Build a vocab and get vectors twice to test caching.
135-
for i in range(2):
135+
for _ in range(2):
136136
f = FastText(language='simple')
137137
v = vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
138138
vectors=f)
@@ -163,7 +163,7 @@ def test_vocab_extend(self):
163163
def test_vocab_download_custom_vectors(self):
164164
c = Counter({'hello': 4, 'world': 3, 'ᑌᑎIᑕOᗪᕮ_Tᕮ᙭T': 5, 'freq_too_low': 2})
165165
# Build a vocab and get vectors twice to test caching.
166-
for i in range(2):
166+
for _ in range(2):
167167
v = vocab.Vocab(c, min_freq=3, specials=['<unk>', '<pad>', '<bos>'],
168168
vectors=Vectors('wiki.simple.vec',
169169
url=FastText.url_base.format('simple')))

torchtext/data/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from functools import partial
77

88

9-
def _split_tokenizer(x):
9+
def _split_tokenizer(x): # noqa: F821
1010
# type: (str) -> List[str]
1111
return x.split()
1212

torchtext/experimental/datasets/raw/text_classification.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import torch
22
import io
33
from torchtext.utils import download_from_url, extract_archive, unicode_csv_reader
4-
import sys
54

65
URLS = {
76
'AG_NEWS':
@@ -55,15 +54,9 @@ def __iter__(self):
5554
self.setup_iter()
5655

5756
for i, item in enumerate(self._iterator):
58-
if i == self.start:
59-
break
60-
61-
num_lines = self.num_lines if self.num_lines is not None else sys.maxsize
62-
for _ in range(num_lines):
63-
yield item
64-
try:
65-
item = next(self._iterator)
66-
except StopIteration:
57+
if i >= self.start:
58+
yield item
59+
if self.num_lines is not None and i == (self.start + self.num_lines - 1):
6760
break
6861

6962
def get_iterator(self):

torchtext/experimental/datasets/text_classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def _forward(token_list):
2727

2828
def build_vocab(data, transforms):
2929
tok_list = []
30-
for (label, txt) in data:
30+
for _, txt in data:
3131
tok_list.append(transforms(txt))
3232
return build_vocab_from_iterator(tok_list)
3333

torchtext/vocab.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class Vocab(object):
3131
# TODO (@mttk): Populate class with default values of special symbols
3232
UNK = '<unk>'
3333

34-
def __init__(self, counter, max_size=None, min_freq=1, specials=['<unk>', '<pad>'],
34+
def __init__(self, counter, max_size=None, min_freq=1, specials=('<unk>', '<pad>'),
3535
vectors=None, unk_init=None, vectors_cache=None, specials_first=True):
3636
"""Create a Vocab object from a collections.Counter.
3737
@@ -218,7 +218,7 @@ def set_vectors(self, stoi, vectors, dim, unk_init=torch.Tensor.zero_):
218218

219219
class SubwordVocab(Vocab):
220220

221-
def __init__(self, counter, max_size=None, specials=['<pad>'],
221+
def __init__(self, counter, max_size=None, specials=('<pad>',),
222222
vectors=None, unk_init=torch.Tensor.zero_):
223223
"""Create a revtok subword vocabulary from a collections.Counter.
224224

0 commit comments

Comments
 (0)