【发布时间】:2021-05-23 02:12:49
【问题描述】:
最近遇到这样一个问题:ValueError: 大小为零的数组直到强制转换操作的最大值,没有标识。
import tensorflow as tf
import unicodedata
import string
import numpy as np
import re
import matplotlib.pyplot as plt
keras = tf.keras
class Lang(object):
def __init__(self, name):
self.name = name
self.word2int = {} #maps words to integers
self.word2count = {} #maps words to their total number in the corpus
self.int2word = {0 : "SOS", 1 : "EOS"} #maps integers to tokens (just the opposite of word2int but has some initial values. EOS means End of Sentence and it's a token used to indicate the end of a sentence. Every sentence is going to have an EOS token. SOS means Start of Sentence and is used to indicate the start of a sentence.)
self.n_words = 2 #Intial number of tokens (EOS and SOS)
def addWord(self, word):
if word not in self.word2int:
self.word2int[word] = self.n_words
self.word2count[word] = 1
self.int2word[self.n_words] = word
self.n_words += 1
else:
self.word2count[word] += 1
def addSentence(self, sentence):
for word in sentence.split(" "):
self.addWord(word)
def unicodeToAscii(s):
return "".join(c for c in unicodedata.normalize("NFD", s) \
if unicodedata.category(c) != "Mn")
def normalizeString(s):
s = unicodeToAscii(s.lower().strip())
s = re.sub(r"([!.?])", r" \1", s)
s = re.sub(r"[^a-zA-Z?.!]+", " ", s)
return s
def load_dataset():
with open("en_fr.txt",'r') as f:
lines = f.readlines()
pairs = [[normalizeString(pair) for pair in
line.strip().split('\t')] for line in lines]
return pairs
def sentencetoIndexes(sentence, lang):
indexes = [lang.word2int[word] for word in sentence.split()]
indexes.append(EOS_token)
return indexes
SOS_token = 0
EOS_token = 1
pairs = load_dataset()
MAX_LENGTH = 50
def sentencetoIndexes(sentence, lang):
indexes = [lang.word2int[word] for word in sentence.split()]
indexes.append(EOS_token)
return indexes
def filterPair(p):
try:
return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH
except:
return False
def filterPairs(pairs):
return [pair for pair in range(len(pairs)) if filterPair(pair)]
pairs = filterPairs(pairs)
def build_lang(lang1, lang2, max_length=50):
input_lang = Lang(lang1)
output_lang = Lang(lang2)
input_seq = []
output_seq = []
for pair in pairs:
input_lang.addSentence(pair[1])
output_lang.addSentence(pair[0])
for pair in pairs:
input_seq.append(sentencetoIndexes(pair[1], input_lang))
output_seq.append(sentencetoIndexes(pair[0], output_lang))
return (
keras.preprocessing.sequence.pad_sequences(input_seq, maxlen=max_length, padding='post', truncating='post'),
keras.preprocessing.sequence.pad_sequences(output_seq, padding='post', truncating='post'),
input_lang, output_lang
)
什么时候做
input_tensor, output_tensor, input_lang, output_lang = build_lang('en', 'fra')
给出这个错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-259feac15862> in <module>
----> 1 input_tensor, output_tensor, input_lang, output_lang = build_lang('en', 'fra')
<ipython-input-10-d20934657bc2> in build_lang(lang1, lang2, max_length)
12 output_seq.append(sentencetoIndexes(pair[0], output_lang))
13 return keras.preprocessing.sequence.pad_sequences(input_seq, maxlen=max_length, padding='post',
---> 14 truncating='post'), keras.preprocessing.sequence.pad_sequences(output_seq, padding='post', truncating='post'), input_lang, output_lang
c:\users\zealottv\appdata\local\programs\python\python38\lib\site-packages\tensorflow\python\keras\preprocessing\sequence.py in pad_sequences(sequences, maxlen, dtype, padding, truncating, value)
154 or in case of invalid shape for a `sequences` entry.
155 """
--> 156 return sequence.pad_sequences(
157 sequences, maxlen=maxlen, dtype=dtype,
158 padding=padding, truncating=truncating, value=value)
c:\users\zealottv\appdata\local\programs\python\python38\lib\site-packages\keras_preprocessing\sequence.py in pad_sequences(sequences, maxlen, dtype, padding, truncating, value)
75
76 if maxlen is None:
---> 77 maxlen = np.max(lengths)
78
79 is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype(dtype, np.unicode_)
<__array_function__ internals> in amax(*args, **kwargs)
c:\users\zealottv\appdata\local\programs\python\python38\lib\site-packages\numpy\core\fromnumeric.py in amax(a, axis, out, keepdims, initial, where)
2665 5
2666 """
-> 2667 return _wrapreduction(a, np.maximum, 'max', axis, None, out,
2668 keepdims=keepdims, initial=initial, where=where)
2669
c:\users\zealottv\appdata\local\programs\python\python38\lib\site-packages\numpy\core\fromnumeric.py in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
88 return reduction(axis=axis, out=out, **passkwargs)
89
---> 90 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
91
92
ValueError: zero-size array to reduction operation maximum which has no identity
full code 可以在这里下载。
【问题讨论】:
-
公认的做法是在问题中包含所有相关代码,而不是包含代码链接
-
例如,您在使用
filter_pairs时会出错,因为您希望它返回一个包含 2 个字符串元素的列表列表,而实际上它返回的是一个整数列表。 -
但是您没有在问题中包含此代码。
-
我添加了其余的代码。 (我在Jupiter Notebook上写的)对不起,我是第一次来这里!
-
没问题,只是为以后的问题提供建议。但请注意,我之前关于
filter_pairs的评论仍然有效