Converting Tensorflow Graph to use Tensorflow Estimator, getting 'TypeError: data type not understood', at...
I am trying to convert a working TensorFlow graph to use a custom TensorFlow Estimator. The model trains correctly when I build the graph myself and run it with a session, but when I run it through the Estimator API it fails.
This is where I define my model:
def my_model(features, labels, mode, params):
    train_dataset = features
    train_labels = labels

    batch_sizeE = params["batch_size"]
    embedding_sizeE = params["embedding_size"]
    num_inputsE = params["num_inputs"]
    num_sampledE = params["num_sampled"]

    print(features)
    print(labels)

    # Stores the epoch count so the total number of epochs is known
    epochCount = tf.get_variable('epochCount', initializer=0)
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable('embeddings', dtype=tf.float32,
        initializer=tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32))
    softmax_weights = tf.get_variable('softmax_weights', dtype=tf.float32,
        initializer=tf.truncated_normal([vocabulary_size, embedding_sizeE],
                                        stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32))
    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer=tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False)

    embed = tf.nn.embedding_lookup(embeddings, train_dataset)  #train data set is
    embed_reshaped = tf.reshape(embed, [batch_sizeE * num_inputs, embedding_sizeE])
    segments = np.arange(batch_size).repeat(num_inputs)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    if mode == "train":
        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                                          inputs=averaged_embeds, labels=train_labels,
                                          num_sampled=64, num_classes=3096637)
        loss = tf.reduce_mean(sSML)
        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
        saver = tf.train.Saver()
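As an aside, from the tf.estimator docs my understanding is that a custom model_fn has to return a tf.estimator.EstimatorSpec, which my function above does not do yet. A minimal sketch of how I think the TRAIN branch should end (the global-step wiring is my assumption, based on the docs):

# Hedged sketch of the Estimator contract -- not yet part of my model_fn above.
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdagradOptimizer(1.0)
    # The Estimator owns the global step, so minimize() should increment it.
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)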
This is where I call the training:
# Define the estimator
word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
And this is the error I get:
INFO:tensorflow:Calling model_fn.
<tf.Variable 'softmax_weights:0' shape=(3096637, 50) dtype=float32_ref>
<tf.Variable 'softmax_biases:0' shape=(3096637,) dtype=float32_ref>
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197
<ipython-input-47-95d390a50046> in my_model(features, labels, mode, params)
47
48 sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
---> 49 labels=train_labels, num_sampled=64, num_classes=3096637)
50
51 loss = tf.reduce_mean( sSML )
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name, seed)
1347 partition_strategy=partition_strategy,
1348 name=name,
-> 1349 seed=seed)
1350 labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
1351 sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])
TypeError: data type not understood
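My best guess so far about the TypeError itself: the failing check labels.dtype != dtypes.int64 seems to compare a numpy dtype against a TensorFlow DType, and I believe that is what numpy rejects with "data type not understood". A small snippet that I think reproduces the same error in this environment, assuming the labels reaching the loss are still plain numpy arrays:

import numpy as np
import tensorflow as tf

labels = np.zeros((16, 1), dtype=np.int32)
# If labels is a numpy array rather than a tensor, this comparison goes
# through numpy, which cannot interpret tf.int64 as a dtype:
labels.dtype != tf.int64  # TypeError: data type not understood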
Here is a link to the Google Colab notebook so people can run it themselves. Note that executing it downloads a data file of about 500 MB.
https://colab.research.google.com/drive/1LjIz04xhRi5Fsw_Q3IzoG_5KkkXI3WFE
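In case it is relevant: I pass generate_batch directly as input_fn, but as far as I can tell input_fn is supposed to return tensors (or a tf.data.Dataset), not numpy arrays. A sketch of a tf.data wrapper I could try instead; the shapes assume batch_size=16 and num_inputs=3 from params, and the int64 cast is my assumption, to match what sampled_softmax_loss expects for labels:

def batch_generator():
    # Re-yield batches from the existing numpy generator, cast to int64 so
    # the labels dtype check inside sampled_softmax_loss is satisfied.
    while True:
        features, labels = generate_batch()
        yield features.astype(np.int64), labels.astype(np.int64)

def input_fn():
    return tf.data.Dataset.from_generator(
        batch_generator,
        output_types=(tf.int64, tf.int64),
        output_shapes=((16, 3), (16, 1)))

# word2vecEstimator.train(input_fn=input_fn, steps=10)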
And here is the full code from the notebook:
import math
import numpy as np
import random
import zipfile
import shutil
from collections import namedtuple
import os
import pprint
import tensorflow as tf
import pandas as pd
import pickle
from numpy import genfromtxt
!pip install -U -q PyDrive
from google.colab import files
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
vocabulary_size = 3096637 #updated 10-25-18 3096636
import gc
dl_id = '19yha9Scxq4zOdfPcw5s6L2lkYQWenApC' #updated 10-22-18
myDownload = drive.CreateFile({'id': dl_id})
myDownload.GetContentFile('Data.npy')
my_data = np.load('Data.npy')
#os.remove('Data.npy')
np.random.shuffle(my_data)
print(my_data[0:15])
data_index = 0
epoch_index = 0
recEpoch_indexA = 0  # Used to keep track of the total number of epochs across models
def generate_batch():
    global data_index, epoch_index

    features = np.ndarray(shape=(batch_size, num_inputs), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

    n = 0
    while n < batch_size:
        if len(set(my_data[data_index, 1])) >= num_inputs:
            labels[n, 0] = my_data[data_index, 0]
            features[n] = random.sample(set(my_data[data_index, 1]), num_inputs)
            n = n + 1
            data_index = (data_index + 1) % len(my_data)  # may have to do something like len my_data[:]
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)
        else:
            data_index = (data_index + 1) % len(my_data)
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)
    return features, labels
def my_model(features, labels, mode, params):
    train_dataset = features
    train_labels = labels

    batch_sizeE = params["batch_size"]
    embedding_sizeE = params["embedding_size"]
    num_inputsE = params["num_inputs"]
    num_sampledE = params["num_sampled"]

    print(features)
    print(labels)

    # Stores the epoch count so the total number of epochs is known
    epochCount = tf.get_variable('epochCount', initializer=0)
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable('embeddings', dtype=tf.float32,
        initializer=tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32))
    softmax_weights = tf.get_variable('softmax_weights', dtype=tf.float32,
        initializer=tf.truncated_normal([vocabulary_size, embedding_sizeE],
                                        stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32))
    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer=tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False)

    embed = tf.nn.embedding_lookup(embeddings, train_dataset)  #train data set is
    embed_reshaped = tf.reshape(embed, [batch_sizeE * num_inputs, embedding_sizeE])
    segments = np.arange(batch_size).repeat(num_inputs)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    print(softmax_weights)
    print(softmax_biases)

    if mode == "train":
        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                                          inputs=averaged_embeds, labels=train_labels,
                                          num_sampled=64, num_classes=3096637)
        loss = tf.reduce_mean(sSML)
        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
        saver = tf.train.Saver()
word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
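One caveat about the listing: generate_batch and my_model read batch_size and num_inputs as module-level globals, but the listing never assigns them (the values only exist inside params). For the code to run as posted they would need to be set somewhere, presumably to match params:

batch_size = 16  # assumed here: must match params['batch_size']
num_inputs = 3   # assumed here: must match params['num_inputs']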
Tags: python, tensorflow
Bounty: +50 reputation from SantoshGupta7, ending in 6 days. An answer should identify what is causing the error and provide a solution that lets me train the model without any errors.
Asked Nov 21 at 5:17 by SantoshGupta7; edited Nov 21 at 5:40.