Converting a TensorFlow graph to use TensorFlow Estimator, getting 'TypeError: data type not understood', at...























I am trying to convert a working TensorFlow graph to the TensorFlow Estimator API, using a custom Estimator. The model trains fine when I build the graph directly and run it in a session, but when I move it to the Estimator API it fails.



This is where I define my model:



def my_model( features, labels, mode, params):

    train_dataset = features
    train_labels = labels

    batch_sizeE=params["batch_size"]
    embedding_sizeE=params["embedding_size"]
    num_inputsE=params["num_inputs"]
    num_sampledE=params["num_sampled"]

    print(features)
    print(labels)

    epochCount = tf.get_variable( 'epochCount', initializer= 0) #to store epoch count to total # of epochs are known
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable( 'embeddings', dtype=tf.float32,
        initializer= tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32) )

    softmax_weights = tf.get_variable( 'softmax_weights', dtype=tf.float32,
        initializer= tf.truncated_normal([vocabulary_size, embedding_sizeE],
            stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32 ) )

    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer= tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False )

    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is

    embed_reshaped = tf.reshape( embed, [batch_sizeE*num_inputs, embedding_sizeE] )

    segments= np.arange(batch_size).repeat(num_inputs)

    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    if mode == "train":

        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
            labels=train_labels, num_sampled=64, num_classes=3096637)

        loss = tf.reduce_mean( sSML )

        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)

        saver = tf.train.Saver()
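
For reference, this is the general shape I understand a custom model_fn is supposed to have, going by the Estimator docs. It is only a generic sketch with placeholder names and a dummy loss, not my actual model; I include it because I notice it returns a tf.estimator.EstimatorSpec, while my function above does not return anything yet.

# Generic sketch of a model_fn (TF 1.x); placeholder names and a dummy loss, not my real model.
def sketch_model_fn(features, labels, mode, params):
    # ... build the graph from features/labels here ...
    loss = tf.reduce_mean(tf.to_float(features))  # dummy loss, assumes features is a single int tensor

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdagradOptimizer(1.0).minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss)

    # PREDICT mode
    return tf.estimator.EstimatorSpec(mode=mode, predictions={'features': features})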


This is where I call the training:



#Define the estimator
word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
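
Side note: the only other way I know of to feed numpy arrays into an Estimator is tf.estimator.inputs.numpy_input_fn. Something like the following is what I mean; the arrays here are hypothetical stand-ins, not my real data.

# Hypothetical numpy_input_fn example (TF 1.x); the arrays are stand-ins for my real data.
feature_array = np.zeros((160, 3), dtype=np.int32)
label_array = np.zeros((160, 1), dtype=np.int32)

numpy_input = tf.estimator.inputs.numpy_input_fn(
    x={'x': feature_array},
    y=label_array,
    batch_size=16,
    num_epochs=None,  # repeat indefinitely
    shuffle=True)

# word2vecEstimator.train(input_fn=numpy_input, steps=10)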


And this is the error I get:



INFO:tensorflow:Calling model_fn.

<tf.Variable 'softmax_weights:0' shape=(3096637, 50) dtype=float32_ref>
<tf.Variable 'softmax_biases:0' shape=(3096637,) dtype=float32_ref>
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197

<ipython-input-47-95d390a50046> in my_model(features, labels, mode, params)
47
48 sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
---> 49 labels=train_labels, num_sampled=64, num_classes=3096637)
50
51 loss = tf.reduce_mean( sSML )

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name, seed)
1347 partition_strategy=partition_strategy,
1348 name=name,
-> 1349 seed=seed)
1350 labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
1351 sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])

TypeError: data type not understood
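
My reading of the last frame is that the comparison labels.dtype != dtypes.int64 is what actually raises. My guess, and it is only a guess, is that labels reaches that line as a plain numpy array rather than a Tensor, so a numpy dtype ends up being compared against a TensorFlow dtype. A minimal check of that idea would be something like this (I have not confirmed it behaves the same on every NumPy version):

# Minimal check of my guess (not confirmed on all NumPy versions):
np_labels = np.zeros((16, 1), dtype=np.int32)  # same dtype/shape my generate_batch returns
try:
    print(np_labels.dtype != tf.int64)         # mirrors `labels.dtype != dtypes.int64` in nn_impl.py
except TypeError as e:
    print('TypeError:', e)                     # if my guess is right, this prints: data type not understood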


Here is a link to a Google Colab notebook that reproduces the issue. Note that running it downloads a data file of roughly 500 MB.



https://colab.research.google.com/drive/1LjIz04xhRi5Fsw_Q3IzoG_5KkkXI3WFE



And here is the full code from the notebook:



import math
import numpy as np
import random
import zipfile
import shutil
from collections import namedtuple

import os
import pprint

import tensorflow as tf

import pandas as pd
import pickle
from numpy import genfromtxt

!pip install -U -q PyDrive

from google.colab import files
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

vocabulary_size = 3096637 #updated 10-25-18 3096636

import gc

dl_id = '19yha9Scxq4zOdfPcw5s6L2lkYQWenApC' #updated 10-22-18

myDownload = drive.CreateFile({'id': dl_id})
myDownload.GetContentFile('Data.npy')
my_data = np.load('Data.npy')
#os.remove('Data.npy')
np.random.shuffle(my_data)
print(my_data[0:15])

data_index = 0
epoch_index = 0
recEpoch_indexA = 0 #Used to help keep store of the total number of epoches with the models

def generate_batch():
    global data_index, epoch_index

    features = np.ndarray(shape=(batch_size, num_inputs), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

    n=0
    while n < batch_size:
        if len( set(my_data[data_index, 1]) ) >= num_inputs:
            labels[n,0] = my_data[data_index, 0]
            features[n] = random.sample( set(my_data[data_index, 1]), num_inputs)
            n = n+1
            data_index = (data_index + 1) % len(my_data) #may have to do something like len my_data[:]
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)
        else:
            data_index = (data_index + 1) % len(my_data)
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)

    return features, labels



def my_model( features, labels, mode, params):

    train_dataset = features
    train_labels = labels

    batch_sizeE=params["batch_size"]
    embedding_sizeE=params["embedding_size"]
    num_inputsE=params["num_inputs"]
    num_sampledE=params["num_sampled"]

    print(features)
    print(labels)

    epochCount = tf.get_variable( 'epochCount', initializer= 0) #to store epoch count to total # of epochs are known
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable( 'embeddings', dtype=tf.float32,
        initializer= tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32) )

    softmax_weights = tf.get_variable( 'softmax_weights', dtype=tf.float32,
        initializer= tf.truncated_normal([vocabulary_size, embedding_sizeE],
            stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32 ) )

    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer= tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False )

    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is

    embed_reshaped = tf.reshape( embed, [batch_sizeE*num_inputs, embedding_sizeE] )

    segments= np.arange(batch_size).repeat(num_inputs)

    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    print(softmax_weights )
    print(softmax_biases )

    if mode == "train":

        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
            labels=train_labels, num_sampled=64, num_classes=3096637)

        loss = tf.reduce_mean( sSML )

        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)

        saver = tf.train.Saver()



word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
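
For completeness, the direction I have been considering for the input side is to wrap generate_batch in tf.data.Dataset.from_generator, so that the Estimator receives Tensors instead of raw numpy arrays. This is only a sketch of the idea and I have not confirmed it is the right way to hook it up:

# Sketch only: wrapping my generator with tf.data so input_fn yields Tensors.
def batch_generator():
    while True:
        yield generate_batch()  # each element is a (features, labels) pair of int32 arrays

def dataset_input_fn():
    return tf.data.Dataset.from_generator(
        batch_generator,
        output_types=(tf.int32, tf.int32),
        output_shapes=(tf.TensorShape([batch_size, num_inputs]),  # batch_size/num_inputs as used by generate_batch
                       tf.TensorShape([batch_size, 1])))

# word2vecEstimator.train(input_fn=dataset_input_fn, steps=10)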


























This question has an open bounty worth +50 reputation from SantoshGupta7, ending in 6 days, because it has not received enough attention.

The answer should identify what is causing my error and provide a solution that lets me train my model without any errors.

























python tensorflow

asked Nov 21 at 5:17, edited Nov 21 at 5:40, by SantoshGupta7