Converting Tensorflow Graph to use Tensorflow Estimator, getting 'TypeError: data type not understood', at...
I am trying to convert a working TensorFlow graph to use a custom TensorFlow Estimator. The model trains correctly when I build the graph myself and run it with a session, but when I run it through the Estimator API it fails.
This is where I define my model:
def my_model(features, labels, mode, params):
    train_dataset = features
    train_labels = labels

    batch_sizeE = params["batch_size"]
    embedding_sizeE = params["embedding_size"]
    num_inputsE = params["num_inputs"]
    num_sampledE = params["num_sampled"]

    print(features)
    print(labels)

    # Stores the epoch count so the total number of epochs is known
    epochCount = tf.get_variable('epochCount', initializer=0)
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable('embeddings', dtype=tf.float32,
        initializer=tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32))
    softmax_weights = tf.get_variable('softmax_weights', dtype=tf.float32,
        initializer=tf.truncated_normal([vocabulary_size, embedding_sizeE],
                                        stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32))
    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer=tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False)

    embed = tf.nn.embedding_lookup(embeddings, train_dataset)  #train data set is
    embed_reshaped = tf.reshape(embed, [batch_sizeE * num_inputs, embedding_sizeE])
    segments = np.arange(batch_size).repeat(num_inputs)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    if mode == "train":
        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                                          inputs=averaged_embeds, labels=train_labels,
                                          num_sampled=64, num_classes=3096637)
        loss = tf.reduce_mean(sSML)
        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
        saver = tf.train.Saver()
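As an aside, from the tf.estimator docs my understanding is that a custom model_fn has to return a tf.estimator.EstimatorSpec, which my function above does not do yet. A minimal sketch of how I think the TRAIN branch should end (the global-step wiring is my assumption, based on the docs):

# Hedged sketch of the Estimator contract -- not yet part of my model_fn above.
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdagradOptimizer(1.0)
    # The Estimator owns the global step, so minimize() should increment it.
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)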
This is where I call the training:
# Define the estimator
word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
And this is the error I get:
INFO:tensorflow:Calling model_fn.
<tf.Variable 'softmax_weights:0' shape=(3096637, 50) dtype=float32_ref>
<tf.Variable 'softmax_biases:0' shape=(3096637,) dtype=float32_ref>
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-49-955f44867ee5> in <module>()
1 word2vecEstimator.train(
2 input_fn=generate_batch,
----> 3 steps=10)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
352
353 saving_listeners = _check_listeners_type(saving_listeners)
--> 354 loss = self._train_model(input_fn, hooks, saving_listeners)
355 logging.info('Loss for final step: %s.', loss)
356 return self
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1205 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1206 else:
-> 1207 return self._train_model_default(input_fn, hooks, saving_listeners)
1208
1209 def _train_model_default(self, input_fn, hooks, saving_listeners):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1235 worker_hooks.extend(input_hooks)
1236 estimator_spec = self._call_model_fn(
-> 1237 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
1238 global_step_tensor = training_util.get_global_step(g)
1239 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1193
1194 logging.info('Calling model_fn.')
-> 1195 model_fn_results = self._model_fn(features=features, **kwargs)
1196 logging.info('Done calling model_fn.')
1197
<ipython-input-47-95d390a50046> in my_model(features, labels, mode, params)
47
48 sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
---> 49 labels=train_labels, num_sampled=64, num_classes=3096637)
50
51 loss = tf.reduce_mean( sSML )
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, remove_accidental_hits, partition_strategy, name, seed)
1347 partition_strategy=partition_strategy,
1348 name=name,
-> 1349 seed=seed)
1350 labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
1351 sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in _compute_sampled_logits(weights, biases, labels, inputs, num_sampled, num_classes, num_true, sampled_values, subtract_log_q, remove_accidental_hits, partition_strategy, name, seed)
1029 with ops.name_scope(name, "compute_sampled_logits",
1030 weights + [biases, inputs, labels]):
-> 1031 if labels.dtype != dtypes.int64:
1032 labels = math_ops.cast(labels, dtypes.int64)
1033 labels_flat = array_ops.reshape(labels, [-1])
TypeError: data type not understood
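My best guess so far about the TypeError itself: the failing check labels.dtype != dtypes.int64 seems to compare a numpy dtype against a TensorFlow DType, and I believe that is what numpy rejects with "data type not understood". A small snippet that I think reproduces the same error in this environment, assuming the labels reaching the loss are still plain numpy arrays:

import numpy as np
import tensorflow as tf

labels = np.zeros((16, 1), dtype=np.int32)
# If labels is a numpy array rather than a tensor, this comparison goes
# through numpy, which cannot interpret tf.int64 as a dtype:
labels.dtype != tf.int64  # TypeError: data type not understood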
Here is a link to the Google Colab notebook so people can run it themselves. Note that executing it downloads a data file of about 500 MB.
https://colab.research.google.com/drive/1LjIz04xhRi5Fsw_Q3IzoG_5KkkXI3WFE
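In case it is relevant: I pass generate_batch directly as input_fn, but as far as I can tell input_fn is supposed to return tensors (or a tf.data.Dataset), not numpy arrays. A sketch of a tf.data wrapper I could try instead; the shapes assume batch_size=16 and num_inputs=3 from params, and the int64 cast is my assumption, to match what sampled_softmax_loss expects for labels:

def batch_generator():
    # Re-yield batches from the existing numpy generator, cast to int64 so
    # the labels dtype check inside sampled_softmax_loss is satisfied.
    while True:
        features, labels = generate_batch()
        yield features.astype(np.int64), labels.astype(np.int64)

def input_fn():
    return tf.data.Dataset.from_generator(
        batch_generator,
        output_types=(tf.int64, tf.int64),
        output_shapes=((16, 3), (16, 1)))

# word2vecEstimator.train(input_fn=input_fn, steps=10)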
And here is the full code from the notebook:
import math
import numpy as np
import random
import zipfile
import shutil
from collections import namedtuple
import os
import pprint
import tensorflow as tf
import pandas as pd
import pickle
from numpy import genfromtxt
!pip install -U -q PyDrive
from google.colab import files
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
vocabulary_size = 3096637 #updated 10-25-18 3096636
import gc
dl_id = '19yha9Scxq4zOdfPcw5s6L2lkYQWenApC' #updated 10-22-18
myDownload = drive.CreateFile({'id': dl_id})
myDownload.GetContentFile('Data.npy')
my_data = np.load('Data.npy')
#os.remove('Data.npy')
np.random.shuffle(my_data)
print(my_data[0:15])
data_index = 0
epoch_index = 0
recEpoch_indexA = 0  # Used to keep track of the total number of epochs across models
def generate_batch():
    global data_index, epoch_index

    features = np.ndarray(shape=(batch_size, num_inputs), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

    n = 0
    while n < batch_size:
        if len(set(my_data[data_index, 1])) >= num_inputs:
            labels[n, 0] = my_data[data_index, 0]
            features[n] = random.sample(set(my_data[data_index, 1]), num_inputs)
            n = n + 1
            data_index = (data_index + 1) % len(my_data)  # may have to do something like len my_data[:]
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)
        else:
            data_index = (data_index + 1) % len(my_data)
            if data_index == 0:
                epoch_index = epoch_index + 1
                print('Completed %d Epochs' % epoch_index)
    return features, labels
def my_model(features, labels, mode, params):
    train_dataset = features
    train_labels = labels

    batch_sizeE = params["batch_size"]
    embedding_sizeE = params["embedding_size"]
    num_inputsE = params["num_inputs"]
    num_sampledE = params["num_sampled"]

    print(features)
    print(labels)

    # Stores the epoch count so the total number of epochs is known
    epochCount = tf.get_variable('epochCount', initializer=0)
    update_epoch = tf.assign(epochCount, epochCount + 1)

    embeddings = tf.get_variable('embeddings', dtype=tf.float32,
        initializer=tf.random_uniform([vocabulary_size, embedding_sizeE], -1.0, 1.0, dtype=tf.float32))
    softmax_weights = tf.get_variable('softmax_weights', dtype=tf.float32,
        initializer=tf.truncated_normal([vocabulary_size, embedding_sizeE],
                                        stddev=1.0 / math.sqrt(embedding_sizeE), dtype=tf.float32))
    softmax_biases = tf.get_variable('softmax_biases', dtype=tf.float32,
        initializer=tf.zeros([vocabulary_size], dtype=tf.float32), trainable=False)

    embed = tf.nn.embedding_lookup(embeddings, train_dataset)  #train data set is
    embed_reshaped = tf.reshape(embed, [batch_sizeE * num_inputs, embedding_sizeE])
    segments = np.arange(batch_size).repeat(num_inputs)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

    print(softmax_weights)
    print(softmax_biases)

    if mode == "train":
        sSML = tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                                          inputs=averaged_embeds, labels=train_labels,
                                          num_sampled=64, num_classes=3096637)
        loss = tf.reduce_mean(sSML)
        optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
        saver = tf.train.Saver()
word2vecEstimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'batch_size': 16,
        'embedding_size': 10,
        'num_inputs': 3,
        'num_sampled': 128
    })

word2vecEstimator.train(
    input_fn=generate_batch,
    steps=10)
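One caveat about the listing: generate_batch and my_model read batch_size and num_inputs as module-level globals, but the listing never assigns them (the values only exist inside params). For the code to run as posted they would need to be set somewhere, presumably to match params:

batch_size = 16  # assumed here: must match params['batch_size']
num_inputs = 3   # assumed here: must match params['num_inputs']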
Tags: python, tensorflow
Bounty: +50 reputation from SantoshGupta7, ending in 6 days. An answer should identify what is causing the error and provide a solution that lets me train the model without any errors.
Asked Nov 21 at 5:17 by SantoshGupta7; edited Nov 21 at 5:40.