tf.data API cannot print all the batches
I am teaching myself the tf.data
API. I am using the MNIST
dataset for binary classification. The training x and y data are zipped together into the full train_dataset. Chained onto this zip call is first the batch()
dataset method: the data is batched with a batch size of 128. Since my training set size is 11623, with batch size 128 I will have 91 batches. The last batch will contain 103 examples, which is fine since this is an LSTM. Additionally, I am using dropout. When I compute batch accuracy, I turn dropout off.
The full code is given below:
#Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8,7)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/")
Xtrain = mnist.train.images[mnist.train.labels < 2]
ytrain = mnist.train.labels[mnist.train.labels < 2]
print(Xtrain.shape)
print(ytrain.shape)
#Data parameters
num_inputs = 28
num_classes = 2
num_steps=28
# create the training dataset
Xtrain = tf.data.Dataset.from_tensor_slices(Xtrain).map(lambda x: tf.reshape(x,(num_steps, num_inputs)))
# apply a one-hot transformation to each label for use in the neural network
ytrain = tf.data.Dataset.from_tensor_slices(ytrain).map(lambda z: tf.one_hot(z, num_classes))
# zip the x and y training data together and batch and Prefetch data for faster consumption
train_dataset = tf.data.Dataset.zip((Xtrain, ytrain)).batch(128).prefetch(128)
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
X, y = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
#### model is here ####
#Network parameters
num_epochs = 2
batch_size = 128
output_keep_var = 0.5
with tf.Session() as sess:
init.run()
print("Initialized")
# Training cycle
for epoch in range(0, num_epochs):
num_batch = 0
print ("Epoch: ", epoch)
avg_cost = 0.
avg_accuracy =0
total_batch = int(11623 / batch_size + 1)
sess.run(training_init_op)
while True:
try:
_, miniBatchCost = sess.run([trainer, loss], feed_dict={output_keep_prob: output_keep_var})
miniBatchAccuracy = sess.run(accuracy, feed_dict={output_keep_prob: 1.0})
print('Batch %d: loss = %.2f, acc = %.2f' % (num_batch, miniBatchCost, miniBatchAccuracy * 100))
num_batch +=1
except tf.errors.OutOfRangeError:
break
When I run this code, it seems it is working and printing:
Batch 0: loss = 0.67276, acc = 0.94531
Batch 1: loss = 0.65672, acc = 0.92969
Batch 2: loss = 0.65927, acc = 0.89062
Batch 3: loss = 0.63996, acc = 0.99219
Batch 4: loss = 0.63693, acc = 0.99219
Batch 5: loss = 0.62714, acc = 0.9765
......
......
Batch 39: loss = 0.16812, acc = 0.98438
Batch 40: loss = 0.10677, acc = 0.96875
Batch 41: loss = 0.11704, acc = 0.99219
Batch 42: loss = 0.10592, acc = 0.98438
Batch 43: loss = 0.09682, acc = 0.97656
Batch 44: loss = 0.16449, acc = 1.00000
However, as one can easily see, something is wrong. Only 45 batches are printed, not 91, and I do not know why this is happening. I have tried many things and I think I am missing something.
I could use the repeat()
function, but I do not want to, because I would then have redundant observations in the last batches; I want the LSTM to handle the smaller final batch.
python tensorflow lstm tensorflow-datasets
add a comment |
I am teaching myself the tf.data
API. I am using the MNIST
dataset for binary classification. The training x and y data are zipped together into the full train_dataset. Chained onto this zip call is first the batch()
dataset method: the data is batched with a batch size of 128. Since my training set size is 11623, with batch size 128 I will have 91 batches. The last batch will contain 103 examples, which is fine since this is an LSTM. Additionally, I am using dropout. When I compute batch accuracy, I turn dropout off.
The full code is given below:
#Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8,7)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/")
Xtrain = mnist.train.images[mnist.train.labels < 2]
ytrain = mnist.train.labels[mnist.train.labels < 2]
print(Xtrain.shape)
print(ytrain.shape)
#Data parameters
num_inputs = 28
num_classes = 2
num_steps=28
# create the training dataset
Xtrain = tf.data.Dataset.from_tensor_slices(Xtrain).map(lambda x: tf.reshape(x,(num_steps, num_inputs)))
# apply a one-hot transformation to each label for use in the neural network
ytrain = tf.data.Dataset.from_tensor_slices(ytrain).map(lambda z: tf.one_hot(z, num_classes))
# zip the x and y training data together and batch and Prefetch data for faster consumption
train_dataset = tf.data.Dataset.zip((Xtrain, ytrain)).batch(128).prefetch(128)
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
X, y = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
#### model is here ####
#Network parameters
num_epochs = 2
batch_size = 128
output_keep_var = 0.5
with tf.Session() as sess:
init.run()
print("Initialized")
# Training cycle
for epoch in range(0, num_epochs):
num_batch = 0
print ("Epoch: ", epoch)
avg_cost = 0.
avg_accuracy =0
total_batch = int(11623 / batch_size + 1)
sess.run(training_init_op)
while True:
try:
_, miniBatchCost = sess.run([trainer, loss], feed_dict={output_keep_prob: output_keep_var})
miniBatchAccuracy = sess.run(accuracy, feed_dict={output_keep_prob: 1.0})
print('Batch %d: loss = %.2f, acc = %.2f' % (num_batch, miniBatchCost, miniBatchAccuracy * 100))
num_batch +=1
except tf.errors.OutOfRangeError:
break
When I run this code, it seems it is working and printing:
Batch 0: loss = 0.67276, acc = 0.94531
Batch 1: loss = 0.65672, acc = 0.92969
Batch 2: loss = 0.65927, acc = 0.89062
Batch 3: loss = 0.63996, acc = 0.99219
Batch 4: loss = 0.63693, acc = 0.99219
Batch 5: loss = 0.62714, acc = 0.9765
......
......
Batch 39: loss = 0.16812, acc = 0.98438
Batch 40: loss = 0.10677, acc = 0.96875
Batch 41: loss = 0.11704, acc = 0.99219
Batch 42: loss = 0.10592, acc = 0.98438
Batch 43: loss = 0.09682, acc = 0.97656
Batch 44: loss = 0.16449, acc = 1.00000
However, as one can easily see, something is wrong. Only 45 batches are printed, not 91, and I do not know why this is happening. I have tried many things and I think I am missing something.
I could use the repeat()
function, but I do not want to, because I would then have redundant observations in the last batches; I want the LSTM to handle the smaller final batch.
python tensorflow lstm tensorflow-datasets
add a comment |
I am teaching myself the tf.data
API. I am using the MNIST
dataset for binary classification. The training x and y data are zipped together into the full train_dataset. Chained onto this zip call is first the batch()
dataset method: the data is batched with a batch size of 128. Since my training set size is 11623, with batch size 128 I will have 91 batches. The last batch will contain 103 examples, which is fine since this is an LSTM. Additionally, I am using dropout. When I compute batch accuracy, I turn dropout off.
The full code is given below:
#Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8,7)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/")
Xtrain = mnist.train.images[mnist.train.labels < 2]
ytrain = mnist.train.labels[mnist.train.labels < 2]
print(Xtrain.shape)
print(ytrain.shape)
#Data parameters
num_inputs = 28
num_classes = 2
num_steps=28
# create the training dataset
Xtrain = tf.data.Dataset.from_tensor_slices(Xtrain).map(lambda x: tf.reshape(x,(num_steps, num_inputs)))
# apply a one-hot transformation to each label for use in the neural network
ytrain = tf.data.Dataset.from_tensor_slices(ytrain).map(lambda z: tf.one_hot(z, num_classes))
# zip the x and y training data together and batch and Prefetch data for faster consumption
train_dataset = tf.data.Dataset.zip((Xtrain, ytrain)).batch(128).prefetch(128)
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
X, y = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
#### model is here ####
#Network parameters
num_epochs = 2
batch_size = 128
output_keep_var = 0.5
with tf.Session() as sess:
init.run()
print("Initialized")
# Training cycle
for epoch in range(0, num_epochs):
num_batch = 0
print ("Epoch: ", epoch)
avg_cost = 0.
avg_accuracy =0
total_batch = int(11623 / batch_size + 1)
sess.run(training_init_op)
while True:
try:
_, miniBatchCost = sess.run([trainer, loss], feed_dict={output_keep_prob: output_keep_var})
miniBatchAccuracy = sess.run(accuracy, feed_dict={output_keep_prob: 1.0})
print('Batch %d: loss = %.2f, acc = %.2f' % (num_batch, miniBatchCost, miniBatchAccuracy * 100))
num_batch +=1
except tf.errors.OutOfRangeError:
break
When I run this code, it seems it is working and printing:
Batch 0: loss = 0.67276, acc = 0.94531
Batch 1: loss = 0.65672, acc = 0.92969
Batch 2: loss = 0.65927, acc = 0.89062
Batch 3: loss = 0.63996, acc = 0.99219
Batch 4: loss = 0.63693, acc = 0.99219
Batch 5: loss = 0.62714, acc = 0.9765
......
......
Batch 39: loss = 0.16812, acc = 0.98438
Batch 40: loss = 0.10677, acc = 0.96875
Batch 41: loss = 0.11704, acc = 0.99219
Batch 42: loss = 0.10592, acc = 0.98438
Batch 43: loss = 0.09682, acc = 0.97656
Batch 44: loss = 0.16449, acc = 1.00000
However, as one can easily see, something is wrong. Only 45 batches are printed, not 91, and I do not know why this is happening. I have tried many things and I think I am missing something.
I could use the repeat()
function, but I do not want to, because I would then have redundant observations in the last batches; I want the LSTM to handle the smaller final batch.
python tensorflow lstm tensorflow-datasets
I am teaching myself the tf.data
API. I am using the MNIST
dataset for binary classification. The training x and y data are zipped together into the full train_dataset. Chained onto this zip call is first the batch()
dataset method: the data is batched with a batch size of 128. Since my training set size is 11623, with batch size 128 I will have 91 batches. The last batch will contain 103 examples, which is fine since this is an LSTM. Additionally, I am using dropout. When I compute batch accuracy, I turn dropout off.
The full code is given below:
#Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8,7)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/")
Xtrain = mnist.train.images[mnist.train.labels < 2]
ytrain = mnist.train.labels[mnist.train.labels < 2]
print(Xtrain.shape)
print(ytrain.shape)
#Data parameters
num_inputs = 28
num_classes = 2
num_steps=28
# create the training dataset
Xtrain = tf.data.Dataset.from_tensor_slices(Xtrain).map(lambda x: tf.reshape(x,(num_steps, num_inputs)))
# apply a one-hot transformation to each label for use in the neural network
ytrain = tf.data.Dataset.from_tensor_slices(ytrain).map(lambda z: tf.one_hot(z, num_classes))
# zip the x and y training data together and batch and Prefetch data for faster consumption
train_dataset = tf.data.Dataset.zip((Xtrain, ytrain)).batch(128).prefetch(128)
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
X, y = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
#### model is here ####
#Network parameters
num_epochs = 2
batch_size = 128
output_keep_var = 0.5
with tf.Session() as sess:
init.run()
print("Initialized")
# Training cycle
for epoch in range(0, num_epochs):
num_batch = 0
print ("Epoch: ", epoch)
avg_cost = 0.
avg_accuracy =0
total_batch = int(11623 / batch_size + 1)
sess.run(training_init_op)
while True:
try:
_, miniBatchCost = sess.run([trainer, loss], feed_dict={output_keep_prob: output_keep_var})
miniBatchAccuracy = sess.run(accuracy, feed_dict={output_keep_prob: 1.0})
print('Batch %d: loss = %.2f, acc = %.2f' % (num_batch, miniBatchCost, miniBatchAccuracy * 100))
num_batch +=1
except tf.errors.OutOfRangeError:
break
When I run this code, it seems it is working and printing:
Batch 0: loss = 0.67276, acc = 0.94531
Batch 1: loss = 0.65672, acc = 0.92969
Batch 2: loss = 0.65927, acc = 0.89062
Batch 3: loss = 0.63996, acc = 0.99219
Batch 4: loss = 0.63693, acc = 0.99219
Batch 5: loss = 0.62714, acc = 0.9765
......
......
Batch 39: loss = 0.16812, acc = 0.98438
Batch 40: loss = 0.10677, acc = 0.96875
Batch 41: loss = 0.11704, acc = 0.99219
Batch 42: loss = 0.10592, acc = 0.98438
Batch 43: loss = 0.09682, acc = 0.97656
Batch 44: loss = 0.16449, acc = 1.00000
However, as one can easily see, something is wrong. Only 45 batches are printed, not 91, and I do not know why this is happening. I have tried many things and I think I am missing something.
I could use the repeat()
function, but I do not want to, because I would then have redundant observations in the last batches; I want the LSTM to handle the smaller final batch.
python tensorflow lstm tensorflow-datasets
python tensorflow lstm tensorflow-datasets
edited Dec 5 '18 at 15:26
ARAT
asked Nov 23 '18 at 16:57
ARATARAT
3101517
3101517
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
This is an annoying pitfall when defining a model based directly on the get_next()
output of a tf.data
iterator. In your loop, you have two sess.run
calls, both of which will advance the iterator by one step. This means each loop iteration actually consumes two batches (and also your loss and accuracy calculations are computed on different batches).
Not entirely sure if there is a "canonical" way of fixing this, but you could
- compute the accuracy in the same
run
call as the cost/training step. This would mean that the accuracy calculation is also affected by the dropout mask, but since it's an approximate value based on only one batch, that shouldn't be a huge issue.
- define your model based on a placeholder instead, and in each loop iteration
run
the get_next
op itself, then feed the resulting numpy arrays (i.e. the batch) into the loss/accuracy computations.
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use the tf.data
API with a SQL query, and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls, so I put the accuracy in the same run
call as the cost/training step, which is the first approach you mention, and it works. I think this approach is more feasible, and I can calculate the accuracy of each batch for a model with dropout.
– ARAT
Nov 23 '18 at 20:42
What are the other approaches, though, to use the tf.data
API to define a model? It is either assigning the output of a tf.data
iterator to each argument of a model definition OR using the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.
– ARAT
Nov 23 '18 at 20:46
1
I don't think there is any other "direct" way, since after all tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.
– xdurch0
Nov 24 '18 at 10:31
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53450495%2ftf-data-api-cannot-print-all-the-batches%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
This is an annoying pitfall when defining a model based directly on the get_next()
output of a tf.data
iterator. In your loop, you have two sess.run
calls, both of which will advance the iterator by one step. This means each loop iteration actually consumes two batches (and also your loss and accuracy calculations are computed on different batches).
Not entirely sure if there is a "canonical" way of fixing this, but you could
- compute the accuracy in the same
run
call as the cost/training step. This would mean that the accuracy calculation is also affected by the dropout mask, but since it's an approximate value based on only one batch, that shouldn't be a huge issue.
- define your model based on a placeholder instead, and in each loop iteration
run
the get_next
op itself, then feed the resulting numpy arrays (i.e. the batch) into the loss/accuracy computations.
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use the tf.data
API with a SQL query, and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls, so I put the accuracy in the same run
call as the cost/training step, which is the first approach you mention, and it works. I think this approach is more feasible, and I can calculate the accuracy of each batch for a model with dropout.
– ARAT
Nov 23 '18 at 20:42
What are the other approaches, though, to use the tf.data
API to define a model? It is either assigning the output of a tf.data
iterator to each argument of a model definition OR using the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.
– ARAT
Nov 23 '18 at 20:46
1
I don't think there is any other "direct" way, since after all tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.
– xdurch0
Nov 24 '18 at 10:31
add a comment |
This is an annoying pitfall when defining a model based directly on the get_next()
output of a tf.data
iterator. In your loop, you have two sess.run
calls, both of which will advance the iterator by one step. This means each loop iteration actually consumes two batches (and also your loss and accuracy calculations are computed on different batches).
Not entirely sure if there is a "canonical" way of fixing this, but you could
- compute the accuracy in the same
run
call as the cost/training step. This would mean that the accuracy calculation is also affected by the dropout mask, but since it's an approximate value based on only one batch, that shouldn't be a huge issue.
- define your model based on a placeholder instead, and in each loop iteration
run
the get_next
op itself, then feed the resulting numpy arrays (i.e. the batch) into the loss/accuracy computations.
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use the tf.data
API with a SQL query, and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls, so I put the accuracy in the same run
call as the cost/training step, which is the first approach you mention, and it works. I think this approach is more feasible, and I can calculate the accuracy of each batch for a model with dropout.
– ARAT
Nov 23 '18 at 20:42
What are the other approaches, though, to use the tf.data
API to define a model? It is either assigning the output of a tf.data
iterator to each argument of a model definition OR using the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.
– ARAT
Nov 23 '18 at 20:46
1
I don't think there is any other "direct" way, since after all tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.
– xdurch0
Nov 24 '18 at 10:31
add a comment |
This is an annoying pitfall when defining a model based directly on the get_next()
output of a tf.data
iterator. In your loop, you have two sess.run
calls, both of which will advance the iterator by one step. This means each loop iteration actually consumes two batches (and also your loss and accuracy calculations are computed on different batches).
Not entirely sure if there is a "canonical" way of fixing this, but you could
- compute the accuracy in the same
run
call as the cost/training step. This would mean that the accuracy calculation is also affected by the dropout mask, but since it's an approximate value based on only one batch, that shouldn't be a huge issue.
- define your model based on a placeholder instead, and in each loop iteration
run
the get_next
op itself, then feed the resulting numpy arrays (i.e. the batch) into the loss/accuracy computations.
This is an annoying pitfall when defining a model based directly on the get_next()
output of a tf.data
iterator. In your loop, you have two sess.run
calls, both of which will advance the iterator by one step. This means each loop iteration actually consumes two batches (and also your loss and accuracy calculations are computed on different batches).
Not entirely sure if there is a "canonical" way of fixing this, but you could
- compute the accuracy in the same
run
call as the cost/training step. This would mean that the accuracy calculation is also affected by the dropout mask, but since it's an approximate value based on only one batch, that shouldn't be a huge issue.
- define your model based on a placeholder instead, and in each loop iteration
run
the get_next
op itself, then feed the resulting numpy arrays (i.e. the batch) into the loss/accuracy computations.
answered Nov 23 '18 at 20:36
xdurch0xdurch0
3,0544920
3,0544920
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use the tf.data
API with a SQL query, and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls, so I put the accuracy in the same run
call as the cost/training step, which is the first approach you mention, and it works. I think this approach is more feasible, and I can calculate the accuracy of each batch for a model with dropout.
– ARAT
Nov 23 '18 at 20:42
What are the other approaches, though, to use the tf.data
API to define a model? It is either assigning the output of a tf.data
iterator to each argument of a model definition OR using the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.
– ARAT
Nov 23 '18 at 20:46
1
I don't think there is any other "direct" way, since after all tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.
– xdurch0
Nov 24 '18 at 10:31
add a comment |
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use the tf.data
API with a SQL query, and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls, so I put the accuracy in the same run
call as the cost/training step, which is the first approach you mention, and it works. I think this approach is more feasible, and I can calculate the accuracy of each batch for a model with dropout.
– ARAT
Nov 23 '18 at 20:42
What are the other approaches, though, to use the tf.data
API to define a model? It is either assigning the output of a tf.data
iterator to each argument of a model definition OR using the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.
– ARAT
Nov 23 '18 at 20:46
1
I don't think there is any other "direct" way, since after all tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.
– xdurch0
Nov 24 '18 at 10:31
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use
tf.data
API with a SQL query and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls so I put the accuracy in the same run
call as the cost/training step, as this is the first approach you mention and it works. I think this approach is more feasible and I can calculate accuracies of each batches for a model with drop-out– ARAT
Nov 23 '18 at 20:42
Yes, actually I tried both approaches. For the second approach, I particularly did not want to use placeholders because my next step will be to use
tf.data
API with a SQL query and I want to avoid using feed_dict to consume the data because the result of this SQL query is big. After I asked this question, I realized I cannot do two sess.run
calls so I put the accuracy in the same run
call as the cost/training step, as this is the first approach you mention and it works. I think this approach is more feasible and I can calculate accuracies of each batches for a model with drop-out– ARAT
Nov 23 '18 at 20:42
What are the other approaches though to use
tf.data
API to define a model? it is either assigning output of a tf.data
iterator to each arguments of a model definition OR to use the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.– ARAT
Nov 23 '18 at 20:46
What are the other approaches though to use
tf.data
API to define a model? it is either assigning output of a tf.data
iterator to each arguments of a model definition OR to use the feed_dict
to consume the output of sess.run([X,y])
(or sess.run(next_batch)
) like given in here or here.– ARAT
Nov 23 '18 at 20:46
1
1
I don't think there is any other "direct" way, since after all
tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.– xdurch0
Nov 24 '18 at 10:31
I don't think there is any other "direct" way, since after all
tf.data
iterators just return tensors and the get_next
op will advance each time it is called... One option could be to define a custom dataset that returns each element twice (or k
times, which can be given as a parameter) before advancing. This could e.g. be achieved via a dataset from_generator
.– xdurch0
Nov 24 '18 at 10:31
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53450495%2ftf-data-api-cannot-print-all-the-batches%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown