J'essaie d'exécuter un DNNClassifier avec TensorFlow sur des données de journal qui contiennent un mélange de données catégorielles et numériques. J'ai créé des colonnes de caractéristiques pour spécifier et classer/masquer les données pour TensorFlow. Lorsque j'exécute le code, je reçois l'erreur interne "Unable to get element as bytes". Note : Je n'ai pas voulu laisser tomber les valeurs Nan comme indiqué dans ce document. article donc je les ai convertis en 0 en utilisant ce code train = train.fillna(0, axis=0)
donc je ne sais pas pourquoi je reçois toujours cette erreur. Si je laisse tomber les Nan, cela fonctionne, mais je ne veux pas laisser tomber les Nan, car je pense qu'ils sont nécessaires à l'apprentissage du modèle.
def create_train_input_fn():
return tf.estimator.inputs.pandas_input_fn(
x=train,
y=train_label,
batch_size=32,
num_epochs=None,
shuffle=True)
def create_test_input_fn():
return tf.estimator.inputs.pandas_input_fn(
x=valid,
y=valid_label,
num_epochs=1,
shuffle=False)
feature_columns = []
end_time = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('end_time', 1000), 10)
feature_columns.append(end_time)
device = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device', 1000), 10)
feature_columns.append(device)
device_os = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os', 1000), 10)
feature_columns.append(device_os)
device_os_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_os_version', 1000), 10)
feature_columns.append(device_os_version)
Latency = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column('Latency'),
boundaries=[.000000, .000010, .000100, .001000, .010000, .100000])
feature_columns.append(Latency)
Megacycles = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column('Megacycles'),
boundaries=[0, 50, 100, 200, 300])
feature_columns.append(Megacycles)
Cost = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column('Cost'),
boundaries=[0.000001e-08, 1.000000e-08, 5.000000e-08, 10.000000e-08, 15.000000e-08 ])
feature_columns.append(Cost)
device_brand = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_brand', 1000), 10)
feature_columns.append(device_brand)
device_family = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('device_family', 1000), 10)
feature_columns.append(device_family)
browser_version = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('browser_version', 1000), 10)
feature_columns.append(browser_version)
app = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('app', 1000), 10)
feature_columns.append(app)
ua_parse = tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_hash_bucket('ua_parse', 1000), 10)
feature_columns.append(ua_parse)
estimator = tf.estimator.DNNClassifier(hidden_units=[256, 128, 64],
feature_columns=feature_columns,
n_classes=2,
model_dir='graphs/dnn')
train_input_fn = create_train_input_fn()
estimator.train(train_input_fn, steps=2000)
Ensuite, je reçois cette erreur :
InternalErrorTraceback (most recent call last)
<ipython-input-67-6abd6f1afc3a> in <module>()
1 train_input_fn = create_train_input_fn()
----> 2 estimator.train(train_input_fn, steps=2000)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
312
313 saving_listeners = _check_listeners_type(saving_listeners)
--> 314 loss = self._train_model(input_fn, hooks, saving_listeners)
315 logging.info('Loss for final step: %s.', loss)
316 return self
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
813 loss = None
814 while not mon_sess.should_stop():
--> 815 _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
816 return loss
817
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in __exit__(self, exception_type, exception_value, traceback)
649 if exception_type in [errors.OutOfRangeError, StopIteration]:
650 exception_type = None
--> 651 self._close_internal(exception_type)
652 # __exit__ should return True to suppress an exception.
653 return exception_type is None
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in _close_internal(self, exception_type)
686 if self._sess is None:
687 raise RuntimeError('Session is already closed.')
--> 688 self._sess.close()
689 finally:
690 self._sess = None
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in close(self)
932 if self._sess:
933 try:
--> 934 self._sess.close()
935 except _PREEMPTION_ERRORS:
936 pass
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.pyc in close(self)
1076 self._coord.join(
1077 stop_grace_period_secs=self._stop_grace_period_secs,
-> 1078 ignore_live_threads=True)
1079 finally:
1080 try:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/training/coordinator.pyc in join(self, threads, stop_grace_period_secs, ignore_live_threads)
385 self._registered_threads = set()
386 if self._exc_info_to_raise:
--> 387 six.reraise(*self._exc_info_to_raise)
388 elif stragglers:
389 if ignore_live_threads:
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.pyc in _run(self, sess, enqueue_op, feed_fn, coord)
92 try:
93 feed_dict = None if feed_fn is None else feed_fn()
---> 94 sess.run(enqueue_op, feed_dict=feed_dict)
95 except (errors.OutOfRangeError, errors.CancelledError):
96 # This exception indicates that a queue was closed.
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
893 try:
894 result = self._run(None, fetches, feed_dict, options_ptr,
--> 895 run_metadata_ptr)
896 if run_metadata:
897 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1126 if final_fetches or final_targets or (handle and feed_dict_tensor):
1127 results = self._do_run(handle, final_targets, final_fetches,
-> 1128 feed_dict_tensor, options, run_metadata)
1129 else:
1130 results = []
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1342 if handle is None:
1343 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1344 options, run_metadata)
1345 else:
1346 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/usr/local/envs/py2env/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1361 except KeyError:
1362 pass
-> 1363 raise type(e)(node_def, op, message)
1364
1365 def _extend_graph(self):
InternalError: Unable to get element as bytes.