import tensorflow as tf
import tensorflow.keras as keras

def create_model(optimizer="sgd"):
    deep_model = keras.models.Sequential([
        keras.layers.Conv2D(64, 7, activation="relu", padding="same",
                            input_shape=[1, 28, 28], name="input"),
        keras.layers.MaxPooling2D(1, name="firstPool"),
        keras.layers.Conv2D(128, 3, activation="relu", padding="same",
                            name="first_conv_1"),
        keras.layers.Conv2D(128, 3, activation="relu", padding="same",
                            name="first_conv_2"),
        keras.layers.MaxPooling2D(1, name="secondPool"),
        keras.layers.Conv2D(256, 3, activation="relu", padding="same",
                            name="second_conv_1"),
        keras.layers.Conv2D(256, 3, activation="relu", padding="same",
                            name="second_conv_2"),
        keras.layers.MaxPooling2D(1, name="thirdPool"),
        keras.layers.Flatten(name="flatten"),
        keras.layers.Dense(128, activation="relu", name="pre-bottneck"),
        keras.layers.Dropout(0.5, name="bottleneckDropout"),
        keras.layers.Dense(64, activation="relu", name="bottleneck"),
        keras.layers.Dropout(0.5, name="outputDropout"),
        keras.layers.Dense(10, activation="softmax", name="output"),
    ])
    deep_model.compile(loss="sparse_categorical_crossentropy",
                       optimizer=optimizer,
                       metrics=["accuracy"])
    return deep_model

def fit_model(model, X_train, y_train, X_valid, y_valid, epochs):
    history_conv = model.fit(X_train, y_train, validation_data=[X_valid, y_valid],
                             epochs=epochs, verbose=0)
    return history_conv

def plot_history(history, name):
    # c10 is a plotting helper defined elsewhere in the notebook.
    c10.plot_training(history, name, show=True)

model = create_model()
history = fit_model(model, X_train, y_train, X_valid, y_valid, epochs=10)
plot_history(history, "naive_deep_mnist")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-26-95a7830ebd3c> in <module>
41
42 model = create_model()
---> 43 history = fit_model(model, X_train, y_train, X_valid, y_valid, epochs=10)
44 plot_history(history, "naive_deep_mnist")
<ipython-input-26-95a7830ebd3c> in fit_model(model, X_train, y_train, X_valid, y_valid, epochs)
34
35 def fit_model(model, X_train, y_train, X_valid, y_valid, epochs):
---> 36 history_conv = model.fit(X_train, y_train, validation_data=[X_valid, y_valid], epochs=epochs, verbose=0)
37 return history_conv
38
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
694 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
695 self._concrete_stateful_fn = (
--> 696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
697 *args, **kwds))
698
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3063 arg_names = base_arg_names + missing_arg_names
3064 graph_function = ConcreteFunction(
-> 3065 func_graph_module.func_graph_from_py_func(
3066 self._name,
3067 self._python_function,
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:747 train_step
y_pred = self(x, training=True)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py:975 __call__
input_spec.assert_input_compatibility(self.input_spec, inputs,
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/input_spec.py:191 assert_input_compatibility
raise ValueError('Input ' + str(input_index) + ' of layer ' +
ValueError: Input 0 of layer sequential_17 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: [None, 28, 28]
Don't worry if you don't understand the error; in fact, that's the whole point. The "error" is the entire stack trace of the Python execution, with no human-readable indication of which component failed.
Lesson: Tools are meant to be helpful to humans. When the failure is longer and less understandable than the entire source, the tool is unhelpful.
The problems are twofold:
- Instead of referring to a layer by name, the stack trace points to 'sequential_17'. Every layer in the model was given a name, and none of them is called sequential_17; that is the auto-generated name of the model object itself.
- The input layer is clearly to blame: it's the only layer with a 28x28 input. But why is the input expected to have 4 dimensions? Presumably the input data needs to be reshaped, but the error gives no hint of what it should be reshaped to.
The right thing here would be for model.compile(...) and model.fit(...) to run a validation pass over all the layers, checking their input parameters for common mistakes. If a check fails, the error should name the layer that caused the failure. The pass should also verify that each layer's output shape conforms to the input shape the next layer expects, as sketched below.
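Here is a minimal sketch of such a pass, assuming only the public layer.input_spec and layer.compute_output_shape attributes of tf.keras layers. The helper name validate_input_shapes is my own, not a Keras API, and it checks only ranks, but even that would have replaced the stack trace above with a one-line, named diagnosis:

def validate_input_shapes(model, batch_input_shape):
    """Walk the layers, check each layer's declared rank expectations
    against the shape that would actually reach it, and name the first
    offender instead of dumping a stack trace."""
    shape = tuple(batch_input_shape)
    for layer in model.layers:
        specs = layer.input_spec
        if specs is not None:
            if not isinstance(specs, (list, tuple)):
                specs = [specs]
            for spec in specs:
                if spec.ndim is not None and len(shape) != spec.ndim:
                    raise ValueError(
                        f"layer '{layer.name}' expects ndim={spec.ndim}, "
                        f"but would receive shape {shape}")
                if spec.min_ndim is not None and len(shape) < spec.min_ndim:
                    raise ValueError(
                        f"layer '{layer.name}' expects min_ndim={spec.min_ndim}, "
                        f"but would receive shape {shape}")
        # Propagate the shape forward so the next layer is checked too.
        shape = tuple(layer.compute_output_shape(shape).as_list())
    return shape  # the final output shape, if every layer conforms

# validate_input_shapes(model, (None,) + X_train.shape[1:])
# -> ValueError naming layer 'input', not 'sequential_17'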
This error came after many minutes spent getting the model specified correctly. If you want a real headache of an error, run this code without 'name=' on one of the Dropout layers.
User-facing tools should have user-facing errors.
In case you came here because you are facing the same problem with a Conv2D layer, the answer is relatively simple. For grayscale images, the input_shape still needs to be (28, 28, 1). This is because the convolution operates over channels, and a monochrome image has exactly one channel. Having done that, you also need to reshape your input data from (size, 28, 28) to (size, 28, 28, 1).
# X_train.shape == (55000, 28, 28)
X_train = X_train.reshape(55000, 28, 28, 1)
# X_valid.shape == (5000, 28, 28)
X_valid = X_valid.reshape(5000, 28, 28, 1)

# Creating the model
deep_model = keras.models.Sequential([
    keras.layers.Conv2D(32, 4, activation="relu", padding="same",
                        input_shape=(28, 28, 1), name="input"),
    # ... and other layers.
])

# Training
deep_model.fit(x=X_train, ...
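If you would rather not hardcode the dataset sizes, NumPy can append the channel axis directly; this is equivalent to the reshape above:

import numpy as np

X_train = X_train[..., np.newaxis]  # (55000, 28, 28) -> (55000, 28, 28, 1)
X_valid = X_valid[..., np.newaxis]  # (5000, 28, 28)  -> (5000, 28, 28, 1)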