import tensorflow as tf
import tensorflow.keras as keras

def create_model(optimizer="sgd"):
    deep_model = keras.models.Sequential([
        keras.layers.Conv2D(64, 7, activation="relu", padding="same",
                            input_shape=[1, 28, 28], name="input"),
        keras.layers.MaxPooling2D(1, name="firstPool"),
        keras.layers.Conv2D(128, 3, activation="relu", padding="same",
                            name="first_conv_1"),
        keras.layers.Conv2D(128, 3, activation="relu", padding="same",
                            name="first_conv_2"),
        keras.layers.MaxPooling2D(1, name="secondPool"),
        keras.layers.Conv2D(256, 3, activation="relu", padding="same",
                            name="second_conv_1"),
        keras.layers.Conv2D(256, 3, activation="relu", padding="same",
                            name="second_conv_2"),
        keras.layers.MaxPooling2D(1, name="thirdPool"),
        keras.layers.Flatten(name="flatten"),
        keras.layers.Dense(128, activation="relu", name="pre-bottleneck"),
        keras.layers.Dropout(0.5, name="bottleneckDropout"),
        keras.layers.Dense(64, activation="relu", name="bottleneck"),
        keras.layers.Dropout(0.5, name="outputDropout"),
        keras.layers.Dense(10, activation="softmax", name="output"),
    ])
    deep_model.compile(loss="sparse_categorical_crossentropy",
                       optimizer=optimizer,
                       metrics=["accuracy"])
    return deep_model

def fit_model(model, X_train, y_train, X_valid, y_valid, epochs):
    history_conv = model.fit(X_train, y_train, validation_data=[X_valid, y_valid],
                             epochs=epochs, verbose=0)
    return history_conv

def plot_history(history, name):
    # c10 is a local plotting helper module, not shown here.
    c10.plot_training(history, name, show=True)

model = create_model()
history = fit_model(model, X_train, y_train, X_valid, y_valid, epochs=10)
plot_history(history, "naive_deep_mnist")
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-26-95a7830ebd3c> in <module>
     41
     42 model = create_model()
---> 43 history = fit_model(model, X_train, y_train, X_valid, y_valid, epochs=10)
     44 plot_history(history, "naive_deep_mnist")

<ipython-input-26-95a7830ebd3c> in fit_model(model, X_train, y_train, X_valid, y_valid, epochs)
     34
     35 def fit_model(model, X_train, y_train, X_valid, y_valid, epochs):
---> 36     history_conv = model.fit(X_train, y_train, validation_data=[X_valid, y_valid], epochs=epochs, verbose=0)
     37     return history_conv
     38

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
    106   def _method_wrapper(self, *args, **kwargs):
    107     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108       return method(self, *args, **kwargs)
    109
    110     # Running inside `run_distribute_coordinator` already.

/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1096                 batch_size=batch_size):
   1097               callbacks.on_train_batch_begin(step)
-> 1098               tmp_logs = train_function(iterator)
   1099               if data_handler.should_sync:
   1100                 context.async_wait()

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
    778       else:
    779         compiler = "nonXla"
--> 780         result = self._call(*args, **kwds)
    781
    782       new_tracing_count = self._get_tracing_count()

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
    821       # This is the first call of __call__, so we have to initialize.
    822       initializers = []
--> 823       self._initialize(args, kwds, add_initializers_to=initializers)
    824     finally:
    825       # At this point we know that the initialization is complete (or less

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
    694     self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    695     self._concrete_stateful_fn = (
--> 696         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    697             *args, **kwds))
    698

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2853       args, kwargs = None, None
   2854     with self._lock:
-> 2855       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2856     return graph_function
   2857

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
   3211
   3212           self._function_cache.missed.add(call_context_key)
-> 3213           graph_function = self._create_graph_function(args, kwargs)
   3214           self._function_cache.primary[cache_key] = graph_function
   3215           return graph_function, args, kwargs

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   3063             arg_names = base_arg_names + missing_arg_names
   3064         graph_function = ConcreteFunction(
-> 3065             func_graph_module.func_graph_from_py_func(
   3066                 self._name,
   3067                 self._python_function,

/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    984         _, original_func = tf_decorator.unwrap(python_func)
    985
--> 986       func_outputs = python_func(*func_args, **func_kwargs)
    987
    988       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
    598         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    599         # the function a weak reference to itself to avoid a reference cycle.
--> 600         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    601       weak_wrapped_fn = weakref.ref(wrapped_fn)
    602

/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    971           except Exception as e:  # pylint:disable=broad-except
    972             if hasattr(e, "ag_error_metadata"):
--> 973               raise e.ag_error_metadata.to_exception(e)
    974             else:
    975               raise

ValueError: in user code:

    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py:747 train_step
        y_pred = self(x, training=True)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/input_spec.py:191 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_17 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: [None, 28, 28]
Don't worry if you don't understand the error. In fact, that's the whole point. The error is the entire stack trace of the Python execution, with no human-readable information on the failing component.
Lesson: tools are meant to be helpful to humans. When the failure output is longer and less understandable than the entire source that produced it, the tool is unhelpful.
The problems here are twofold:
- Instead of referring to a layer by name, the stack trace points to 'sequential_17'. Every layer in the model has an explicit name, and none of them is called sequential_17. (It is in fact the auto-generated name of the Sequential model itself, but nothing in the message tells you that.)
- Clearly the input layer is to blame: it is the only layer with a 28x28 input. But why is the expected number of dimensions 4? I suspect the input data needs to be reshaped, but it isn't clear what shape it should be reshaped to.
The right thing here would be for model.compile(...) and model.fit(...) to run a pass over all the layers, validating their input parameters against common mistakes. When that validation fails, the error should name the layer that caused the failure. The pass should also check that each layer's output shape conforms to the next layer's expected input shape.
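A rough sketch of what such a pass could look like. To be clear, validate_layer_shapes is a hypothetical helper of mine, not a Keras API; InputSpec and compute_output_shape are real Keras machinery, though how much each layer declares in its spec varies. The point is only the attribution: the failure gets pinned to a named layer.

def validate_layer_shapes(model, data_shape):
    # Hypothetical pre-flight check: walk the layers, propagating the input
    # shape, and blame the first layer whose declared spec doesn't fit.
    shape = tuple(data_shape)
    for layer in model.layers:
        specs = layer.input_spec
        specs = specs if isinstance(specs, (list, tuple)) else [specs]
        for spec in specs:
            if spec is None:
                continue
            expected = spec.ndim or spec.min_ndim
            if expected is not None and len(shape) < expected:
                raise ValueError(
                    f"Layer '{layer.name}' expects ndim>={expected}, but its "
                    f"input has ndim={len(shape)} (shape {shape})")
        shape = tuple(layer.compute_output_shape(shape))
    return shape

# With the data shapes from the failing example:
#   validate_layer_shapes(model, (None, 28, 28))
# would point at the layer named "input" instead of at "sequential_17".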
This error came after many minutes spent getting the model specified correctly. If you want a real headache of an error, run this code without 'name=' on one of the Dropout layers.
User-facing tools should have user-facing errors.
In case you came here because you are facing the same problem with a Conv2D layer, the answer is relatively simple. For grayscale images, the input_shape still needs to be (28, 28, 1): the convolution operates on channels, and a monochrome image has a single channel. When you do that, you also need to reshape your input data to (size, 28, 28, 1) from (size, 28, 28).
# X_train.shape == (55000, 28, 28)
X_train = X_train.reshape(55000, 28, 28, 1)
# X_valid.shape == (5000, 28, 28)
X_valid = X_valid.reshape(5000, 28, 28, 1)

# Creating the model
deep_model = keras.models.Sequential([
    keras.layers.Conv2D(32, 4, activation="relu", padding="same",
                        input_shape=(28, 28, 1), name="input"),
    # ... and other layers.
])

# Training
deep_model.fit(x=X_train, ...
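As an aside of mine rather than part of the original fix: letting NumPy infer the batch size avoids hard-coding 55000 and 5000, so the reshape cannot drift out of sync with the data.

import numpy as np

# Same reshape, with the batch dimension inferred (-1) instead of hard-coded:
X_train = X_train.reshape(-1, 28, 28, 1)
X_valid = X_valid.reshape(-1, 28, 28, 1)

# Equivalent: append a trailing channel axis without naming any dimension.
# X_train = X_train[..., np.newaxis]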