
Description
def build_classifier(batch_size, num_frames, backbone, resolution, num_classes):
    """Build a two-stream classifier fusing video and landmark features.

    The video stream runs ``backbone`` in inference mode and pools its
    ``'head'`` endpoint into one vector per clip. The landmark stream encodes
    a ``(num_frames, 234)`` sequence with a bidirectional LSTM. Both vectors
    are concatenated and classified with a small dense head.

    Args:
        batch_size: Static batch size passed to both ``Input`` layers.
        num_frames: Number of frames per clip (time dimension of both inputs).
        backbone: Callable video backbone. It may return either a dict of
            endpoints or a tuple whose first element is that dict.
        resolution: Height and width of the square RGB input frames.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Model`` taking ``[video_input, landmark_input]`` and
        producing class probabilities.
    """
    # --- Video input ---
    video_input = layers.Input(
        shape=(num_frames, resolution, resolution, 3),
        batch_size=batch_size,
        name='video_input',
    )

    def extract_video_features(x):
        """Return one pooled feature vector per clip from the backbone."""
        backbone_out = backbone(x, training=False)
        # Indexing the raw result with 'head' raised "tuple indices must be
        # integers or slices, not str": the backbone returns a tuple, not a
        # dict. MoViNet-style backbones return (endpoints, states), so the
        # endpoints dict is taken from position 0.
        # NOTE(review): confirm the tuple layout for the backbone in use.
        if isinstance(backbone_out, (tuple, list)):
            endpoints = backbone_out[0]
        else:
            endpoints = backbone_out
        x = endpoints['head']
        # Drop the two singleton spatial axes, leaving (batch, time, channels).
        x = tf.squeeze(x, axis=[2, 3])
        # Average over the time axis. This matches GlobalAveragePooling1D on
        # unmasked input without creating a new layer on every call.
        x = tf.reduce_mean(x, axis=1)
        return x

    # output_shape is hard-coded: it assumes the 'head' endpoint has 480
    # channels. Adjust it if a different backbone variant is used.
    video_features = layers.Lambda(
        extract_video_features,
        output_shape=(480,),
        name="video_features",
    )(video_input)

    # --- Landmark input ---
    landmark_input = layers.Input(
        shape=(num_frames, 234),
        batch_size=batch_size,
        name='landmark_input',
    )
    landmark_features = layers.Bidirectional(
        layers.LSTM(128, return_sequences=False)
    )(landmark_input)
    landmark_features = layers.Dense(128, activation='relu')(landmark_features)

    # --- Fusion ---
    # 480 video features + 128 landmark features -> (B, 608)
    merged = layers.Concatenate()([video_features, landmark_features])
    x = layers.Dense(256, activation='relu')(merged)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=[video_input, landmark_input], outputs=outputs)
    return model
I built this model, but when I run model.fit() it raises the following error in extract_video_features(x):
6 def extract_video_features(x):
7 endpoints = backbone(x, training=False)
----> 8 x = endpoints['head'] # shape: (B, T, 1, 1, C)
9 x = tf.squeeze(x, axis=[2, 3]) # shape: (B, T, C)
10 x = tf.keras.layers.GlobalAveragePooling1D()(x) # shape: (B, C)
TypeError: Exception encountered when calling Lambda.call().
tuple indices must be integers or slices, not str
Arguments received by Lambda.call():
• inputs=tf.Tensor(shape=(None, 50, 224, 224, 3), dtype=float32)
• mask=None
• training=True.
How can I fix this, or is there another way to incorporate the landmarks?