summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavfilter/dnn/dnn_backend_native.c | 2
-rw-r--r-- libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37
-rw-r--r-- libavfilter/dnn/dnn_backend_native_layer_conv2d.h | 1
-rw-r--r-- tests/dnn/dnn-layer-conv2d-test.c | 2
-rw-r--r-- tools/python/convert_from_tensorflow.py | 54
-rw-r--r-- tools/python/convert_header.py | 4
6 files changed, 82 insertions, 18 deletions
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 06b010d90e..ff280b5506 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
char header_expected[] = "FFMPEGDNNNATIVE";
char *buf;
size_t size;
- int version, header_size, major_version_expected = 0;
+ int version, header_size, major_version_expected = 1;
ConvolutionalNetwork *network = NULL;
AVIOContext *model_file_context;
int file_size, dnn_size, parsed_size;
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 0de890217d..6ec0fa7a99 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
conv_params->input_num = (int32_t)avio_rl32(model_file_context);
conv_params->output_num = (int32_t)avio_rl32(model_file_context);
conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+ conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
+ dnn_size += 28;
+
kernel_size = conv_params->input_num * conv_params->output_num *
- conv_params->kernel_size * conv_params->kernel_size;
- dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+ conv_params->kernel_size * conv_params->kernel_size;
+ dnn_size += kernel_size * 4;
+ if (conv_params->has_bias)
+ dnn_size += conv_params->output_num * 4;
+
if (dnn_size > file_size || conv_params->input_num <= 0 ||
conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
av_freep(&conv_params);
return 0;
}
+
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
- conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
- if (!conv_params->kernel || !conv_params->biases){
- av_freep(&conv_params->kernel);
- av_freep(&conv_params->biases);
+ if (!conv_params->kernel) {
av_freep(&conv_params);
return 0;
}
- for (int i = 0; i < kernel_size; ++i){
+ for (int i = 0; i < kernel_size; ++i) {
conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
}
- for (int i = 0; i < conv_params->output_num; ++i){
- conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+
+ conv_params->biases = NULL;
+ if (conv_params->has_bias) {
+ conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+ if (!conv_params->biases){
+ av_freep(&conv_params->kernel);
+ av_freep(&conv_params);
+ return 0;
+ }
+ for (int i = 0; i < conv_params->output_num; ++i){
+ conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+ }
}
layer->params = conv_params;
@@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
for (int y = pad_size; y < height - pad_size; ++y) {
for (int x = pad_size; x < width - pad_size; ++x) {
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
- output[n_filter] = conv_params->biases[n_filter];
+ if (conv_params->has_bias)
+ output[n_filter] = conv_params->biases[n_filter];
+ else
+ output[n_filter] = 0.f;
for (int ch = 0; ch < conv_params->input_num; ++ch) {
for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
index db90b2b6f6..bf872642dd 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
DNNActivationFunc activation;
DNNConvPaddingParam padding_method;
int32_t dilation;
+ int32_t has_bias;
float *kernel;
float *biases;
} ConvolutionalParams;
diff --git a/tests/dnn/dnn-layer-conv2d-test.c b/tests/dnn/dnn-layer-conv2d-test.c
index 9d13da37c8..2da01e5372 100644
--- a/tests/dnn/dnn-layer-conv2d-test.c
+++ b/tests/dnn/dnn-layer-conv2d-test.c
@@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
float bias[2] = { -1.6574852, -0.72915393 };
params.activation = TANH;
+ params.has_bias = 1;
params.biases = bias;
params.dilation = 2;
params.input_num = 3;
@@ -196,6 +197,7 @@ static int test_with_valid(void)
float bias[2] = { -0.4773722, -0.19620377 };
params.activation = TANH;
+ params.has_bias = 1;
params.biases = bias;
params.dilation = 1;
params.input_num = 3;
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index a663b34004..605158a32e 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -118,7 +118,7 @@ class TFConverter:
return knode, bnode, dnode, anode
- def dump_conv2d_to_file(self, node, f):
+ def dump_complex_conv2d_to_file(self, node, f):
assert(node.op == 'Conv2D')
self.layer_number = self.layer_number + 1
self.converted_nodes.add(node.name)
@@ -153,7 +153,8 @@ class TFConverter:
kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
kernel = np.transpose(kernel, [3, 0, 1, 2])
- np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
+ has_bias = 1
+ np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
kernel.tofile(f)
btensor = bnode.attr['value'].tensor
@@ -173,6 +174,41 @@ class TFConverter:
np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+ def dump_simple_conv2d_to_file(self, node, f):
+ assert(node.op == 'Conv2D')
+ self.layer_number = self.layer_number + 1
+ self.converted_nodes.add(node.name)
+
+ node0 = self.name_node_dict[node.input[0]]
+ node1 = self.name_node_dict[node.input[1]]
+ if node0.op == 'Const':
+ knode = node0
+ input_name = node.input[1]
+ else:
+ knode = node1
+ input_name = node.input[0]
+
+ ktensor = knode.attr['value'].tensor
+ filter_height = ktensor.tensor_shape.dim[0].size
+ filter_width = ktensor.tensor_shape.dim[1].size
+ in_channels = ktensor.tensor_shape.dim[2].size
+ out_channels = ktensor.tensor_shape.dim[3].size
+ kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
+ kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
+ kernel = np.transpose(kernel, [3, 0, 1, 2])
+
+ has_bias = 0
+ dilation = 1
+ padding = node.attr['padding'].s.decode("utf-8")
+ np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
+ in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
+ kernel.tofile(f)
+
+ input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+ output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+ np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+
+
def dump_depth2space_to_file(self, node, f):
assert(node.op == 'DepthToSpace')
self.layer_number = self.layer_number + 1
@@ -222,10 +258,12 @@ class TFConverter:
scope_name = TFConverter.get_scope_name(node.name)
if scope_name in self.conv2d_scope_names:
if node.op == 'Conv2D':
- self.dump_conv2d_to_file(node, f)
+ self.dump_complex_conv2d_to_file(node, f)
continue
- if node.op == 'DepthToSpace':
+ if node.op == 'Conv2D':
+ self.dump_simple_conv2d_to_file(node, f)
+ elif node.op == 'DepthToSpace':
self.dump_depth2space_to_file(node, f)
elif node.op == 'MirrorPad':
self.dump_mirrorpad_to_file(node, f)
@@ -312,10 +350,16 @@ class TFConverter:
def generate_conv2d_scope_info(self):
- # conv2d is a sub block in graph, get the scope name
+ # mostly, conv2d is a sub block in graph, get the scope name
for node in self.nodes:
if node.op == 'Conv2D':
scope = TFConverter.get_scope_name(node.name)
+ # for the case tf.nn.conv2d is called directly
+ if scope == '':
+ continue
+ # for the case tf.nn.conv2d is called within a scope
+ if scope + '/kernel' not in self.name_node_dict:
+ continue
self.conv2d_scope_names.add(scope)
# get the input name to the conv2d sub block
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 3c2acd5b15..67672b2785 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -20,7 +20,7 @@
str = 'FFMPEGDNNNATIVE'
# increase major and reset minor when we have to re-convert the model file
-major = 0
+major = 1
# increase minor when we don't have to re-convert the model file
-minor = 2
+minor = 0