author     Sergey Lavrushkin <dualfal@gmail.com>    2018-06-14 00:37:12 +0300
committer  Pedro Arthur <bygrandao@gmail.com>       2018-07-02 10:47:14 -0300
commit     575b7189908e1cfa55104b0d2c7c9f6ea30ca2dc (patch)
tree       49ba8795536d104ec1d1c8cb08772fcceb8da431 /libavfilter/dnn_backend_native.c
parent     d24c9e55f64eebf67a9e488daa17332533481c20 (diff)
Adds ESPCN super resolution filter merged with SRCNN filter.
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
Diffstat (limited to 'libavfilter/dnn_backend_native.c')
-rw-r--r--  libavfilter/dnn_backend_native.c  283
1 file changed, 198 insertions(+), 85 deletions(-)
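
Before the diff itself, a standalone sketch (not part of the patch) of the depth-to-space rearrangement that the new DEPTH_TO_SPACE layer performs: with block_size = 2, every group of 4 channels of one input pixel becomes a 2x2 output patch with a quarter of the channels. The helper name depth_to_space_ref and the small main() are illustrative only; the indexing mirrors the (y, x, by, bx, ch) ordering of the depth_to_space() function added below.

/* Standalone illustration, not part of the patch: reference depth-to-space on
 * an HWC float buffer, same element ordering as depth_to_space() below. */
#include <stdio.h>

static void depth_to_space_ref(const float *in, float *out, int block_size,
                               int width, int height, int channels)
{
    int y, x, by, bx, ch;
    int new_channels = channels / (block_size * block_size);

    for (y = 0; y < height; ++y){
        for (x = 0; x < width; ++x){
            for (by = 0; by < block_size; ++by){
                for (bx = 0; bx < block_size; ++bx){
                    for (ch = 0; ch < new_channels; ++ch){
                        int oy = y * block_size + by;
                        int ox = x * block_size + bx;
                        int c  = (by * block_size + bx) * new_channels + ch;
                        out[(oy * width * block_size + ox) * new_channels + ch] =
                            in[(y * width + x) * channels + c];
                    }
                }
            }
        }
    }
}

int main(void)
{
    /* One pixel with 4 channels becomes a 2x2 patch with 1 channel. */
    const float in[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float out[4];

    depth_to_space_ref(in, out, 2, 1, 1, 4);
    printf("%g %g\n%g %g\n", out[0], out[1], out[2], out[3]); /* 1 2 / 3 4 */
    return 0;
}

This is why the ESPCN graph set up in the patch ends with a 32-to-4 convolution followed by a DEPTH_TO_SPACE layer with block_size 2: the 4 channels are re-laid out as a 2x upscaled single-channel plane.
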
diff --git a/libavfilter/dnn_backend_native.c b/libavfilter/dnn_backend_native.c
index 6e80dd3663..02b174a054 100644
--- a/libavfilter/dnn_backend_native.c
+++ b/libavfilter/dnn_backend_native.c
@@ -25,9 +25,12 @@
#include "dnn_backend_native.h"
#include "dnn_srcnn.h"
+#include "dnn_espcn.h"
#include "libavformat/avio.h"
-typedef enum {INPUT, CONV} LayerType;
+typedef enum {INPUT, CONV, DEPTH_TO_SPACE} LayerType;
+
+typedef enum {RELU, TANH, SIGMOID} ActivationFunc;
typedef struct Layer{
LayerType type;
@@ -37,6 +40,7 @@ typedef struct Layer{
typedef struct ConvolutionalParams{
int32_t input_num, output_num, kernel_size;
+ ActivationFunc activation;
float* kernel;
float* biases;
} ConvolutionalParams;
@@ -45,17 +49,22 @@ typedef struct InputParams{
int height, width, channels;
} InputParams;
+typedef struct DepthToSpaceParams{
+ int block_size;
+} DepthToSpaceParams;
+
// Represents simple feed-forward convolutional network.
typedef struct ConvolutionalNetwork{
Layer* layers;
int32_t layers_num;
} ConvolutionalNetwork;
-static DNNReturnType set_input_output_native(void* model, const DNNData* input, const DNNData* output)
+static DNNReturnType set_input_output_native(void* model, DNNData* input, DNNData* output)
{
ConvolutionalNetwork* network = (ConvolutionalNetwork*)model;
InputParams* input_params;
ConvolutionalParams* conv_params;
+ DepthToSpaceParams* depth_to_space_params;
int cur_width, cur_height, cur_channels;
int32_t layer;
@@ -63,11 +72,17 @@ static DNNReturnType set_input_output_native(void* model, const DNNData* input,
return DNN_ERROR;
}
else{
- network->layers[0].output = input->data;
input_params = (InputParams*)network->layers[0].params;
input_params->width = cur_width = input->width;
input_params->height = cur_height = input->height;
input_params->channels = cur_channels = input->channels;
+ if (input->data){
+ av_freep(&input->data);
+ }
+ network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
+ if (!network->layers[0].output){
+ return DNN_ERROR;
+ }
}
for (layer = 1; layer < network->layers_num; ++layer){
@@ -78,32 +93,40 @@ static DNNReturnType set_input_output_native(void* model, const DNNData* input,
return DNN_ERROR;
}
cur_channels = conv_params->output_num;
- if (layer < network->layers_num - 1){
- if (!network->layers[layer].output){
- av_freep(&network->layers[layer].output);
- }
- network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
- if (!network->layers[layer].output){
- return DNN_ERROR;
- }
- }
- else{
- network->layers[layer].output = output->data;
- if (output->width != cur_width || output->height != cur_height || output->channels != cur_channels){
- return DNN_ERROR;
- }
+ break;
+ case DEPTH_TO_SPACE:
+ depth_to_space_params = (DepthToSpaceParams*)network->layers[layer].params;
+ if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){
+ return DNN_ERROR;
}
+ cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size);
+ cur_height *= depth_to_space_params->block_size;
+ cur_width *= depth_to_space_params->block_size;
break;
default:
return DNN_ERROR;
}
+ if (network->layers[layer].output){
+ av_freep(&network->layers[layer].output);
+ }
+ network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
+ if (!network->layers[layer].output){
+ return DNN_ERROR;
+ }
}
+ output->data = network->layers[network->layers_num - 1].output;
+ output->height = cur_height;
+ output->width = cur_width;
+ output->channels = cur_channels;
+
return DNN_SUCCESS;
}
// Loads model and its parameters that are stored in a binary file with following structure:
-// layers_num,conv_input_num,conv_output_num,conv_kernel_size,conv_kernel,conv_biases,conv_input_num...
+// layers_num,layer_type,layer_parameters,layer_type,layer_parameters...
+// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
+// For DEPTH_TO_SPACE layer: block_size
DNNModel* ff_dnn_load_model_native(const char* model_filename)
{
DNNModel* model = NULL;
@@ -111,7 +134,9 @@ DNNModel* ff_dnn_load_model_native(const char* model_filename)
AVIOContext* model_file_context;
int file_size, dnn_size, kernel_size, i;
int32_t layer;
+ LayerType layer_type;
ConvolutionalParams* conv_params;
+ DepthToSpaceParams* depth_to_space_params;
model = av_malloc(sizeof(DNNModel));
if (!model){
@@ -156,39 +181,62 @@ DNNModel* ff_dnn_load_model_native(const char* model_filename)
}
for (layer = 1; layer < network->layers_num; ++layer){
- conv_params = av_malloc(sizeof(ConvolutionalParams));
- if (!conv_params){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- conv_params->input_num = (int32_t)avio_rl32(model_file_context);
- conv_params->output_num = (int32_t)avio_rl32(model_file_context);
- conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
- kernel_size = conv_params->input_num * conv_params->output_num *
- conv_params->kernel_size * conv_params->kernel_size;
- dnn_size += 12 + (kernel_size + conv_params->output_num << 2);
- if (dnn_size > file_size || conv_params->input_num <= 0 ||
- conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- conv_params->kernel = av_malloc(kernel_size * sizeof(float));
- conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
- if (!conv_params->kernel || !conv_params->biases){
+ layer_type = (int32_t)avio_rl32(model_file_context);
+ dnn_size += 4;
+ switch (layer_type){
+ case CONV:
+ conv_params = av_malloc(sizeof(ConvolutionalParams));
+ if (!conv_params){
+ avio_closep(&model_file_context);
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+ conv_params->activation = (int32_t)avio_rl32(model_file_context);
+ conv_params->input_num = (int32_t)avio_rl32(model_file_context);
+ conv_params->output_num = (int32_t)avio_rl32(model_file_context);
+ conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+ kernel_size = conv_params->input_num * conv_params->output_num *
+ conv_params->kernel_size * conv_params->kernel_size;
+ dnn_size += 16 + (kernel_size + conv_params->output_num << 2);
+ if (dnn_size > file_size || conv_params->input_num <= 0 ||
+ conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
+ avio_closep(&model_file_context);
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+ conv_params->kernel = av_malloc(kernel_size * sizeof(float));
+ conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+ if (!conv_params->kernel || !conv_params->biases){
+ avio_closep(&model_file_context);
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+ for (i = 0; i < kernel_size; ++i){
+ conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
+ }
+ for (i = 0; i < conv_params->output_num; ++i){
+ conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+ }
+ network->layers[layer].type = CONV;
+ network->layers[layer].params = conv_params;
+ break;
+ case DEPTH_TO_SPACE:
+ depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
+ if (!depth_to_space_params){
+ avio_closep(&model_file_context);
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+ depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
+ dnn_size += 4;
+ network->layers[layer].type = DEPTH_TO_SPACE;
+ network->layers[layer].params = depth_to_space_params;
+ break;
+ default:
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
- for (i = 0; i < kernel_size; ++i){
- conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
- }
- for (i = 0; i < conv_params->output_num; ++i){
- conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
- }
- network->layers[layer].type = CONV;
- network->layers[layer].params = conv_params;
}
avio_closep(&model_file_context);
@@ -203,7 +251,8 @@ DNNModel* ff_dnn_load_model_native(const char* model_filename)
return model;
}
-static int set_up_conv_layer(Layer* layer, const float* kernel, const float* biases, int32_t input_num, int32_t output_num, int32_t size)
+static int set_up_conv_layer(Layer* layer, const float* kernel, const float* biases, ActivationFunc activation,
+ int32_t input_num, int32_t output_num, int32_t size)
{
ConvolutionalParams* conv_params;
int kernel_size;
@@ -212,6 +261,7 @@ static int set_up_conv_layer(Layer* layer, const float* kernel, const float* bia
if (!conv_params){
return DNN_ERROR;
}
+ conv_params->activation = activation;
conv_params->input_num = input_num;
conv_params->output_num = output_num;
conv_params->kernel_size = size;
@@ -236,6 +286,7 @@ DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
{
DNNModel* model = NULL;
ConvolutionalNetwork* network = NULL;
+ DepthToSpaceParams* depth_to_space_params;
int32_t layer;
model = av_malloc(sizeof(DNNModel));
@@ -253,45 +304,68 @@ DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
switch (model_type){
case DNN_SRCNN:
network->layers_num = 4;
+ break;
+ case DNN_ESPCN:
+ network->layers_num = 5;
+ break;
+ default:
+ av_freep(&network);
+ av_freep(&model);
+ return NULL;
+ }
- network->layers = av_malloc(network->layers_num * sizeof(Layer));
- if (!network->layers){
- av_freep(&network);
- av_freep(&model);
- return NULL;
- }
+ network->layers = av_malloc(network->layers_num * sizeof(Layer));
+ if (!network->layers){
+ av_freep(&network);
+ av_freep(&model);
+ return NULL;
+ }
- for (layer = 0; layer < network->layers_num; ++layer){
- network->layers[layer].output = NULL;
- network->layers[layer].params = NULL;
+ for (layer = 0; layer < network->layers_num; ++layer){
+ network->layers[layer].output = NULL;
+ network->layers[layer].params = NULL;
+ }
+ network->layers[0].type = INPUT;
+ network->layers[0].params = av_malloc(sizeof(InputParams));
+ if (!network->layers[0].params){
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+
+ switch (model_type){
+ case DNN_SRCNN:
+ if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_biases, RELU, 1, 64, 9) != DNN_SUCCESS ||
+ set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_biases, RELU, 64, 32, 1) != DNN_SUCCESS ||
+ set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_biases, RELU, 32, 1, 5) != DNN_SUCCESS){
+ ff_dnn_free_model_native(&model);
+ return NULL;
}
- network->layers[0].type = INPUT;
- network->layers[0].params = av_malloc(sizeof(InputParams));
- if (!network->layers[0].params){
+ break;
+ case DNN_ESPCN:
+ if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_biases, TANH, 1, 64, 5) != DNN_SUCCESS ||
+ set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_biases, TANH, 64, 32, 3) != DNN_SUCCESS ||
+ set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_biases, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
ff_dnn_free_model_native(&model);
return NULL;
}
-
- if (set_up_conv_layer(network->layers + 1, conv1_kernel, conv1_biases, 1, 64, 9) != DNN_SUCCESS ||
- set_up_conv_layer(network->layers + 2, conv2_kernel, conv2_biases, 64, 32, 1) != DNN_SUCCESS ||
- set_up_conv_layer(network->layers + 3, conv3_kernel, conv3_biases, 32, 1, 5) != DNN_SUCCESS){
+ network->layers[4].type = DEPTH_TO_SPACE;
+ depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
+ if (!depth_to_space_params){
ff_dnn_free_model_native(&model);
return NULL;
}
+ depth_to_space_params->block_size = 2;
+ network->layers[4].params = depth_to_space_params;
+ }
- model->set_input_output = &set_input_output_native;
+ model->set_input_output = &set_input_output_native;
- return model;
- default:
- av_freep(&network);
- av_freep(&model);
- return NULL;
- }
+ return model;
}
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
-static void convolve(const float* input, float* output, const ConvolutionalParams* conv_params, int32_t width, int32_t height)
+static void convolve(const float* input, float* output, const ConvolutionalParams* conv_params, int width, int height)
{
int y, x, n_filter, ch, kernel_y, kernel_x;
int radius = conv_params->kernel_size >> 1;
@@ -313,19 +387,53 @@ static void convolve(const float* input, float* output, const ConvolutionalParam
}
}
}
- output[n_filter] = FFMAX(output[n_filter], 0.0);
+ switch (conv_params->activation){
+ case RELU:
+ output[n_filter] = FFMAX(output[n_filter], 0.0);
+ break;
+ case TANH:
+ output[n_filter] = 2.0f / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
+ break;
+ case SIGMOID:
+ output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
+ }
}
output += conv_params->output_num;
}
}
}
+static void depth_to_space(const float* input, float* output, int block_size, int width, int height, int channels)
+{
+ int y, x, by, bx, ch;
+ int new_channels = channels / (block_size * block_size);
+ int output_linesize = width * channels;
+ int by_linesize = output_linesize / block_size;
+ int x_linesize = new_channels * block_size;
+
+ for (y = 0; y < height; ++y){
+ for (x = 0; x < width; ++x){
+ for (by = 0; by < block_size; ++by){
+ for (bx = 0; bx < block_size; ++bx){
+ for (ch = 0; ch < new_channels; ++ch){
+ output[by * by_linesize + x * x_linesize + bx * new_channels + ch] = input[ch];
+ }
+ input += new_channels;
+ }
+ }
+ }
+ output += output_linesize;
+ }
+}
+
DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
{
ConvolutionalNetwork* network = (ConvolutionalNetwork*)model->model;
- InputParams* input_params;
- int cur_width, cur_height;
+ int cur_width, cur_height, cur_channels;
int32_t layer;
+ InputParams* input_params;
+ ConvolutionalParams* conv_params;
+ DepthToSpaceParams* depth_to_space_params;
if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
return DNN_ERROR;
@@ -334,6 +442,7 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
input_params = (InputParams*)network->layers[0].params;
cur_width = input_params->width;
cur_height = input_params->height;
+ cur_channels = input_params->channels;
}
for (layer = 1; layer < network->layers_num; ++layer){
@@ -342,7 +451,17 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
}
switch (network->layers[layer].type){
case CONV:
- convolve(network->layers[layer - 1].output, network->layers[layer].output, (ConvolutionalParams*)network->layers[layer].params, cur_width, cur_height);
+ conv_params = (ConvolutionalParams*)network->layers[layer].params;
+ convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height);
+ cur_channels = conv_params->output_num;
+ break;
+ case DEPTH_TO_SPACE:
+ depth_to_space_params = (DepthToSpaceParams*)network->layers[layer].params;
+ depth_to_space(network->layers[layer - 1].output, network->layers[layer].output,
+ depth_to_space_params->block_size, cur_width, cur_height, cur_channels);
+ cur_height *= depth_to_space_params->block_size;
+ cur_width *= depth_to_space_params->block_size;
+ cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
break;
case INPUT:
return DNN_ERROR;
@@ -362,19 +481,13 @@ void ff_dnn_free_model_native(DNNModel** model)
{
network = (ConvolutionalNetwork*)(*model)->model;
for (layer = 0; layer < network->layers_num; ++layer){
- switch (network->layers[layer].type){
- case CONV:
- if (layer < network->layers_num - 1){
- av_freep(&network->layers[layer].output);
- }
+ av_freep(&network->layers[layer].output);
+ if (network->layers[layer].type == CONV){
conv_params = (ConvolutionalParams*)network->layers[layer].params;
av_freep(&conv_params->kernel);
av_freep(&conv_params->biases);
- av_freep(&conv_params);
- break;
- case INPUT:
- av_freep(&network->layers[layer].params);
}
+ av_freep(&network->layers[layer].params);
}
av_freep(network);
av_freep(model);
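
For reference, a rough sketch (not FFmpeg code) of a writer for the per-layer records that ff_dnn_load_model_native() parses above: every field is a little-endian 32-bit value read with avio_rl32(), floats are stored as IEEE-754 bits decoded with av_int2float(), and the layer_type / activation values follow the enum declaration order in this file (CONV = 1, DEPTH_TO_SPACE = 2; RELU = 0, TANH = 1, SIGMOID = 2). The sketch assumes a little-endian host and leaves the leading layers_num field to the caller, since how it accounts for the implicit INPUT layer is not visible in this diff; the function names are hypothetical.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void write_u32(FILE *f, uint32_t v)
{
    fwrite(&v, 4, 1, f);              /* little-endian host assumed */
}

static void write_f32(FILE *f, float v)
{
    uint32_t bits;
    memcpy(&bits, &v, 4);             /* raw bits, matching av_int2float() on the reader side */
    fwrite(&bits, 4, 1, f);
}

/* CONV record: activation, input_num, output_num, kernel_size, kernel, biases */
void write_conv_layer(FILE *f, uint32_t activation,
                      uint32_t input_num, uint32_t output_num,
                      uint32_t kernel_size,
                      const float *kernel, const float *biases)
{
    uint32_t i, n = input_num * output_num * kernel_size * kernel_size;

    write_u32(f, 1);                  /* layer_type = CONV */
    write_u32(f, activation);
    write_u32(f, input_num);
    write_u32(f, output_num);
    write_u32(f, kernel_size);
    for (i = 0; i < n; ++i)
        write_f32(f, kernel[i]);
    for (i = 0; i < output_num; ++i)
        write_f32(f, biases[i]);
}

/* DEPTH_TO_SPACE record: block_size */
void write_depth_to_space_layer(FILE *f, uint32_t block_size)
{
    write_u32(f, 2);                  /* layer_type = DEPTH_TO_SPACE */
    write_u32(f, block_size);
}
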