解析网络配置文件,初始化网络参数:
parse.c/parse_network_cfg()——>parse.c/parse_network_cfg_custom()——>parse.c/read_cfg()——>list_insert()
先看几个重要的结构体:
- list.h/list结构体
// Header of a doubly linked list: node count plus pointers to both ends.
typedef struct list{
int size; // number of nodes currently in the list
node *front; // pointer to the first (front) node
node *back; // pointer to the last (back) node
} list;
- list.h/node结构体
// One node of the doubly linked list.
typedef struct node{
void *val; // payload; its real type depends on the list: nodes of the sections list hold a section*, nodes of a section's options list hold a kvp*
struct node *next; // pointer to the following node
struct node *prev; // pointer to the preceding node
} node;
- parse.c/section结构体
// One [xxx] section of the cfg file, i.e. one layer description.
typedef struct{
char *type; // layer type: the "[xxx]" header line as read from the cfg file
list *options; // list holding this layer's parameters
}section;
- option_list.h/kvp结构体
// One "key=val" option line of a cfg section.
typedef struct{
char *key; // e.g. for "batch_normalize=1", key is "batch_normalize" and val is "1"
char *val;
int used; // set to 0 by option_insert(); presumably flags whether the option has been read — TODO confirm
} kvp;
现在开始走流程:
/*
 * Parse the cfg file `filename` into a network, keeping the batch size
 * that the cfg file itself specifies.
 */
network parse_network_cfg(char *filename)
{
    /* 0 means "no override": parse_network_cfg_custom() only replaces the
     * cfg batch size when its batch argument is > 0. */
    const int batch_override = 0;

    return parse_network_cfg_custom(filename, batch_override);
}
/*
 * Build a network from the cfg file `filename`.
 *
 * filename: path of the cfg file, handed to read_cfg().
 * batch:    when > 0, replaces the batch size parsed from the [net] section.
 * returns:  the populated network, by value.
 *
 * Flow: read the cfg into a list of sections, use the first section
 * ([net] / [network]) for the global options, then turn every following
 * section into one layer. Each section is released with free_section()
 * once its options have been consumed.
 */
network parse_network_cfg_custom(char *filename, int batch)
{
    /* Each node of `sections` holds one section: the layer type plus a
     * list of that layer's key/value options. */
    list *sections = read_cfg(filename);
    node *n = sections->front;
    if(!n) error("Config file has no sections");
    /* The first section is [net], so it is not counted as a layer. */
    network net = make_network(sections->size - 1);
    net.gpu_index = gpu_index;
    /* Carries the input geometry (h, w, c, inputs) from one layer to the
     * next. Zero-initialised so no indeterminate field is ever copied when
     * it is passed by value to the parse_* helpers. */
    size_params params = {0};

    section *s = (section *)n->val;
    list *options = s->options;
    if(!is_network(s)) error("First section must be [net] or [network]");
    parse_net_options(options, &net);

    params.h = net.h;
    params.w = net.w;
    params.c = net.c;
    params.inputs = net.inputs;
    if (batch > 0) net.batch = batch;
    params.batch = net.batch;   /* the batch size is the same for every layer */
    params.time_steps = net.time_steps;
    params.net = net;

    float bflops = 0;
    size_t workspace_size = 0;
    n = n->next;                /* skip [net]; continue with the first layer */
    int count = 0;
    free_section(s);
    /* Column header for the per-layer summary printed while parsing. */
    fprintf(stderr, "layer filters size input output\n");
    while(n){
        params.index = count;
        fprintf(stderr, "%4d ", count);
        s = (section *)n->val;
        options = s->options;
        layer l = {0};
        /* Convert the textual section type into its enum value. */
        LAYER_TYPE lt = string_to_layer_type(s->type);
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
        }else if(lt == LOCAL){
            l = parse_local(options, params);
        }else if(lt == ACTIVE){
            l = parse_activation(options, params);
        }else if(lt == RNN){
            l = parse_rnn(options, params);
        }else if(lt == GRU){
            l = parse_gru(options, params);
        }else if(lt == CRNN){
            l = parse_crnn(options, params);
        }else if(lt == CONNECTED){
            l = parse_connected(options, params);
        }else if(lt == CROP){
            l = parse_crop(options, params);
        }else if(lt == COST){
            l = parse_cost(options, params);
        }else if(lt == REGION){
            l = parse_region(options, params);
        }else if(lt == YOLO){
            l = parse_yolo(options, params);
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
            l = parse_softmax(options, params);
            net.hierarchy = l.softmax_tree;
        }else if(lt == NORMALIZATION){
            l = parse_normalization(options, params);
        }else if(lt == BATCHNORM){
            l = parse_batchnorm(options, params);
        }else if(lt == MAXPOOL){
            l = parse_maxpool(options, params);
        }else if(lt == REORG){
            l = parse_reorg(options, params);
        }else if(lt == REORG_OLD){
            l = parse_reorg_old(options, params);
        }else if(lt == AVGPOOL){
            l = parse_avgpool(options, params);
        }else if(lt == ROUTE){
            l = parse_route(options, params, net);
        }else if(lt == UPSAMPLE){
            l = parse_upsample(options, params, net);
        }else if(lt == SHORTCUT){
            l = parse_shortcut(options, params, net);
        }else if(lt == DROPOUT){
            /* Dropout reuses the previous layer's buffers, so there must be
             * a previous layer; otherwise net.layers[-1] would be read. */
            if(count == 0) error("Dropout layer cannot be the first layer");
            l = parse_dropout(options, params);
            l.output = net.layers[count-1].output;
            l.delta = net.layers[count-1].delta;
#ifdef GPU
            l.output_gpu = net.layers[count-1].output_gpu;
            l.delta_gpu = net.layers[count-1].delta_gpu;
#endif
        }else{
            fprintf(stderr, "Type not recognized: %s\n", s->type);
        }
        l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
        l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
        l.dontload = option_find_int_quiet(options, "dontload", 0);
        l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
        option_unused(options);
        net.layers[count] = l;
        /* Track the largest workspace requested by any layer. */
        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
        free_section(s);
        n = n->next;
        ++count;
        if(n){
            /* This layer's output geometry is the next layer's input. */
            params.h = l.out_h;
            params.w = l.out_w;
            params.c = l.out_c;
            params.inputs = l.outputs;
        }
        if (l.bflops > 0) bflops += l.bflops;
    }
    free_list(sections);
    /* NOTE(review): with a cfg that contains only [net] (net.n == 0) these
     * two helpers are still called; their handling of an empty network is
     * not visible here — confirm. */
    net.outputs = get_network_output_size(net);
    net.output = get_network_output(net);
    printf("Total BFLOPS %5.3f \n", bflops);
    if(workspace_size){
#ifdef GPU
        if(gpu_index >= 0){
            net.workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1);
        }else {
            net.workspace = calloc(1, workspace_size);
        }
#else
        net.workspace = calloc(1, workspace_size);
#endif
    }

    /* Only look at the last layer when there is one. */
    if (net.n > 0) {
        LAYER_TYPE last = net.layers[net.n - 1].type;
        if ((net.w % 32 != 0 || net.h % 32 != 0) && (last == YOLO || last == REGION || last == DETECTION)) {
            printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n",
                net.w, net.h);
        }
    }
    return net;
}
read_cfg()函数和list_insert()函数完成了cfg 文件中所有layer的读入,构成了一个框架,而具体参数的填充由read_option()函数、option_insert()函数和list_insert()函数完成。
/*
 * Read a cfg file (best read alongside cfg/yolov3.cfg) into a list of
 * sections.
 *
 * The file is processed line by line:
 *   - a line starting with '[' opens a new layer: a section is allocated,
 *     the line itself becomes its type, an empty options list is created,
 *     and the section is appended to the returned list;
 *   - an empty line, or one starting with '#' or ';', is discarded;
 *   - any other line is handed to read_option(), which stores it as a
 *     key/value pair in the current section's options list.
 *
 * filename: path of the cfg file; file_error() is called if it cannot be
 *           opened.
 * returns:  the list of sections. Section type strings and accepted option
 *           keys/values point into the line buffers returned by fgetl(),
 *           so those buffers are deliberately not freed here.
 */
list *read_cfg(char *filename)
{
    FILE *file = fopen(filename, "r");
    if(file == 0) file_error(filename);
    char *line;
    int nu = 0;                 /* current line number, for diagnostics */
    list *sections = make_list();
    section *current = 0;       /* section that option lines are added to */
    while((line=fgetl(file)) != 0){
        ++ nu;
        strip(line);
        switch(line[0]){
            case '[':
                current = malloc(sizeof(section));
                list_insert(sections, current);     /* append the new layer */
                current->options = make_list();     /* will hold its parameters */
                current->type = line;               /* e.g. [net], [convolutional], [maxpool] */
                break;
            case '\0':
            case '#':
            case ';':
                free(line);
                break;
            default:
                /* An option line that appears before the first [section]
                 * has no options list to go into; report it as a parse
                 * error instead of dereferencing a null section. */
                if(!current || !read_option(line, current->options)){
                    fprintf(stderr, "Config file error line %d, could not parse: %s\n", nu, line);
                    free(line);
                }
                break;
        }
    }
    fclose(file);
    return sections;
}
/*
 * Append `val` to the back of list `l` in a newly allocated node and bump
 * the list's size. `val` is stored as-is (e.g. a section* or a kvp*).
 */
void list_insert(list *l, void *val)
{
    node *tail = l->back;
    node *fresh = malloc(sizeof(node));

    fresh->val = val;
    fresh->next = 0;
    fresh->prev = tail;         /* stays 0 when the list was empty */

    if(tail){
        tail->next = fresh;
    }else{
        l->front = fresh;       /* first node: it is also the front */
    }

    l->back = fresh;
    l->size += 1;
}
read_option()函数、option_insert()函数和list_insert()函数完成了层具体参数的读入。
/*
 * Split a "key=value" line into its key and value and append the pair to
 * `options`.
 *
 * s:       NUL-terminated line; on success it is modified in place (the
 *          first '=' is overwritten with '\0') and the stored key/value
 *          point into it, so the caller must keep the buffer alive.
 * options: list that receives the new key/value pair.
 * returns: 1 when the pair was inserted; 0 when the line is malformed,
 *          i.e. it has no '=' at all or nothing follows the '='. On
 *          failure `s` is left untouched so the caller can print it.
 */
int read_option(char *s, list *options)
{
    char *eq = strchr(s, '=');

    if(!eq) return 0;               /* no '=': not a key=value line */
    if(eq[1] == '\0') return 0;     /* "key=" with an empty value */

    *eq = '\0';                     /* terminate the key */
    option_insert(options, s, eq + 1);
    return 1;
}
/*
 * Wrap `key` and `val` in a newly allocated kvp (with used = 0) and append
 * it to list `l`. The strings are stored by pointer, not copied.
 */
void option_insert(list *l, char *key, char *val)
{
    kvp *entry = malloc(sizeof(kvp));

    *entry = (kvp){ .key = key, .val = val, .used = 0 };
    list_insert(l, entry);
}
最终结构如下图所示(sections链表中每个node->val指向一个section;每个section->options又是一个链表,其中每个node->val指向一个kvp):
返回到parse_network_cfg_custom()函数继续读代码,接着的是网络具体参数的解析,流程为:
构造网络,并初始化部分参数——>解析[net]层参数——>解析[convolutional][maxpool]等层的参数
step1:构造网络,并初始化部分参数
/*
 * Create a zero-initialised network with room for `n` layers.
 *
 * n:       number of layers.
 * returns: the network by value, with its layer array and `seen` counter
 *          allocated (plus the GPU bookkeeping slots when built with GPU).
 */
network make_network(int n)
{
    network fresh = {0};

    fresh.n = n;
    fresh.layers = calloc(fresh.n, sizeof(layer));  /* zeroed layer array */
    /* Single int counter; presumably the number of images seen so far —
     * TODO confirm against its users. */
    fresh.seen = calloc(1, sizeof(int));
#ifdef GPU
    fresh.input_gpu = calloc(1, sizeof(float *));
    fresh.truth_gpu = calloc(1, sizeof(float *));

    fresh.input16_gpu = calloc(1, sizeof(float *));
    fresh.output16_gpu = calloc(1, sizeof(float *));
    fresh.max_input16_size = calloc(1, sizeof(size_t));
    fresh.max_output16_size = calloc(1, sizeof(size_t));
#endif
    return fresh;
}
step2:解析[net]层参数
/*
 * Fill the global fields of `net` from the options of the [net] section.
 *
 * options: option list of the [net] section.
 * net:     network to populate.
 *
 * Calls error() when no input size is given, when subdivisions is 0, or
 * when the "steps" policy is selected without both "steps" and "scales".
 */
void parse_net_options(list *options, network *net)
{
    net->batch = option_find_int(options, "batch",1);   /* batch size as written in the cfg */
    net->learning_rate = option_find_float(options, "learning_rate", .001);
    net->momentum = option_find_float(options, "momentum", .9);
    net->decay = option_find_float(options, "decay", .0001);
    int subdivs = option_find_int(options, "subdivisions",1);
    net->time_steps = option_find_int_quiet(options, "time_steps",1);
    /* Integer division by zero is undefined; reject it explicitly. */
    if(subdivs == 0) error("subdivisions must not be 0");
    net->batch /= subdivs;          /* batch size actually used by the code */
    net->batch *= net->time_steps;
    net->subdivisions = subdivs;

    net->adam = option_find_int_quiet(options, "adam", 0);
    if(net->adam){
        net->B1 = option_find_float(options, "B1", .9);
        net->B2 = option_find_float(options, "B2", .999);
        net->eps = option_find_float(options, "eps", .000001);
    }

    net->h = option_find_int_quiet(options, "height",0);
    net->w = option_find_int_quiet(options, "width",0);
    net->c = option_find_int_quiet(options, "channels",0);
    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
    net->flip = option_find_int_quiet(options, "flip", 1);

    net->small_object = option_find_int_quiet(options, "small_object", 0);
    net->angle = option_find_float_quiet(options, "angle", 0);
    net->aspect = option_find_float_quiet(options, "aspect", 1);
    net->saturation = option_find_float_quiet(options, "saturation", 1);
    net->exposure = option_find_float_quiet(options, "exposure", 1);
    net->hue = option_find_float_quiet(options, "hue", 0);
    net->power = option_find_float_quiet(options, "power", 4);

    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");

    char *policy_s = option_find_str(options, "policy", "constant");
    net->policy = get_policy(policy_s);
    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
#ifdef CUDNN_HALF
    net->burn_in = 0;
#endif
    if(net->policy == STEP){
        net->step = option_find_int(options, "step", 1);
        net->scale = option_find_float(options, "scale", 1);
    } else if (net->policy == STEPS){
        char *l = option_find(options, "steps");
        char *p = option_find(options, "scales");
        if(!l || !p) error("STEPS policy must have steps and scales in cfg file");

        /* The number of steps is the number of commas in "steps" plus one. */
        int len = strlen(l);
        int n = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (l[i] == ',') ++n;
        }
        int *steps = calloc(n, sizeof(int));
        float *scales = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            steps[i] = atoi(l);
            /* When "scales" has fewer entries than "steps", the missing
             * ones default to 1 (learning rate unchanged) instead of
             * reading through an invalid pointer. */
            scales[i] = p ? (float)atof(p) : 1;

            /* Advance to the text after the next comma, if there is one;
             * never do arithmetic on a null strchr() result. */
            char *next_step = strchr(l, ',');
            l = next_step ? next_step + 1 : 0;
            if(p){
                char *next_scale = strchr(p, ',');
                p = next_scale ? next_scale + 1 : 0;
            }
        }
        net->scales = scales;       /* factor applied to learning_rate at each change */
        net->steps = steps;         /* iteration count at which each change happens */
        net->num_steps = n;         /* number of learning_rate changes */
    } else if (net->policy == EXP){
        net->gamma = option_find_float(options, "gamma", 1);
    } else if (net->policy == SIG){
        net->gamma = option_find_float(options, "gamma", 1);
        net->step = option_find_int(options, "step", 1);
    } else if (net->policy == POLY || net->policy == RANDOM){
        /* Nothing extra to read: net->power was already set above. */
    }
    net->max_batches = option_find_int(options, "max_batches", 0);
}
step3:具体网络层参数的解析,以[convolutional]层为例
/*
 * Build a convolutional layer from the options of a [convolutional]
 * section.
 *
 * options: option list of the section.
 * params:  input geometry (h, w, c), batch size and a copy of the network
 *          settings for this layer.
 * returns: the layer created by make_convolutional_layer(), with the
 *          remaining per-layer options applied.
 */
convolutional_layer parse_convolutional(list *options, size_params params)
{
    int filters = option_find_int(options, "filters",1);        /* number of filters */
    int ksize = option_find_int(options, "size",1);             /* filter size */
    int stride = option_find_int(options, "stride",1);          /* convolution stride */
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    /* A non-zero "pad" overrides "padding" with half the filter size. */
    if(pad) padding = ksize/2;

    /* Activation name, "logistic" when absent, converted to its enum value. */
    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

    /* The layer needs a full image-shaped input. */
    if(!params.h || !params.w || !params.c) error("Layer before convolutional layer must output image.");

    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int binary = option_find_int_quiet(options, "binary", 0);
    int xnor = option_find_int_quiet(options, "xnor", 0);

    convolutional_layer conv = make_convolutional_layer(
        params.batch, params.h, params.w, params.c,
        filters, ksize, stride, padding, activation,
        batch_normalize, binary, xnor, params.net.adam);

    conv.flipped = option_find_int_quiet(options, "flipped", 0);
    conv.dot = option_find_float_quiet(options, "dot", 0);

    /* Copy the network-wide Adam hyper-parameters when Adam is enabled. */
    if(params.net.adam){
        conv.B1 = params.net.B1;
        conv.B2 = params.net.B2;
        conv.eps = params.net.eps;
    }

    return conv;
}
/*
 * Allocate and initialise a convolutional layer.
 *
 * batch:           mini-batch size.
 * h, w, c:         input height, width and channel count.
 * n:               number of filters (also the output channel count).
 * size:            filter side length.
 * stride:          convolution stride.
 * padding:         number of zero rows/columns added around the input.
 * activation:      activation applied by the layer.
 * batch_normalize: non-zero to allocate the batch-normalisation buffers.
 * binary:          non-zero to allocate buffers for binarised weights.
 * xnor:            non-zero to allocate buffers for binarised weights and
 *                  inputs.
 * adam:            non-zero to allocate the Adam moment buffers.
 * returns:         the layer by value; a one-line summary is printed to
 *                  stderr.
 *
 * Buffers shared between modes (binary_weights for binary/xnor, scales for
 * binary/batch_normalize, binary_weights_gpu) are allocated only once, so
 * enabling several modes together does not leak the first allocation.
 */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{
    int i;
    int nweights = c*n*size*size;   /* total number of filter weights */
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;                        /* input height */
    l.w = w;                        /* input width */
    l.c = c;                        /* input channels */
    l.n = n;                        /* number of filters */
    l.binary = binary;              /* binarise the weights */
    l.xnor = xnor;                  /* binarise the weights and the input */
    l.batch = batch;                /* mini-batch size */
    l.stride = stride;              /* convolution stride */
    l.size = size;                  /* filter size */
    l.pad = padding;                /* zero padding around the input */
    l.batch_normalize = batch_normalize;

    l.weights = calloc(nweights, sizeof(float));            /* current weights */
    l.weight_updates = calloc(nweights, sizeof(float));     /* gradient of the loss w.r.t. the weights */

    l.biases = calloc(n, sizeof(float));                    /* current biases */
    l.bias_updates = calloc(n, sizeof(float));              /* gradient of the loss w.r.t. the biases */

    /* Random initial weights: uniform in [-1, 1] scaled by
     * sqrt(2 / (size*size*c)). */
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < nweights; ++i) l.weights[i] = scale*rand_uniform(-1, 1);

    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;                                /* output height */
    l.out_w = out_w;                                /* output width */
    l.out_c = n;                                    /* output channels */
    l.outputs = l.out_h * l.out_w * l.out_c;        /* elements per output image */
    l.inputs = l.w * l.h * l.c;                     /* elements per input image */

    l.output = calloc(l.batch*l.outputs, sizeof(float));    /* outputs for one batch */
    l.delta = calloc(l.batch*l.outputs, sizeof(float));     /* error terms for one batch */

    /* Function pointers for the three phases: forward, backward, update. */
    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;

    if(binary){
        l.binary_weights = calloc(nweights, sizeof(float));
        l.cweights = calloc(nweights, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        /* Reuse the buffer when binary already allocated it. */
        if(!l.binary_weights) l.binary_weights = calloc(nweights, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));

        /* out_h*out_w advanced to the next multiple of 8 (a full 8 is
         * added when it already is a multiple). */
        int align = 8;
        int src_align = l.out_h*l.out_w;
        l.bit_align = src_align + (align - src_align % align);
    }

    if(batch_normalize){
        /* Reuse the buffer when binary already allocated it. */
        if(!l.scales) l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
    if(adam){
        l.adam = 1;
        l.m = calloc(nweights, sizeof(float));
        l.v = calloc(nweights, sizeof(float));
    }

#ifdef GPU
    l.forward_gpu = forward_convolutional_layer_gpu;
    l.backward_gpu = backward_convolutional_layer_gpu;
    l.update_gpu = update_convolutional_layer_gpu;

    if(gpu_index >= 0){
        if (adam) {
            l.m_gpu = cuda_make_array(l.m, nweights);
            l.v_gpu = cuda_make_array(l.v, nweights);
        }

        l.weights_gpu = cuda_make_array(l.weights, nweights);
#ifdef CUDNN_HALF
        l.weights_gpu16 = cuda_make_array(NULL, nweights / 2);
        l.weight_updates_gpu16 = cuda_make_array(NULL, nweights / 2);
#endif
        l.weight_updates_gpu = cuda_make_array(l.weight_updates, nweights);

        l.biases_gpu = cuda_make_array(l.biases, n);
        l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

        l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
        l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

        /* One device copy serves both modes; allocating it once per mode
         * would leak the first array when both are enabled. */
        if(binary || xnor){
            l.binary_weights_gpu = cuda_make_array(l.weights, nweights);
        }
        if(xnor){
            l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
        }

        if(batch_normalize){
            l.mean_gpu = cuda_make_array(l.mean, n);
            l.variance_gpu = cuda_make_array(l.variance, n);

            l.rolling_mean_gpu = cuda_make_array(l.mean, n);
            l.rolling_variance_gpu = cuda_make_array(l.variance, n);

            l.mean_delta_gpu = cuda_make_array(l.mean, n);
            l.variance_delta_gpu = cuda_make_array(l.variance, n);

            l.scales_gpu = cuda_make_array(l.scales, n);
            l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

            l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
            l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        }
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.normDstTensorDesc);
        cudnnCreateTensorDescriptor(&l.normDstTensorDescF16);
        cudnnCreateTensorDescriptor(&l.normTensorDesc);
        cudnnCreateTensorDescriptor(&l.srcTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnCreateFilterDescriptor(&l.weightDesc);
        cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
        cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
        cudnnCreateFilterDescriptor(&l.dweightDesc);
        cudnnCreateConvolutionDescriptor(&l.convDesc);
        cudnn_convolutional_setup(&l, cudnn_fastest);
#endif
    }
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    /* 2 * n * size^2 * c * out_h * out_w, expressed in units of 1e9. */
    l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
    if (l.xnor) fprintf(stderr, "convX ");
    else fprintf(stderr, "conv ");
    fprintf(stderr, "%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);

    return l;
}
版权声明:本文为sinat_35862942原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。