基于FPGA的脉动阵列(Output Stationary)

单个PE的代码

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2021/02/10 19:09:28
// Design Name: 
// Module Name: systolic_pe
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// Processing element of an output-stationary systolic array.
//
// Every clock the incoming A and B operands are registered and presented on
// A_out / B_out one cycle later, so they can be chained to a neighbouring PE.
// While calc_en is high the unsigned product A_shift_in * B_shift_in is
// folded into a local accumulator, which is continuously visible on Sum.
// No register in this module has a reset.
module systolic_pe
#(parameter DATA_WIDTH = 16)
(
input clk,
input [DATA_WIDTH-1:0]A_shift_in,
input [DATA_WIDTH-1:0]B_shift_in,
input clear,                       // with calc_en: load the current product instead of accumulating it
input calc_en,                     // accumulator update enable; when low the accumulator holds (clear is ignored)
output [DATA_WIDTH-1:0]A_out,
output [DATA_WIDTH-1:0]B_out,
output [2*DATA_WIDTH-1:0]Sum
    );

// Pipeline copies of the operands (forwarded to the neighbouring PEs).
logic [DATA_WIDTH-1:0]   a_reg;
logic [DATA_WIDTH-1:0]   b_reg;
// Running partial sum; twice the operand width so a single product always fits.
logic [2*DATA_WIDTH-1:0] acc;

always_ff @(posedge clk)
begin
    // Operand forwarding is unconditional: it runs even when calc_en is low.
    a_reg <= A_shift_in;
    b_reg <= B_shift_in;

    // Accumulator: the first term of a new dot product overwrites the old
    // value, every following term is added on top of it.
    if (calc_en && clear)
        acc <= A_shift_in * B_shift_in;
    else if (calc_en)
        acc <= acc + A_shift_in * B_shift_in;
end

assign A_out = a_reg;
assign B_out = b_reg;
assign Sum   = acc;

endmodule

PE脉动阵列

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2021/02/10 19:22:59
// Design Name: 
// Module Name: systolic_pe_array
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// N x N output-stationary systolic array computing Sum = A * B, where A is
// N x k and B is k x N.
//
// Operation
//   * A one-cycle `start` pulse resets the cycle counter and raises `busy`.
//   * `busy` stays high for 2*N + k - 2 cycles (count = 0 .. 2*N + k - 3).
//   * `clear` is high during the first busy cycle, so every PE overwrites
//     its accumulator with its current input product on that cycle.
//   * `done` pulses for one cycle right after `busy` falls; Sum holds the
//     result from then on.
//
// Input contract (not enforced here)
//   A_in[i] feeds the left edge of row i and B_in[j] the top edge of column
//   j. PE[i][j] sees A_in[i] delayed by j cycles and B_in[j] delayed by i
//   cycles, so the caller is expected to skew the streams (row i / column j
//   delayed by i / j cycles) and to drive zeros outside the valid window.
//   k is expected to be >= 1.
module systolic_pe_array
#(parameter DATA_WIDTH = 16,
  parameter N = 4)
(
input logic clk,
input logic rst,
input [15:0]k,
input logic [DATA_WIDTH-1:0] A_in [0:N-1],
input logic [DATA_WIDTH-1:0] B_in [0:N-1],
input logic start,
output logic done,
output logic [2*DATA_WIDTH-1:0] Sum [0:N-1][0:N-1]
    );

// The counter must reach 2*N + k - 3 (and steps once past it) for any 16-bit
// k. A fixed 10-bit counter would wrap before the terminal value for large k
// and `busy` would never fall.
localparam int COUNT_W = $clog2(2*N + 65536);

logic [COUNT_W-1:0] count;
logic busy;
logic busy_ff;
logic clear;
logic calc_en;

// Operand wavefronts. Column 0 of A_shift and row 0 of B_shift are the array
// inputs; the extra column/row (index N) collects the unused outputs of the
// last PEs.
logic [DATA_WIDTH-1:0] A_shift [0:N-1][0:N];
logic [DATA_WIDTH-1:0] B_shift [0:N][0:N-1];

// count: cycle index within the current run
always_ff@(posedge clk,posedge rst)
if(rst)
    count<=0;
else if(start)
    count<=0;
else if(busy)
    count<=count+1;

// busy: high from the cycle after start until the last product is consumed
always_ff@(posedge clk,posedge rst)
if(rst)
    busy<=0;
else if(start)
    busy<=1;
else if(count==2*N+k-3)
    busy<=0;

// busy_ff: one-cycle delayed busy, used for falling-edge detection
always_ff@(posedge clk,posedge rst)
if(rst)
    busy_ff<=0;
else
    busy_ff<=busy;

// done: single-cycle pulse on the falling edge of busy
assign done=(busy_ff&&~busy);

// clear: follows start by one cycle, i.e. coincides with the first busy cycle
always_ff@(posedge clk,posedge rst)
if(rst)
    clear<=0;
else if(start)
    clear<=1;
else
    clear<=0;

// calc_en: PEs accumulate for as long as the run is active
assign calc_en=busy;

genvar i,j;
generate
    // Array edges. Genvar-indexed continuous assignments are used (instead of
    // a procedural loop) so that each element of A_shift / B_shift has exactly
    // one, statically identifiable driver; the remaining elements are driven
    // by the PE output ports below.
    for(i=0;i<N;i++)
    begin: a_edge
        assign A_shift[i][0]=A_in[i];
    end
    for(j=0;j<N;j++)
    begin: b_edge
        assign B_shift[0][j]=B_in[j];
    end

    // PE grid: A flows left-to-right, B flows top-to-bottom.
    for(i=0;i<N;i++)
    begin:pe_loop_i
        for(j=0;j<N;j++)
        begin: pe_loop_j
        // DATA_WIDTH must be forwarded, otherwise the PEs stay at their
        // default width regardless of this module's parameter.
        systolic_pe #(.DATA_WIDTH(DATA_WIDTH)) pe_inst(
            .clk(clk),
            .A_shift_in(A_shift[i][j]),
            .B_shift_in(B_shift[i][j]),
            .clear(clear),                       // restart accumulation on the first busy cycle
            .calc_en(calc_en),
            .A_out(A_shift[i][j+1]),
            .B_out(B_shift[i+1][j]),
            .Sum(Sum[i][j])
        );
        end
    end
endgenerate

endmodule

测试平台

`timescale 1ns / 1ps
//
// Company: 
// Engineer: 
// 
// Create Date: 2021/02/10 19:46:38
// Design Name: 
// Module Name: test
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//


// Self-checking testbench for systolic_pe_array.
//
// Multiplies a fixed N x k matrix A by a fixed k x N matrix B, feeding the
// array with skewed operand streams (row i of A and column j of B are each
// delayed by i / j cycles and zero-padded outside their valid window).
// When the DUT raises `done`, Sum is compared against a behavioural
// matrix product and the simulation is finished.
//
// Note: the matrix literals below are written for N = 4 and k = 3.
module test;
parameter N = 4;
parameter DATA_WIDTH = 16;
parameter k = 3;

logic [9:0]count;
logic busy;
logic [DATA_WIDTH-1:0] MatrixA [0:N-1][0:k-1];
logic [DATA_WIDTH-1:0] MatrixB [0:k-1][0:N-1];
logic clk;
logic rst;
logic start;
logic done;
logic [DATA_WIDTH-1:0] A_shift_in [0:N-1];        
logic [DATA_WIDTH-1:0] B_shift_in [0:N-1];
logic [2*DATA_WIDTH-1:0] Sum [0:N-1][0:N-1];
// Golden-model scratch value and mismatch counter used by the checker.
logic [2*DATA_WIDTH-1:0] expected;
int errors;
//MatrixA and B: load the operands and print them
initial 
begin
    MatrixA='{'{1,2,3},'{4,5,6},'{7,8,9},'{10,11,12}};
    MatrixB='{'{1,2,3,4},'{5,6,7,8},'{9,10,11,12}};
    $display("A:");
    for(int i=0;i<N;i++)
       for(int j=0;j<k;j++)
       begin
           if(j==k-1)
               $write("%d\n",MatrixA[i][j]);
            else
               $write("%d,",MatrixA[i][j]);
       end
    $display("B:");
    for(int i=0;i<k;i++)
        for(int j=0;j<N;j++)
        begin
            if(j==N-1)
                $write("%d\n",MatrixB[i][j]);
            else
                $write("%d,",MatrixB[i][j]);
        end
end
//clk: 10 ns period
initial 
begin
    clk=0;
    forever begin
        #5 clk=~clk;
    end
end
//rst: asserted for the first 10 ns
initial 
begin
    rst=1;
    #10
    rst=0;
end
//start: one-clock pulse, changed between rising edges to avoid races
initial
begin
    start=0;
    #50
    start=1;
    #10
    start=0;
end
//count: local copy of the run cycle counter, used to index the operand streams
always_ff@(posedge clk,posedge rst)
if(rst)
    count<=0;
else if(start)
    count<=0;
else if(busy)
    count<=count+1;
//busy: local copy of the run window, same timing as the DUT's internal one
always_ff@(posedge clk,posedge rst)
if(rst)
    busy<=0;
else if(start)
    busy<=1;
else if(count==2*N+k-3)
    busy<=0;
//A_shift_in: row i of A is valid for count in [i, i+k), zero otherwise
always_comb
begin
    for(int i=0;i<N;i++)
        A_shift_in[i]=(count>=i&&count<k+i)?MatrixA[i][count-i]:0;
end
//B_shift_in: column j of B is valid for count in [j, j+k), zero otherwise
always_comb 
begin
    for(int j=0;j<N;j++)
        B_shift_in[j]=(count>=j&&count<k+j)?MatrixB[count-j][j]:0;    
end
//checker: compare the array output with a behavioural matrix product
initial
begin
    errors=0;
    wait(done===1'b1);
    // Sample in the middle of the cycle, away from the active clock edge.
    @(negedge clk);
    $display("Sum:");
    for(int i=0;i<N;i++)
        for(int j=0;j<N;j++)
        begin
            expected='0;
            for(int t=0;t<k;t++)
                expected=expected+MatrixA[i][t]*MatrixB[t][j];
            if(Sum[i][j]!==expected)
            begin
                errors++;
                $display("MISMATCH Sum[%0d][%0d]: got %0d, expected %0d",i,j,Sum[i][j],expected);
            end
            if(j==N-1)
                $write("%d\n",Sum[i][j]);
            else
                $write("%d,",Sum[i][j]);
        end
    if(errors==0)
        $display("TEST PASSED");
    else
        $display("TEST FAILED: %0d mismatch(es)",errors);
    $finish;
end
//watchdog: stop the simulation if done never arrives
initial
begin
    #100000;
    $display("TEST FAILED: timeout waiting for done");
    $finish;
end
//inst: forward the bench parameters so DUT and bench always agree on sizes
systolic_pe_array
#(
.DATA_WIDTH(DATA_WIDTH),
.N(N)
)
U
(
.clk(clk),
.rst(rst),
.k(16'(k)),
.A_in(A_shift_in),
.B_in(B_shift_in),
.start(start),
.done(done),
.Sum(Sum)
    );

endmodule


版权声明:本文为qq_40268672原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。