FPUコアを使ってみる
こちらのFPUコアを試してみる。
GitHubリポジトリ
ドキュメント
インターフェース
input clk
input rmode (Rounding Mode)
input fpu_op (Floating Point Operation Select)
input opa (Operand A)
input opb (Operand B)
output out (Result Output)
output snan (opaとopbのどちらかがSNANならアサート)
output qnan (outがQNANならアサート)
output inf (outが無限大ならアサート)
output ine (outが不正確(inexact: 丸めによって結果に誤差が生じた場合)ならアサート)
output overflow (必要になったら調べる)
output underflow (必要になったら調べる)
output div_by_zero (fpu_opが割り算で、opbが0の場合アサート)
output zero (outが0ならアサート)
FPUオペレーション(fpu_op)
fpu_op (Floating Point Operation Select)
0 Add
1 Subtract
2 Multiply
3 Divide
4 Int to float conversion
5 Float to int conversion
6 Remainder (未実装)
7 予約
丸めモード(rmode)
rmode (Rounding Mode)
0 Round to nearest even
1 Round to zero(切り捨て)
2 Round to +INF(正の無限大方向へ丸め・Round up)
3 Round to -INF(負の無限大方向へ丸め・Round down)
動かしてみる
$ cd ~/src
$ git clone git@github.com:freecores/fpu.git
$ cd fpu/
fpu/verilog/ 以下のデバッグ用のコードを手作業で削除する(詳細は後述)
加算・減算・乗算を試してみる
$ iverilog -g 2012 -s fpu_test fpu_test.sv ./verilog/* && ./a.out
浮動小数点数のバイナリ表現はこちらを参考に
波形をみると良い感じに動いてそう
https://gyazo.com/f4e9ec26499669737c8fb5ce45ca7917
気になる点
clkの立ち上がりから1ns遅れさせてるのは何か意図があるのだろうか?
code:fpu_test.sv
`timescale 1ns/1ps

// Smoke test for the FPU core: drives a few add / subtract / multiply
// operations with round-to-nearest-even and checks the 32-bit result.
//
// Each test vector is applied on a falling clock edge, held stable for
// WAIT_CYCLES rising edges, and then `out` is compared with the expected
// IEEE 754 single-precision bit pattern. Prints PASSED / FAILED per vector
// and writes a waveform to fpu_test.vcd.
module fpu_test();
    // Number of rising clock edges to wait before sampling `out`.
    // NOTE(review): the core's pipeline depth is not visible from this file;
    // 8 is assumed to be comfortably larger than it. Because the inputs are
    // held constant while waiting, waiting longer than necessary is harmless.
    localparam int WAIT_CYCLES = 8;

    // fpu_op encodings used by this test.
    localparam [2:0] OP_ADD = 3'b000;
    localparam [2:0] OP_SUB = 3'b001;
    localparam [2:0] OP_MUL = 3'b010;

    // Single-precision constants (sign_exponent_fraction).
    localparam [31:0] F_0_0 = 32'b0_00000000_00000000000000000000000; // 0.0
    localparam [31:0] F_1_0 = 32'b0_01111111_00000000000000000000000; // 1.0 (0x3F800000)
    localparam [31:0] F_2_0 = 32'b0_10000000_00000000000000000000000; // 2.0
    localparam [31:0] F_4_0 = 32'b0_10000001_00000000000000000000000; // 4.0

    reg         clk;
    reg  [1:0]  rmode;   // rounding mode select
    reg  [2:0]  fpu_op;  // operation select
    reg  [31:0] a, b;    // operands
    wire [31:0] out;     // result
    wire        snan, qnan, inf, ine, overflow, underflow, div_by_zero, zero;

    fpu dut(
        .clk(clk),
        .rmode(rmode),
        .fpu_op(fpu_op),
        .opa(a),
        .opb(b),
        .out(out),
        .snan(snan),
        .qnan(qnan),
        .inf(inf),
        .ine(ine),
        .overflow(overflow),
        .underflow(underflow),
        .div_by_zero(div_by_zero),
        .zero(zero)
    );

    // Toggle clk every 5 ns to generate a 100 MHz clock.
    always #5 clk = ~clk;

    // Apply one operation and compare the result with `expected`.
    //   op       - value driven onto fpu_op
    //   opa/opb  - operand bit patterns
    //   expected - expected bit pattern of `out`
    task automatic check(
        input [2:0]  op,
        input [31:0] opa,
        input [31:0] opb,
        input [31:0] expected
    );
        begin
            // Change inputs on the falling edge so they are stable well
            // before the rising edge the DUT is clocked on.
            @(negedge clk);
            fpu_op = op;
            a      = opa;
            b      = opb;

            repeat (WAIT_CYCLES) @(posedge clk);

            // Sample away from the rising edge so any intra-cycle delay on
            // the DUT outputs has elapsed.
            @(negedge clk);

            // === so that an unknown (X/Z) result is reported as a failure.
            assert (out === expected)
                $display("PASSED");
            else
                $display("FAILED: fpu_op=%0d a=%h b=%h out=%h expected=%h",
                         op, opa, opb, out, expected);
        end
    endtask

    initial begin
        clk    = 0;
        rmode  = 2'b00;   // round to nearest even
        fpu_op = OP_ADD;
        a      = F_0_0;
        b      = F_0_0;

        $dumpfile("fpu_test.vcd");
        $dumpvars(1, dut);

        // 0.0 + 0.0 = 0.0
        check(OP_ADD, F_0_0, F_0_0, F_0_0);
        // 1.0 + 1.0 = 2.0
        check(OP_ADD, F_1_0, F_1_0, F_2_0);
        // 1.0 - 1.0 = 0.0
        check(OP_SUB, F_1_0, F_1_0, F_0_0);
        // 1.0 * 1.0 = 1.0
        check(OP_MUL, F_1_0, F_1_0, F_1_0);
        // 2.0 * 2.0 = 4.0
        check(OP_MUL, F_2_0, F_2_0, F_4_0);
        // 2.0 * 1.0 = 2.0
        check(OP_MUL, F_2_0, F_1_0, F_2_0);

        // The clock runs forever, so end the simulation explicitly.
        $finish;
    end
endmodule
デバッグ用のソースを削除
このFPUコアではデバッグ用のソースがsynopsys translate_off と synopsys translate_onの間に記述されており、XilinxのVerilog処理系などでは合成時はそれを無視するようになっている。しかしIcarus Verilogは synopsys translate_off と synopsys translate_on をサポートしていないため、事前に以下のソースファイル中のsynopsys translate_off と synopsys translate_onに挟まれたコードを全て削除しておく必要がある。
fpu/verilog/fpu.v
fpu/verilog/post_norm.v
code:diff
diff --git a/verilog/fpu.v b/verilog/fpu.v
index 165a1d2..aa89590 100644
--- a/verilog/fpu.v
+++ b/verilog/fpu.v
@@ -475,46 +475,6 @@ always @(posedge clk)
always @(posedge clk)
-// synopsys translate_off
-wire mul_uf_del;
-wire uf2_del, ufb2_del, ufc2_del, underflow_d_del;
-wire co_del;
-wire ov_fasu_del, ov_fmul_del;
-
-delay1 #0 ud000(clk, underflow_fmul1, mul_uf_del); -delay1 #0 ud001(clk, underflow_fmul_r0, uf2_del); -delay1 #0 ud002(clk, underflow_fmul_r1, ufb2_del); -delay1 #0 ud003(clk, underflow_d, underflow_d_del); -delay1 #0 ud004(clk, test.u0.u4.exp_out1_co, co_del); -delay1 #0 ud005(clk, underflow_fmul_r2, ufc2_del); -delay1 #30 ud006(clk, out_d, out_d_del); -
-delay1 #0 ud007(clk, overflow_fasu, ov_fasu_del); -delay1 #0 ud008(clk, overflow_fmul, ov_fmul_del); -
-delay1 #2 ud009(clk, fpu_op_r3, fop); -
-delay3 #4 ud010(clk, div_opa_ldz_d, ldza_del); -
-delay1 #49 ud012(clk, quo, quo_del); -
-always @(test.error_event)
- begin
- $display("muf: %b uf0: %b uf1: %b uf2: %b, tx0: %b, co: %b, out_d: %h (%h %h), ov_fasu: %b, ov_fmul: %b, fop: %h",
- mul_uf_del, uf2_del, ufb2_del, ufc2_del, underflow_d_del, co_del, out_d_del, out_d_del30:23, out_d_del22:0, - ov_fasu_del, ov_fmul_del, fop );
- $display("ldza: %h, quo: %b",
- ldza_del, quo_del);
- end
-// synopsys translate_on
-
-
-
// Status Outputs
always @(posedge clk)
qnan <= #1 fpu_op_r32 ? 0 : ( diff --git a/verilog/post_norm.v b/verilog/post_norm.v
index ff9cf6f..1da4183 100644
--- a/verilog/post_norm.v
+++ b/verilog/post_norm.v
@@ -533,144 +533,4 @@ assign ine = op_f2i ? f2i_ine :
op_i2f ? (|fract_trunc) :
((r & !dn) | (s & !dn) | max_num | (op_div & !rem_00));
-// ---------------------------------------------------------------------
-// Debugging Stuff
-
-// synopsys translate_off
-
-wire 26:0 fracta_del, fractb_del; -wire dn_del;
-wire 22:0 fract_out_del; -wire overflow_del;
-wire 22:0 fract_out_x_del, fract_out_rnd2a_del; -wire exp_rnd_adj2a_del;
-wire 4:0 div_opa_ldz_del; -wire 23:0 fracta_div_del; -wire 23:0 fractb_div_del; -wire div_inf_del;
-wire inf_out_del, max_out_del;
-wire rx_del;
-wire ez_del;
-wire lr;
-wire 7:0 shr, shl, exp_div_del; -
-delay2 #26 ud000(clk, test.u0.fracta, fracta_del); -delay2 #26 ud001(clk, test.u0.fractb, fractb_del); -delay1 #2 ud002(clk, {g,r,s}, grs_del); -delay1 #0 ud004(clk, dn, dn_del); -delay1 #7 ud005(clk, exp_in, exp_in_del); -delay1 #7 ud007(clk, exp_out_rnd, exp_out_del); -delay1 #47 ud009(clk, fract_in, fract_in_del); -delay1 #0 ud010(clk, overflow, overflow_del); -delay1 #1 ud011(clk, exp_ovf, exp_ovf_del); -delay1 #22 ud014(clk, fract_out, fract_out_x_del); -delay1 #24 ud015(clk, fract_trunc, trunc_xx_del); -delay1 #0 ud017(clk, exp_rnd_adj2a, exp_rnd_adj2a_del); -delay1 #4 ud019(clk, div_opa_ldz, div_opa_ldz_del); -delay3 #23 ud020(clk, test.u0.fdiv_opa49:26, fracta_div_del); -delay3 #23 ud021(clk, test.u0.fractb_mul, fractb_div_del); -delay1 #0 ud023(clk, div_inf, div_inf_del); -delay1 #7 ud024(clk, fi_ldz_2, fi_ldz_2_del); -delay1 #0 ud025(clk, inf_out, inf_out_del); -delay1 #0 ud026(clk, max_num, max_num_del); -delay1 #5 ud027(clk, fi_ldz, fi_ldz_del); -delay1 #0 ud028(clk, rem_00, rx_del); -
-delay1 #0 ud029(clk, left_right, lr); -delay1 #7 ud030(clk, shift_right, shr); -delay1 #7 ud031(clk, shift_left, shl); -delay1 #22 ud032(clk, fract_out_rnd2a, fract_out_rnd2a_del); -
-delay1 #7 ud033(clk, exp_div, exp_div_del); -
-always @(test.error_event)
- begin
-
- $display("\n----------------------------------------------");
-
- $display("ERROR: GRS: %b exp_ovf: %b dn: %h exp_in: %h exp_out: %h, exp_rnd_adj2a: %b",
- grs_del, exp_ovf_del, dn_del, exp_in_del, exp_out_del, exp_rnd_adj2a_del);
-
- $display(" div_opa: %b, div_opb: %b, rem_00: %b, exp_div: %h",
- fracta_div_del, fractb_div_del, rx_del, exp_div_del);
-
- $display(" lr: %b, shl: %h, shr: %h",
- lr, shl, shr);
-
-
- $display(" overflow: %b, fract_in=%b fa:%h fb:%h",
- overflow_del, fract_in_del, fracta_del, fractb_del);
-
- $display(" div_opa_ldz: %h, div_inf: %b, inf_out: %b, max_num: %b, fi_ldz: %h, fi_ldz_2: %h",
- div_opa_ldz_del, div_inf_del, inf_out_del, max_num_del, fi_ldz_del, fi_ldz_2_del);
-
- $display(" fract_out_x: %b, fract_out_rnd2a_del: %h, fract_trunc: %b\n",
- fract_out_x_del, fract_out_rnd2a_del, trunc_xx_del);
- end
-
-
-// synopsys translate_on
-
-endmodule
-
-// synopsys translate_off
-
-module delay1(clk, in, out);
-parameter N = 1;
-input clk;
-
-
-always @(posedge clk)
-
-endmodule
-
-
-module delay2(clk, in, out);
-parameter N = 1;
-input clk;
-
-
-always @(posedge clk)
-
-always @(posedge clk)
-
endmodule
-
-module delay3(clk, in, out);
-parameter N = 1;
-input clk;
-
-
-always @(posedge clk)
-
-always @(posedge clk)
-
-always @(posedge clk)
-
-endmodule
-
-// synopsys translate_on
\ No newline at end of file