/* **************************** MODULE PREAMBLE ********************************
Copyright (c) 2012, ArchiTek
This document constitutes confidential and proprietary information
of ArchiTek. All rights reserved.
*/
// ***************************** MODULE HEADER *********************************
// fftBfCalc: radix-4 FFT butterfly calculation unit.
// The port list groups a valid/stall pipe handshake, a coefficient-table
// interface (four complex coefficients per access) and separate read/write
// interfaces to four SRAM banks.
module fftBfCalc (
iVld, iStall, iRadix,
oVld, oStall,
wIndex, wPhase,
wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3,
ramRF, ramRP,
ram0RE, ram1RE, ram2RE, ram3RE,
ram0RA, ram1RA, ram2RA, ram3RA,
ram0RD, ram1RD, ram2RD, ram3RD,
ramWF, ramWP,
ram0WE, ram1WE, ram2WE, ram3WE,
ram0WA, ram1WA, ram2WA, ram3WA,
ram0WD, ram1WD, ram2WD, ram3WD,
reset, clk
);
// ************************* PARAMETER DECLARATIONS ****************************
// The exponent of the maximum point count; it fixes the bit ranges of the
// counters and addresses (2^MRR points).
parameter MRR = 10; // Max Radix Radix
// Constants used frequently with the 16-bit (half precision) data:
// NEG has only the MSB set and is XORed into an operand to negate it,
// ZERO is the all-zero pattern.
parameter NEG = 16'h8000;
parameter ZERO = 16'h0000;
// *************************** I/O DECLARATIONS ********************************
// Pipe Input
// iRadix may change from task to task as long as it is synchronous to iVld.
input iVld;
output iStall;
input [3:0] iRadix;
// Pipe Output
// oVld exists to signal the end of processing.
output oVld;
input oStall;
// Coefficient
// Coefficient-table interface; four coefficients are accessed simultaneously.
output [MRR-3:0] wIndex;
output [2:0] wPhase;
input [15:0] wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3;
// SRAM Read
// ramRF selects the SRAM set used for SRAM reads (counterpart of the oFlip
// that accompanied oVld in LoadData).
output [1:0] ramRF;
// ramRP selects the region used for SRAM reads.
output ramRP;
output ram0RE, ram1RE, ram2RE, ram3RE;
output [MRR-3:0] ram0RA, ram1RA, ram2RA, ram3RA;
input [31:0] ram0RD, ram1RD, ram2RD, ram3RD;
// SRAM Write
// ramWF selects the SRAM set used for SRAM writes (counterpart of the oFlip
// that accompanied oVld in StoreData).
output [1:0] ramWF;
// ramWP selects the region used for SRAM writes.
output ramWP;
output ram0WE, ram1WE, ram2WE, ram3WE;
output [MRR-3:0] ram0WA, ram1WA, ram2WA, ram3WA;
output [31:0] ram0WD, ram1WD, ram2WD, ram3WD;
// Utility
input reset;
input clk;
// **************************** LOCAL DECLARATIONS *****************************
// Control
// iStat is the registered state, iStatD the combinational next state.
reg [1:0] iStat;
reg [1:0] iStatD;
reg iEn;
reg iStall;
reg [3:0] gapCnt;
wire gapEnd;
wire stall_s;
// Counter
// A point counter and a phase counter form a 2-D counter; the former is the
// lower part and the latter the upper part.
// Radix-4 processes four points at once, so the point counter only has to
// count N/4 entries.
reg [MRR-3:0] qntCnt, iQntNum;
reg [2:0] rdxCnt, iRdxNum;
// iTrig marks the wrap of the point counter, iFin marks the wrap of the phase
// counter (end of the task).
wire iTrig;
wire iFin;
// Pipeline Variable
// The pipeline has 11 stages numbered 0 to 10; the suffix of each name is the
// pipeline stage it belongs to.
reg vld_0, vld_1, vld_2, vld_3, vld_4,
vld_5, vld_6, vld_7, vld_8, vld_9, vld_10;
wire stall_0, stall_1, stall_2, stall_3, stall_4,
stall_5, stall_6, stall_7, stall_8, stall_9, stall_10;
reg fin_0, fin_1, fin_2, fin_3, fin_4,
fin_5, fin_6, fin_7, fin_8, fin_9, fin_10;
reg [MRR-3:0] index_0, index_1, index_2, index_3, index_4,
index_5, index_6, index_7, index_8, index_9, index_10;
reg [2:0] phase_0, phase_1, phase_2, phase_3, phase_4,
phase_5, phase_6, phase_7, phase_8, phase_9, phase_10;
reg [3:0] radix_0, radix_1, radix_2, radix_3, radix_4,
radix_5, radix_6, radix_7, radix_8, radix_9, radix_10;
// Full read addresses at stage 0, their bank-select LSBs at stage 1, and the
// full write addresses at stage 10.
reg [MRR-1:0] addr0_0, addr1_0, addr2_0, addr3_0;
reg [1:0] addr0_1, addr1_1, addr2_1, addr3_1;
reg [MRR-1:0] addr0_10, addr1_10, addr2_10, addr3_10;
// Stage-2 latches of the SRAM read data (d*) and the coefficients (w*).
reg [15:0] dRe0_2, dIm0_2, dRe1_2, dIm1_2;
reg [15:0] dRe2_2, dIm2_2, dRe3_2, dIm3_2;
reg [15:0] wRe0_2, wIm0_2, wRe1_2, wIm1_2;
reg [15:0] wRe2_2, wIm2_2, wRe3_2, wIm3_2;
// Outputs of the arithmetic units, named by the stage at which they appear.
wire [15:0] rr0_4, ri0_4, ir0_4, ii0_4, rr1_4, ri1_4, ir1_4, ii1_4;
wire [15:0] rr2_4, ri2_4, ir2_4, ii2_4, rr3_4, ri3_4, ir3_4, ii3_4;
wire [15:0] re0_6, im0_6, re1_6, im1_6, re2_6, im2_6, re3_6, im3_6;
wire [15:0] pRe02_8, pIm02_8, pRe13_8, pIm13_8;
wire [15:0] mRe02_8, mIm02_8, mRe13_8, mIm13_8;
wire [15:0] xRe0_10, xIm0_10, xRe1_10, xIm1_10;
wire [15:0] xRe2_10, xIm2_10, xRe3_10, xIm3_10;
// SRAM read data after distribution to the four butterfly inputs.
wire [15:0] dRe0, dIm0, dRe1, dIm1, dRe2, dIm2, dRe3, dIm3;
// SRAM Flip
reg wFlip, rFlip;
// ******************************** MODULE BODY ********************************
// -----------------------------------------------------------------------------
// Control
// Three-state machine: idle, processing, and gap (hold). Without the gap the
// state could be derived from the counters alone and the machine would not be
// needed.
parameter IDLE = 2'h0;
parameter PROC = 2'h1;
parameter GAP  = 2'h2;
// Next-state and output decode.
// iEn is driven to '0' only while holding in GAP so that no superfluous SRAM
// accesses (or counter steps) are issued during the gap.
always @(
    stall_s or
    iStat or
    iVld or
    iTrig or
    iFin or
    gapEnd
) begin
    // Defaults: hold the state, keep issuing enabled, pass the stall through.
    iStatD = iStat;
    iEn    = 1'b1;
    iStall = stall_s;
    if (!stall_s) begin
        case (iStat)
            IDLE: begin
                // A valid request starts processing immediately.
                if (iVld) begin
                    iStatD = PROC;
                    iStall = 1'b1;
                end
            end
            PROC: begin
                // The input is held off for the whole task; only the final
                // beat releases it.
                iStall = 1'b1;
                casex ({iTrig, iFin})
                    2'b0x: iStatD = PROC;       // still running
                    // Phase change: enter the gap. Depending on iRadix this
                    // transition would not be required (PROC could be kept).
                    2'b10: iStatD = GAP;
                    2'b11: begin                // task finished: back to idle
                        iStatD = IDLE;
                        iStall = 1'b0;
                    end
                endcase
            end
            GAP: begin
                iEn    = 1'b0;
                iStall = 1'b1;
                if (gapEnd)
                    iStatD = PROC;              // gap fully counted
                else
                    iStatD = GAP;               // still counting the gap
            end
        endcase
    end
end
// State register.
always @(posedge clk) begin
    if (reset) begin
        iStat <= #1 IDLE;
    end
    else begin
        iStat <= #1 iStatD;
    end
end
// -----------------------------------------------------------------------------
// Gap
// Counts the gap between phases. Roughly the pipeline length has to be
// counted (11 stages, hence a 4-bit counter).
// The counter advances on every cycle in which a request is present but
// issuing is disabled.
wire gapTick = iVld & !iEn;
always @(posedge clk) begin
    if (reset) begin
        gapCnt <= #1 4'h0;
    end
    else begin
        gapCnt <= #1 gapCnt + {3'h0, gapTick};
    end
end
// Strictly speaking the limit should be "pipeline length - X", with X decided
// by the SRAM access schedule. The gap only matters for 16- and 64-point FFTs,
// so unless those cases are important the simple approach is used: the gap
// ends when the counter is all ones (16 counts).
assign gapEnd = &gapCnt;
// -----------------------------------------------------------------------------
// Counter
// 2-D counter that counts N/4 * log4(N) steps, as the theory prescribes.
// Lower dimension: point counter, cleared on iTrig, otherwise stepped by
// every issued beat.
always @(posedge clk) begin
    if (reset)
        qntCnt <= #1 {MRR-2{1'b0}};
    else if (!stall_s)
        qntCnt <= #1 iTrig
            ? {MRR-2{1'b0}}
            : qntCnt + {{MRR-3{1'b0}}, iVld & iEn};
end
// Upper dimension: phase counter, cleared on iFin, otherwise stepped whenever
// the point counter wraps.
always @(posedge clk) begin
    if (reset)
        rdxCnt <= #1 3'h0;
    else if (!stall_s)
        rdxCnt <= #1 iFin
            ? 3'h0
            : rdxCnt + {2'h0, iVld & iTrig};
end
// Terminal counts per iRadix. The default entry corresponds to 2^10 points;
// when MRR is increased, further entries (10, 12, 14, ...) have to be added.
// A formulation that does not need a case() on iRadix would be preferable.
// Last value of the point counter:
always @(
    iRadix
)
    case (iRadix)
        4'h4:    iQntNum = 8'h03;
        4'h6:    iQntNum = 8'h0f;
        4'h8:    iQntNum = 8'h3f;
        default: iQntNum = 8'hff;   // MRR
    endcase
// Last value of the phase counter:
always @(
    iRadix
)
    case (iRadix)
        4'h4:    iRdxNum = 3'h1;
        4'h6:    iRdxNum = 3'h2;
        4'h8:    iRdxNum = 3'h3;
        default: iRdxNum = 3'h4;    // MRR
    endcase
assign iTrig = iVld & (qntCnt == iQntNum),
       iFin  = iTrig & (rdxCnt == iRdxNum);
// -----------------------------------------------------------------------------
// Pipeline (Valid Control)
// 11-stage pipeline. The counter values and the radix travel with the valid
// flag so that the SRAM addresses can be generated further down the pipe.
// All stages share one clocked process: reset clears every stage, otherwise
// each stage loads from its predecessor unless its own stall is asserted.
always @(posedge clk) begin
    if (reset) begin
        {vld_0,  fin_0,  index_0,  phase_0,  radix_0}  <= #1 {MRR+7{1'b0}};
        {vld_1,  fin_1,  index_1,  phase_1,  radix_1}  <= #1 {MRR+7{1'b0}};
        {vld_2,  fin_2,  index_2,  phase_2,  radix_2}  <= #1 {MRR+7{1'b0}};
        {vld_3,  fin_3,  index_3,  phase_3,  radix_3}  <= #1 {MRR+7{1'b0}};
        {vld_4,  fin_4,  index_4,  phase_4,  radix_4}  <= #1 {MRR+7{1'b0}};
        {vld_5,  fin_5,  index_5,  phase_5,  radix_5}  <= #1 {MRR+7{1'b0}};
        {vld_6,  fin_6,  index_6,  phase_6,  radix_6}  <= #1 {MRR+7{1'b0}};
        {vld_7,  fin_7,  index_7,  phase_7,  radix_7}  <= #1 {MRR+7{1'b0}};
        {vld_8,  fin_8,  index_8,  phase_8,  radix_8}  <= #1 {MRR+7{1'b0}};
        {vld_9,  fin_9,  index_9,  phase_9,  radix_9}  <= #1 {MRR+7{1'b0}};
        {vld_10, fin_10, index_10, phase_10, radix_10} <= #1 {MRR+7{1'b0}};
    end
    else begin
        // Stage 0 is fed from the control section and the counters.
        if (!stall_s)
            {vld_0, fin_0, index_0, phase_0, radix_0}
                <= #1 {iVld & iEn, iFin, qntCnt, rdxCnt, iRadix};
        // Stages 1..10 simply shift.
        if (!stall_0)
            {vld_1, fin_1, index_1, phase_1, radix_1}
                <= #1 {vld_0, fin_0, index_0, phase_0, radix_0};
        if (!stall_1)
            {vld_2, fin_2, index_2, phase_2, radix_2}
                <= #1 {vld_1, fin_1, index_1, phase_1, radix_1};
        if (!stall_2)
            {vld_3, fin_3, index_3, phase_3, radix_3}
                <= #1 {vld_2, fin_2, index_2, phase_2, radix_2};
        if (!stall_3)
            {vld_4, fin_4, index_4, phase_4, radix_4}
                <= #1 {vld_3, fin_3, index_3, phase_3, radix_3};
        if (!stall_4)
            {vld_5, fin_5, index_5, phase_5, radix_5}
                <= #1 {vld_4, fin_4, index_4, phase_4, radix_4};
        if (!stall_5)
            {vld_6, fin_6, index_6, phase_6, radix_6}
                <= #1 {vld_5, fin_5, index_5, phase_5, radix_5};
        if (!stall_6)
            {vld_7, fin_7, index_7, phase_7, radix_7}
                <= #1 {vld_6, fin_6, index_6, phase_6, radix_6};
        if (!stall_7)
            {vld_8, fin_8, index_8, phase_8, radix_8}
                <= #1 {vld_7, fin_7, index_7, phase_7, radix_7};
        if (!stall_8)
            {vld_9, fin_9, index_9, phase_9, radix_9}
                <= #1 {vld_8, fin_8, index_8, phase_8, radix_8};
        if (!stall_9)
            {vld_10, fin_10, index_10, phase_10, radix_10}
                <= #1 {vld_9, fin_9, index_9, phase_9, radix_9};
    end
end
// -----------------------------------------------------------------------------
// Pipeline (Stall Control)
// Buffered pipeline style: a stage is stalled only when the stage after it
// holds valid data and is itself stalled, so bubbles are squeezed out.
// Considering the total amount of work the pipeline is short, so the basic
// style (the same chain with the vld terms removed) would also be acceptable.
// Listed from the output end back to the input end; the downstream stall is
// only honoured on the final beat of a task (fin_10).
assign stall_10 = oStall & fin_10,
       stall_9  = vld_10 & stall_10,
       stall_8  = vld_9  & stall_9,
       stall_7  = vld_8  & stall_8,
       stall_6  = vld_7  & stall_7,
       stall_5  = vld_6  & stall_6,
       stall_4  = vld_5  & stall_5,
       stall_3  = vld_4  & stall_4,
       stall_2  = vld_3  & stall_3,
       stall_1  = vld_2  & stall_2,
       stall_0  = vld_1  & stall_1,
       stall_s  = vld_0  & stall_0;
// -----------------------------------------------------------------------------
// SRAM Read Address & Data Latch
// The data-address manipulation is applied to each of the four ports; the
// results are used as the SRAM read addresses.
always @(posedge clk) begin
    if (!stall_s) begin
        addr0_0 <= #1 addrInFunc({qntCnt, 2'h0}, rdxCnt, iRadix);
        addr1_0 <= #1 addrInFunc({qntCnt, 2'h1}, rdxCnt, iRadix);
        addr2_0 <= #1 addrInFunc({qntCnt, 2'h2}, rdxCnt, iRadix);
        addr3_0 <= #1 addrInFunc({qntCnt, 2'h3}, rdxCnt, iRadix);
    end
end
// SRAM read data arrives one cycle late, so the bank-select bits that steer
// the data are latched as well to delay them by the same amount.
always @(posedge clk) begin
    if (!stall_0)
        {addr0_1, addr1_1, addr2_1, addr3_1}
            <= #1 {addr0_0[1:0], addr1_0[1:0], addr2_0[1:0], addr3_0[1:0]};
end
// AND-OR selection of one bank's 32-bit read word by a 2-bit bank number.
function [31:0] rdLaneSel;
    input [1:0]  bank;
    input [31:0] q0, q1, q2, q3;
    begin
        rdLaneSel = q0 & {32{bank == 2'h0}}
                  | q1 & {32{bank == 2'h1}}
                  | q2 & {32{bank == 2'h2}}
                  | q3 & {32{bank == 2'h3}};
    end
endfunction
// The two LSBs of the four SRAM addresses are always mutually exclusive and
// are used to distribute the accesses over the SRAM banks.
// Note that data 1 and data 2 are crossed here (data 1 is steered by addr2_1
// and data 2 by addr1_1), as required by the radix-4 theory.
assign {dIm0, dRe0} = rdLaneSel(addr0_1, ram0RD, ram1RD, ram2RD, ram3RD);
assign {dIm1, dRe1} = rdLaneSel(addr2_1, ram0RD, ram1RD, ram2RD, ram3RD); // addr2_1!
assign {dIm2, dRe2} = rdLaneSel(addr1_1, ram0RD, ram1RD, ram2RD, ram3RD); // addr1_1!
assign {dIm3, dRe3} = rdLaneSel(addr3_1, ram0RD, ram1RD, ram2RD, ram3RD);
// Latch the SRAM read data once before it enters the arithmetic units.
always @(posedge clk) begin
    if (vld_1 & !stall_1)
        {dRe0_2, dIm0_2, dRe1_2, dIm1_2, dRe2_2, dIm2_2, dRe3_2, dIm3_2}
            <= #1 {dRe0, dIm0, dRe1, dIm1, dRe2, dIm2, dRe3, dIm3};
end
// The coefficients are captured under the same enable.
always @(posedge clk) begin
    if (vld_1 & !stall_1)
        {wRe0_2, wIm0_2, wRe1_2, wIm1_2, wRe2_2, wIm2_2, wRe3_2, wIm3_2}
            <= #1 {wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3};
end
// The arithmetic units follow from here. In a real design use named port
// connections! (Positional connections are used only to keep the listing
// readable.)
// NOTE(review): fmul/fadd are defined outside this file. Each instance leaves
// its 2nd and 5th positional ports unconnected -- presumably an input-side
// stall output and an output-side valid; confirm against the fmul/fadd
// definitions.
// -----------------------------------------------------------------------------
// Stage 3,4 - Data x Coef
// 16 multipliers for the complex multiplications. The first four could be
// omitted because their coefficient is fixed (1.0, 0.0).
fmul mul_rr0 (vld_2,, dRe0_2, wRe0_2,, stall_4, rr0_4, reset, clk);
fmul mul_ri0 (vld_2,, dRe0_2, wIm0_2,, stall_4, ri0_4, reset, clk);
fmul mul_ir0 (vld_2,, dIm0_2, wRe0_2,, stall_4, ir0_4, reset, clk);
fmul mul_ii0 (vld_2,, dIm0_2, wIm0_2,, stall_4, ii0_4, reset, clk);
fmul mul_rr1 (vld_2,, dRe1_2, wRe1_2,, stall_4, rr1_4, reset, clk);
fmul mul_ri1 (vld_2,, dRe1_2, wIm1_2,, stall_4, ri1_4, reset, clk);
fmul mul_ir1 (vld_2,, dIm1_2, wRe1_2,, stall_4, ir1_4, reset, clk);
fmul mul_ii1 (vld_2,, dIm1_2, wIm1_2,, stall_4, ii1_4, reset, clk);
fmul mul_rr2 (vld_2,, dRe2_2, wRe2_2,, stall_4, rr2_4, reset, clk);
fmul mul_ri2 (vld_2,, dRe2_2, wIm2_2,, stall_4, ri2_4, reset, clk);
fmul mul_ir2 (vld_2,, dIm2_2, wRe2_2,, stall_4, ir2_4, reset, clk);
fmul mul_ii2 (vld_2,, dIm2_2, wIm2_2,, stall_4, ii2_4, reset, clk);
fmul mul_rr3 (vld_2,, dRe3_2, wRe3_2,, stall_4, rr3_4, reset, clk);
fmul mul_ri3 (vld_2,, dRe3_2, wIm3_2,, stall_4, ri3_4, reset, clk);
fmul mul_ir3 (vld_2,, dIm3_2, wRe3_2,, stall_4, ir3_4, reset, clk);
fmul mul_ii3 (vld_2,, dIm3_2, wIm3_2,, stall_4, ii3_4, reset, clk);
// -----------------------------------------------------------------------------
// Stage 5,6 - Re-Re/Im+Im
// 8 adders completing the complex multiplications. The first two could be
// omitted because their coefficient is fixed (1.0, 0.0).
// XORing an operand with NEG flips its sign bit, i.e. turns the addition into
// a subtraction of that operand.
fadd add_r0 (vld_4,, rr0_4, ii0_4 ^ NEG,, stall_6, re0_6, reset, clk);
fadd add_i0 (vld_4,, ri0_4, ir0_4,, stall_6, im0_6, reset, clk);
fadd add_r1 (vld_4,, rr1_4, ii1_4 ^ NEG,, stall_6, re1_6, reset, clk);
fadd add_i1 (vld_4,, ri1_4, ir1_4,, stall_6, im1_6, reset, clk);
fadd add_r2 (vld_4,, rr2_4, ii2_4 ^ NEG,, stall_6, re2_6, reset, clk);
fadd add_i2 (vld_4,, ri2_4, ir2_4,, stall_6, im2_6, reset, clk);
fadd add_r3 (vld_4,, rr3_4, ii3_4 ^ NEG,, stall_6, re3_6, reset, clk);
fadd add_i3 (vld_4,, ri3_4, ir3_4,, stall_6, im3_6, reset, clk);
// -----------------------------------------------------------------------------
// Stage 7,8 - Det0
// 8 adders for the first half of the butterfly matrix operation: sums (p*)
// and differences (m*) of inputs 0/2 and of inputs 1/3.
fadd add_pr02 (vld_6,, re0_6, re2_6,, stall_8, pRe02_8, reset, clk);
fadd add_pi02 (vld_6,, im0_6, im2_6,, stall_8, pIm02_8, reset, clk);
fadd add_mr02 (vld_6,, re0_6, re2_6 ^ NEG,, stall_8, mRe02_8, reset, clk);
fadd add_mi02 (vld_6,, im0_6, im2_6 ^ NEG,, stall_8, mIm02_8, reset, clk);
fadd add_pr13 (vld_6,, re1_6, re3_6,, stall_8, pRe13_8, reset, clk);
fadd add_pi13 (vld_6,, im1_6, im3_6,, stall_8, pIm13_8, reset, clk);
fadd add_mr13 (vld_6,, re1_6, re3_6 ^ NEG,, stall_8, mRe13_8, reset, clk);
fadd add_mi13 (vld_6,, im1_6, im3_6 ^ NEG,, stall_8, mIm13_8, reset, clk);
// -----------------------------------------------------------------------------
// Stage 9,10 - Det1
// X0 = P02 + P13
// Re(X0) = Re(P02) + Re(P13)
// Im(X0) = Im(P02) + Im(P13)
// X1 = M02 - jM13
// Re(X1) = Re(M02) + Im(M13)
// Im(X1) = Im(M02) - Re(M13)
// X2 = P02 - P13
// Re(X2) = Re(P02) - Re(P13)
// Im(X2) = Im(P02) - Im(P13)
// X3 = M02 + jM13
// Re(X3) = Re(M02) - Im(M13)
// Im(X3) = Im(M02) + Re(M13)
// 8 adders for the second half of the butterfly matrix operation.
fadd add_xr0 (vld_8,, pRe02_8, pRe13_8,, stall_10, xRe0_10, reset, clk);
fadd add_xi0 (vld_8,, pIm02_8, pIm13_8,, stall_10, xIm0_10, reset, clk);
fadd add_xr1 (vld_8,, mRe02_8, mIm13_8,, stall_10, xRe1_10, reset, clk);
fadd add_xi1 (vld_8,, mIm02_8, mRe13_8 ^ NEG,, stall_10, xIm1_10, reset, clk);
fadd add_xr2 (vld_8,, pRe02_8, pRe13_8 ^ NEG,, stall_10, xRe2_10, reset, clk);
fadd add_xi2 (vld_8,, pIm02_8, pIm13_8 ^ NEG,, stall_10, xIm2_10, reset, clk);
fadd add_xr3 (vld_8,, mRe02_8, mIm13_8 ^ NEG,, stall_10, xRe3_10, reset, clk);
fadd add_xi3 (vld_8,, mIm02_8, mRe13_8,, stall_10, xIm3_10, reset, clk);
// -----------------------------------------------------------------------------
// SRAM Write Address Latch
// The data-address manipulation for the four write ports is done one stage
// ahead (from the stage-9 counters) so that the SRAM write addresses are
// available together with the stage-10 results.
always @(posedge clk) begin
    if (!stall_9) begin
        addr3_10 <= #1 addrOutFunc({index_9, 2'h3}, phase_9, radix_9);
        addr2_10 <= #1 addrOutFunc({index_9, 2'h2}, phase_9, radix_9);
        addr1_10 <= #1 addrOutFunc({index_9, 2'h1}, phase_9, radix_9);
        addr0_10 <= #1 addrOutFunc({index_9, 2'h0}, phase_9, radix_9);
    end
end
// -----------------------------------------------------------------------------
// Output
// Completion is reported when the final beat of a task reaches the last stage.
assign oVld = fin_10 & vld_10;
// -----------------------------------------------------------------------------
// SRAM Flip
// The flip bits toggle once per FFT run so that different SRAM sets are
// selected for reading and for writing.
// rFlip toggles when the final beat leaves stage 0, wFlip when the final beat
// leaves stage 10.
always @(posedge clk) begin
    if (reset) begin
        rFlip <= #1 1'b0;
        wFlip <= #1 1'b0;
    end
    else begin
        if (vld_0 & fin_0 & !stall_0)
            rFlip <= #1 ~rFlip;
        if (vld_10 & fin_10 & !stall_10)
            wFlip <= #1 ~wFlip;
    end
end
// The set select is combined with the "SRAM in use" condition (valid and not
// stalled); the top module uses these signals to blend the data.
assign ramRF = {1'b0, vld_0 & !stall_0} << rFlip,
       ramWF = {1'b0, vld_10 & !stall_10} << wFlip;
// -----------------------------------------------------------------------------
// Coefficient
// The coefficient table is accessed one cycle after the SRAM access (stage 1
// instead of stage 0) because the two have different latencies.
assign wIndex = index_1,
       wPhase = phase_1;
// -----------------------------------------------------------------------------
// SRAM Read
// Generation of the SRAM read signals.
// ramRP indicates the region, but written this way only iRadix = 64 and 1024
// are handled correctly -- this is NOT sufficient (see footnote [4] of the
// accompanying text).
assign ramRP = phase_0[0];
// The stall is applied to the SRAM, and RE (Read Enable) is kept inactive
// while unused to reduce power consumption.
assign {ram0RE, ram1RE, ram2RE, ram3RE} = {4{vld_0 & !stall_0}};
// AND-OR selection of the word address (upper MRR-2 bits) of whichever of the
// four port addresses has its two LSBs equal to the given bank number.
function [MRR-3:0] rdBankAddr;
    input [1:0]     bank;
    input [MRR-1:0] p0, p1, p2, p3;
    begin
        rdBankAddr = p0[MRR-1:2] & {MRR-2{p0[1:0] == bank}}
                   | p1[MRR-1:2] & {MRR-2{p1[1:0] == bank}}
                   | p2[MRR-1:2] & {MRR-2{p2[1:0] == bank}}
                   | p3[MRR-1:2] & {MRR-2{p3[1:0] == bank}};
    end
endfunction
assign ram0RA = rdBankAddr(2'h0, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram1RA = rdBankAddr(2'h1, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram2RA = rdBankAddr(2'h2, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram3RA = rdBankAddr(2'h3, addr0_0, addr1_0, addr2_0, addr3_0);
// -----------------------------------------------------------------------------
// SRAM Write
// Generation of the SRAM write signals.
// ramWP indicates the region, but written this way only iRadix = 64 and 1024
// are handled correctly -- this is NOT sufficient (see footnote [4] of the
// accompanying text).
assign ramWP = ~phase_10[0];
// The stall is applied to the SRAM, and WE (Write Enable) is kept inactive
// while unused to reduce power consumption.
assign {ram0WE, ram1WE, ram2WE, ram3WE} = {4{vld_10 & !stall_10}};
// AND-OR selection of the word address (upper MRR-2 bits) of whichever of the
// four port addresses has its two LSBs equal to the given bank number.
function [MRR-3:0] wrBankAddr;
    input [1:0]     bank;
    input [MRR-1:0] p0, p1, p2, p3;
    begin
        wrBankAddr = p0[MRR-1:2] & {MRR-2{p0[1:0] == bank}}
                   | p1[MRR-1:2] & {MRR-2{p1[1:0] == bank}}
                   | p2[MRR-1:2] & {MRR-2{p2[1:0] == bank}}
                   | p3[MRR-1:2] & {MRR-2{p3[1:0] == bank}};
    end
endfunction
// AND-OR selection of the 32-bit result word whose port address has its two
// LSBs equal to the given bank number.
function [31:0] wrBankData;
    input [1:0]     bank;
    input [MRR-1:0] p0, p1, p2, p3;
    input [31:0]    x0, x1, x2, x3;
    begin
        wrBankData = x0 & {32{p0[1:0] == bank}}
                   | x1 & {32{p1[1:0] == bank}}
                   | x2 & {32{p2[1:0] == bank}}
                   | x3 & {32{p3[1:0] == bank}};
    end
endfunction
// Write addresses per bank.
assign ram0WA = wrBankAddr(2'h0, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram1WA = wrBankAddr(2'h1, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram2WA = wrBankAddr(2'h2, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram3WA = wrBankAddr(2'h3, addr0_10, addr1_10, addr2_10, addr3_10);
// Write data per bank; each word packs {imaginary, real} of one result.
assign ram0WD = wrBankData(2'h0, addr0_10, addr1_10, addr2_10, addr3_10,
                           {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                           {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});
assign ram1WD = wrBankData(2'h1, addr0_10, addr1_10, addr2_10, addr3_10,
                           {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                           {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});
assign ram2WD = wrBankData(2'h2, addr0_10, addr1_10, addr2_10, addr3_10,
                           {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                           {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});
assign ram3WD = wrBankData(2'h3, addr0_10, addr1_10, addr2_10, addr3_10,
                           {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                           {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});
// **************************** FUNCTIONS and TASKS ****************************
// Input (read) SRAM address generation.
// First the counter value is rotated according to the phase so that it suits
// the radix-4 input ordering; then a 2-bit scramble is XORed into the two
// LSBs to undo the address scrambling applied in the preceding phase.
//   idx   : {point counter, 2-bit port number}
//   phase : current phase counter value
//   radix : iRadix of the running task
// The bit slices are written for a 10-bit address (MRR = 10).
function [MRR-1:0] addrInFunc;
    input [MRR-1:0] idx;
    input [2:0]     phase;
    input [3:0]     radix;
    reg [MRR-1:0] rot;   // rotated index
    reg [1:0]     swz;   // scramble applied to the two LSBs
    begin
        casex (phase)
            3'h1: begin
                rot = {idx[9:4], idx[1:0], idx[3:2]};
                swz = idx[1:0];
            end
            3'h2: begin
                rot = {idx[9:6], idx[1:0], idx[5:2]};
                swz = idx[1:0] ^ idx[5:4];
            end
            3'h3: begin
                rot = {idx[9:8], idx[1:0], idx[7:2]};
                swz = idx[1:0] ^ idx[7:6];
            end
            3'h4: begin
                rot = {idx[1:0], idx[9:2]};
                swz = idx[1:0] ^ idx[9:8];
            end
            default: begin
                // No rotation; undo the scrambling done by LoadData (applying
                // the same XOR twice restores the original address).
                rot = idx;
                case (radix)
                    4'h4:    swz = idx[3:2];
                    4'h6:    swz = idx[5:4];
                    4'h8:    swz = idx[7:6];
                    default: swz = idx[MRR-1:MRR-2];
                endcase
            end
        endcase
        // Only the two LSBs are affected by the scramble.
        addrInFunc = rot ^ {{MRR-2{1'b0}}, swz};
    end
endfunction
// Output (write) SRAM address generation.
// First the counter value is rotated according to the phase so that it suits
// the radix-4 output ordering. Then a 2-bit scramble is XORed into the two
// LSBs: for a non-final phase it is the XOR of two adjacent 2-bit fields of
// the rotated index (a single field for phase 0); for the final phase
// ("border") it is the radix-dependent field used for the StoreData
// scrambling.
//   idx   : {point counter, 2-bit port number}
//   phase : phase counter value travelling with the data
//   radix : iRadix travelling with the data
// The bit slices are written for a 10-bit address (MRR = 10).
function [MRR-1:0] addrOutFunc;
input [MRR-1:0] idx;
input [2:0] phase;
input [3:0] radix;
reg border;
reg [MRR-1:0] result;
reg [MRR-1:0] twid;
begin
// border is set on the last phase of the task for the given radix.
case (radix)
4'h4: border = (phase == 3'h1);
4'h6: border = (phase == 3'h2);
4'h8: border = (phase == 3'h3);
default: border = (phase == 3'h4);
endcase
// Rotate
case (phase)
3'h1: result = {idx[9:4], idx[1:0], idx[3:2]};
3'h2: result = {idx[9:6], idx[1:0], idx[5:2]};
3'h3: result = {idx[9:8], idx[1:0], idx[7:2]};
3'h4: result = { idx[1:0], idx[9:2]};
default: result = idx;
endcase
// Scramble term (affects the two LSBs only)
casex ({border, phase})
{1'b0, 3'h0}: twid = {{MRR-2{1'b0}}, result[3:2]};
{1'b0, 3'h1}: twid = {{MRR-2{1'b0}}, result[3:2] ^ result[5:4]};
{1'b0, 3'h2}: twid = {{MRR-2{1'b0}}, result[5:4] ^ result[7:6]};
{1'b0, 3'h3}: twid = {{MRR-2{1'b0}}, result[7:6] ^ result[9:8]};
default:
// Final phase: address scrambling for StoreData (applying the same XOR
// twice restores the original address).
case (radix)
4'h4: twid = {{MRR-2{1'b0}}, result[3:2]};
4'h6: twid = {{MRR-2{1'b0}}, result[5:4]};
4'h8: twid = {{MRR-2{1'b0}}, result[7:6]};
default:
twid = {{MRR-2{1'b0}}, result[MRR-1:MRR-2]};
endcase
endcase
// Result
// Fix: the return value must be assigned to this function's own name. The
// original assigned to addrInFunc, which left addrOutFunc without a value.
addrOutFunc = result ^ twid;
end
endfunction
endmodule // bfCalc
// *****************************************************************************