論理回路デザイン
ArchiTek home page
コーディング3

コード(BfCalc RTL)

/* **************************** MODULE PREAMBLE ********************************

        Copyright (c) 2012, ArchiTek
        This document constitutes confidential and proprietary information
        of ArchiTek. All rights reserved.
*/

// ***************************** MODULE HEADER *********************************

// fftBfCalc: radix-4 FFT butterfly calculation block.
// Ports, by group: pipe input handshake, pipe output handshake, coefficient
// table interface, four SRAM read ports, four SRAM write ports, reset/clock.
module fftBfCalc (
        iVld, iStall, iRadix,

        oVld, oStall,

        wIndex, wPhase,
        wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3,

        ramRF, ramRP,
        ram0RE, ram1RE, ram2RE, ram3RE,
        ram0RA, ram1RA, ram2RA, ram3RA,
        ram0RD, ram1RD, ram2RD, ram3RD,

        ramWF, ramWP,
        ram0WE, ram1WE, ram2WE, ram3WE,
        ram0WA, ram1WA, ram2WA, ram3WA,
        ram0WD, ram1WD, ram2WD, ram3WD,

        reset, clk

        );

// ************************* PARAMETER DECLARATIONS ****************************

        // Exponent of the maximum point count; it fixes the bit ranges of the counters etc.
        parameter               MRR     = 10;           // Max Radix Radix

        // Constants used frequently with the half-precision data.
        // XORing an operand with NEG makes the adder stages subtract that operand.
        parameter               NEG     = 16'h8000;
        parameter               ZERO    = 16'h0000;

// *************************** I/O DECLARATIONS ********************************

        // Pipe Input
        // iRadix may change from task to task as long as it is synchronous to iVld
        input                   iVld;
        output                  iStall;
        input   [3:0]           iRadix;

        // Pipe Output
        // oVld exists to signal the end of processing
        output                  oVld;
        input                   oStall;

        // Coefficient
        // Coefficient table interface; four coefficients are accessed at the same time
        output  [MRR-3:0]       wIndex;
        output  [2:0]           wPhase;
        input   [15:0]          wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3;

        // SRAM Read
        // ramRF selects the SRAM set for reads (counterpart of the oFlip that accompanied oVld in LoadData)
        output  [1:0]           ramRF;
        // ramRP selects the region for reads
        output                  ramRP;
        output                  ram0RE, ram1RE, ram2RE, ram3RE;
        output  [MRR-3:0]       ram0RA, ram1RA, ram2RA, ram3RA;
        input   [31:0]          ram0RD, ram1RD, ram2RD, ram3RD;

        // SRAM Write
        // ramWF selects the SRAM set for writes (counterpart of the oFlip that accompanied oVld in StoreData)
        output  [1:0]           ramWF;
        // ramWP selects the region for writes
        output                  ramWP;
        output                  ram0WE, ram1WE, ram2WE, ram3WE;
        output  [MRR-3:0]       ram0WA, ram1WA, ram2WA, ram3WA;
        output  [31:0]          ram0WD, ram1WD, ram2WD, ram3WD;

        // Utility
        input                   reset;
        input                   clk;

// **************************** LOCAL DECLARATIONS *****************************

        // Control
        // iStat is the registered state, iStatD its combinational next value.
        reg     [1:0]           iStat;
        reg     [1:0]           iStatD;
        reg                     iEn;
        reg                     iStall;
        reg     [3:0]           gapCnt;
        wire                    gapEnd;
        wire                    stall_s;

        // Counter
        // A point counter and a phase counter form a 2-D counter: the former is the low part, the latter the high part
        // Radix-4 handles four points at once, so the point counter only has to count N/4
        reg     [MRR-3:0]       qntCnt, iQntNum;
        reg     [2:0]           rdxCnt, iRdxNum;
        // iTrig marks the wrap of the point counter, iFin the wrap of the phase counter (completion)
        wire                    iTrig;
        wire                    iFin;

        // Pipeline Variable
        // The pipeline has 11 stages, 0 to 10; the suffix names the pipeline stage
        reg                     vld_0, vld_1, vld_2, vld_3, vld_4,
                                vld_5, vld_6, vld_7, vld_8, vld_9, vld_10;
        wire                    stall_0, stall_1, stall_2, stall_3, stall_4,
                                stall_5, stall_6, stall_7, stall_8, stall_9, stall_10;
        reg                     fin_0, fin_1, fin_2, fin_3, fin_4,
                                fin_5, fin_6, fin_7, fin_8, fin_9, fin_10;
        reg     [MRR-3:0]       index_0, index_1, index_2, index_3, index_4,
                                index_5, index_6, index_7, index_8, index_9, index_10;
        reg     [2:0]           phase_0, phase_1, phase_2, phase_3, phase_4,
                                phase_5, phase_6, phase_7, phase_8, phase_9, phase_10;
        reg     [3:0]           radix_0, radix_1, radix_2, radix_3, radix_4,
                                radix_5, radix_6, radix_7, radix_8, radix_9, radix_10;
        // Full read addresses (stage 0), their 2 LSBs delayed one cycle (stage 1),
        // and full write addresses (stage 10).
        reg     [MRR-1:0]       addr0_0, addr1_0, addr2_0, addr3_0;
        reg     [1:0]           addr0_1, addr1_1, addr2_1, addr3_1;
        reg     [MRR-1:0]       addr0_10, addr1_10, addr2_10, addr3_10;
        // Latched data / coefficients (stage 2) and arithmetic results (stages 4, 6, 8, 10).
        reg     [15:0]          dRe0_2, dIm0_2, dRe1_2, dIm1_2;
        reg     [15:0]          dRe2_2, dIm2_2, dRe3_2, dIm3_2;
        reg     [15:0]          wRe0_2, wIm0_2, wRe1_2, wIm1_2;
        reg     [15:0]          wRe2_2, wIm2_2, wRe3_2, wIm3_2;
        wire    [15:0]          rr0_4, ri0_4, ir0_4, ii0_4, rr1_4, ri1_4, ir1_4, ii1_4;
        wire    [15:0]          rr2_4, ri2_4, ir2_4, ii2_4, rr3_4, ri3_4, ir3_4, ii3_4;
        wire    [15:0]          re0_6, im0_6, re1_6, im1_6, re2_6, im2_6, re3_6, im3_6;
        wire    [15:0]          pRe02_8, pIm02_8, pRe13_8, pIm13_8;
        wire    [15:0]          mRe02_8, mIm02_8, mRe13_8, mIm13_8;
        wire    [15:0]          xRe0_10, xIm0_10, xRe1_10, xIm1_10;
        wire    [15:0]          xRe2_10, xIm2_10, xRe3_10, xIm3_10;
        // Read data after bank de-multiplexing (combinational, before the stage-2 latch).
        wire    [15:0]          dRe0, dIm0, dRe1, dIm1, dRe2, dIm2, dRe3, dIm3;

        // SRAM Flip
        reg                     wFlip, rFlip;

// ******************************** MODULE BODY ********************************

// -----------------------------------------------------------------------------
// Control
// State machine with idle / processing / hold states. Without the gap the
// counters alone would identify the state and the machine would be unnecessary.
parameter       IDLE    = 2'h0,
                PROC    = 2'h1,
                GAP     = 2'h2;

// iEn is driven to '0' during the hold (GAP) state to inhibit superfluous SRAM accesses etc.
// Combinational next-state / output logic. The defaults (hold the state,
// iEn = 1, iStall = stall_s) apply whenever no branch below overrides them,
// in particular whenever stall_s is asserted.
always @(
        iStat or
        iVld or
        iTrig or
        iFin or
        gapEnd or
        stall_s
        ) begin

        iStatD          = iStat;
        iEn             = 1'b1;
        iStall          = stall_s;

        if (!stall_s)
                case (iStat)
                        IDLE:   if (iVld) begin         // go straight to the processing state
                                        iStatD          = PROC;
                                        iEn             = 1'b1;
                                        iStall          = 1'b1;
                                end
                        PROC:   casex ({iTrig, iFin})
                                2'b0x: begin            // processing
                                        iStatD          = PROC;
                                        iEn             = 1'b1;
                                        iStall          = 1'b1;
                                end
                                2'b10: begin            // on a phase change go to the hold state
                                                        // depending on iRadix the transition to GAP is unnecessary
//                                      iStatD          = PROC;
                                        iStatD          = GAP;
                                        iEn             = 1'b1;
                                        iStall          = 1'b1;
                                end
                                2'b11: begin            // on completion go to the idle state
                                        iStatD          = IDLE;
                                        iEn             = 1'b1;
                                        iStall          = 1'b0;
                                end
                        endcase
                        GAP:    if (gapEnd) begin       // once the gap has been counted, go to the processing state
                                        iStatD          = PROC;
                                        iEn             = 1'b0;
                                        iStall          = 1'b1;
                                end
                                else begin              // still counting the gap
                                        iStatD          = GAP;
                                        iEn             = 1'b0;
                                        iStall          = 1'b1;
                                end
                endcase
end

// State register: synchronous reset to IDLE, otherwise takes the next state.
always @(posedge clk)
        if (reset)
                iStat           <= #1 IDLE;
        else
                iStat           <= #1 iStatD;

// -----------------------------------------------------------------------------
// Gap
// Counts the gap; roughly the pipeline length has to be counted (11, hence a 4-bit counter).
// The counter advances by one on every clock where iVld is high and iEn is low,
// and is free-running (it wraps rather than being cleared explicitly).
always @(posedge clk)
        if (reset)
                gapCnt          <= #1 4'h0;
        else
                gapCnt          <= #1 gapCnt + {3'h0, iVld & !iEn};

// Strictly, (pipeline length - X) should be used, with X decided by the SRAM access schedule.
// This only matters for 16- and 64-point FFTs, so unless those are important
// a simple approach is taken (16 here): gapEnd is high when all bits of gapCnt are 1.
assign gapEnd           = &gapCnt;

// -----------------------------------------------------------------------------
// Counter
// Organised as a 2-D counter that counts N/4 * log4(N), as the theory requires.
// Both counters hold while stall_s is asserted.
//   qntCnt: cleared on iTrig, otherwise advances when iVld & iEn.
//   rdxCnt: cleared on iFin, otherwise advances when iVld & iTrig.
always @(posedge clk)
        if (reset) begin
                qntCnt          <= #1 {MRR-2{1'b0}};
                rdxCnt          <= #1 3'h0;
        end
        else if (!stall_s) begin
                qntCnt          <= #1 iTrig
                                        ? {MRR-2{1'b0}}
                                        : qntCnt + {{MRR-3{1'b0}}, iVld & iEn};
                rdxCnt          <= #1 iFin
                                        ? 3'h0
                                        : rdxCnt + {2'h0, iVld & iTrig};
        end

// Terminal counts for the 2-D counter, decoded from iRadix.
// The fallback entry corresponds to 2^10 points; when MRR is increased, add the
// intermediate entries (10, 12, 14, ...) here.
// A formulation that does not enumerate iRadix values would be preferable.
always @(
        iRadix
        ) begin
        if (iRadix == 4'h4) begin
                iQntNum         = 8'h03;
                iRdxNum         = 3'h1;
        end
        else if (iRadix == 4'h6) begin
                iQntNum         = 8'h0f;
                iRdxNum         = 3'h2;
        end
        else if (iRadix == 4'h8) begin
                iQntNum         = 8'h3f;
                iRdxNum         = 3'h3;
        end
        else begin
                // MRR (any other iRadix value lands here)
                iQntNum         = 8'hff;
                iRdxNum         = 3'h4;
        end
end

// iTrig: the point counter has reached its terminal count on a valid input.
// iFin:  iTrig while the phase counter is also at its terminal count.
assign iTrig            = iVld & (qntCnt == iQntNum);
assign iFin             = iTrig & (rdxCnt == iRdxNum);

// -----------------------------------------------------------------------------
// Pipeline (Valid Control)
// 11-stage pipeline that carries the counter values etc. forward so that the
// SRAM addresses can be generated. All stages share one clocked process:
// reset clears every stage; otherwise each stage loads from its predecessor
// whenever the stall signal of the preceding stage is low.
always @(posedge clk)
        if (reset) begin
                {vld_0,  fin_0,  index_0,  phase_0,  radix_0}   <= #1 {MRR+7{1'b0}};
                {vld_1,  fin_1,  index_1,  phase_1,  radix_1}   <= #1 {MRR+7{1'b0}};
                {vld_2,  fin_2,  index_2,  phase_2,  radix_2}   <= #1 {MRR+7{1'b0}};
                {vld_3,  fin_3,  index_3,  phase_3,  radix_3}   <= #1 {MRR+7{1'b0}};
                {vld_4,  fin_4,  index_4,  phase_4,  radix_4}   <= #1 {MRR+7{1'b0}};
                {vld_5,  fin_5,  index_5,  phase_5,  radix_5}   <= #1 {MRR+7{1'b0}};
                {vld_6,  fin_6,  index_6,  phase_6,  radix_6}   <= #1 {MRR+7{1'b0}};
                {vld_7,  fin_7,  index_7,  phase_7,  radix_7}   <= #1 {MRR+7{1'b0}};
                {vld_8,  fin_8,  index_8,  phase_8,  radix_8}   <= #1 {MRR+7{1'b0}};
                {vld_9,  fin_9,  index_9,  phase_9,  radix_9}   <= #1 {MRR+7{1'b0}};
                {vld_10, fin_10, index_10, phase_10, radix_10}  <= #1 {MRR+7{1'b0}};
        end
        else begin
                // Stage 0 samples the front-end counters and handshake.
                if (!stall_s)
                        {vld_0, fin_0, index_0, phase_0, radix_0}
                                <= #1 {iVld & iEn, iFin, qntCnt, rdxCnt, iRadix};

                // Stages 1..10 simply shift the tuple along.
                if (!stall_0)
                        {vld_1, fin_1, index_1, phase_1, radix_1}
                                <= #1 {vld_0, fin_0, index_0, phase_0, radix_0};
                if (!stall_1)
                        {vld_2, fin_2, index_2, phase_2, radix_2}
                                <= #1 {vld_1, fin_1, index_1, phase_1, radix_1};
                if (!stall_2)
                        {vld_3, fin_3, index_3, phase_3, radix_3}
                                <= #1 {vld_2, fin_2, index_2, phase_2, radix_2};
                if (!stall_3)
                        {vld_4, fin_4, index_4, phase_4, radix_4}
                                <= #1 {vld_3, fin_3, index_3, phase_3, radix_3};
                if (!stall_4)
                        {vld_5, fin_5, index_5, phase_5, radix_5}
                                <= #1 {vld_4, fin_4, index_4, phase_4, radix_4};
                if (!stall_5)
                        {vld_6, fin_6, index_6, phase_6, radix_6}
                                <= #1 {vld_5, fin_5, index_5, phase_5, radix_5};
                if (!stall_6)
                        {vld_7, fin_7, index_7, phase_7, radix_7}
                                <= #1 {vld_6, fin_6, index_6, phase_6, radix_6};
                if (!stall_7)
                        {vld_8, fin_8, index_8, phase_8, radix_8}
                                <= #1 {vld_7, fin_7, index_7, phase_7, radix_7};
                if (!stall_8)
                        {vld_9, fin_9, index_9, phase_9, radix_9}
                                <= #1 {vld_8, fin_8, index_8, phase_8, radix_8};
                if (!stall_9)
                        {vld_10, fin_10, index_10, phase_10, radix_10}
                                <= #1 {vld_9, fin_9, index_9, phase_9, radix_9};
        end

// -----------------------------------------------------------------------------
// Pipeline (Stall Control)
// Buffered-type pipeline: a stage is stalled only when the stage after it
// holds valid data and is itself stalled.
// Given the total amount of processing the pipeline length is insignificant,
// so the basic type could be used instead (drop the vld terms below).
// Listed from the output end back towards the input.
assign stall_10         = oStall && fin_10;
assign stall_9          = vld_10 && stall_10;
assign stall_8          = vld_9  && stall_9;
assign stall_7          = vld_8  && stall_8;
assign stall_6          = vld_7  && stall_7;
assign stall_5          = vld_6  && stall_6;
assign stall_4          = vld_5  && stall_5;
assign stall_3          = vld_4  && stall_4;
assign stall_2          = vld_3  && stall_3;
assign stall_1          = vld_2  && stall_2;
assign stall_0          = vld_1  && stall_1;
assign stall_s          = vld_0  && stall_0;

// -----------------------------------------------------------------------------
// SRAM Read Address & Data Latch
// Applies the data-address transformation for each of the four ports; the
// results are used as the SRAM read addresses.
// The four ports differ only in the 2 LSBs appended to qntCnt (0..3).
always @(posedge clk)
        if (!stall_s) begin
                addr0_0         <= #1 addrInFunc({qntCnt, 2'h0}, rdxCnt, iRadix);
                addr1_0         <= #1 addrInFunc({qntCnt, 2'h1}, rdxCnt, iRadix);
                addr2_0         <= #1 addrInFunc({qntCnt, 2'h2}, rdxCnt, iRadix);
                addr3_0         <= #1 addrInFunc({qntCnt, 2'h3}, rdxCnt, iRadix);
        end

// Read data leaves the SRAM one cycle later, so the data-distribution
// information (the 2 address LSBs) is latched to delay it by the same amount.
always @(posedge clk)
        if (!stall_0) begin
                addr0_1         <= #1 addr0_0[1:0];
                addr1_1         <= #1 addr1_0[1:0];
                addr2_1         <= #1 addr2_0[1:0];
                addr3_1         <= #1 addr3_0[1:0];
        end

// The 2 LSBs of the four SRAM addresses are always mutually exclusive, and are
// used to distribute the accesses over the SRAM banks.
// rdBankMux returns the read data of the bank named by sel (AND-OR select).
function [31:0] rdBankMux;
        input   [1:0]           sel;
        input   [31:0]          bank0, bank1, bank2, bank3;

        begin
                rdBankMux       = bank0 & {32{sel == 2'h0}}
                                | bank1 & {32{sel == 2'h1}}
                                | bank2 & {32{sel == 2'h2}}
                                | bank3 & {32{sel == 2'h3}};
        end
endfunction

// Note that data 1 and data 2 are crossed here (radix-4 theory):
// data 1 is steered by addr2_1 and data 2 by addr1_1.
assign {dIm0, dRe0}     = rdBankMux(addr0_1, ram0RD, ram1RD, ram2RD, ram3RD);
assign {dIm1, dRe1}     = rdBankMux(addr2_1, ram0RD, ram1RD, ram2RD, ram3RD);  // addr2_1!
assign {dIm2, dRe2}     = rdBankMux(addr1_1, ram0RD, ram1RD, ram2RD, ram3RD);  // addr1_1!
assign {dIm3, dRe3}     = rdBankMux(addr3_1, ram0RD, ram1RD, ram2RD, ram3RD);

// Latch the SRAM read data once (stage 2), together with the coefficients.
// Only updated when stage 1 holds valid data and stage 1 is not stalled.
always @(posedge clk)
        if (vld_1 & !stall_1) begin
                dRe0_2          <= #1 dRe0;
                dRe1_2          <= #1 dRe1;
                dRe2_2          <= #1 dRe2;
                dRe3_2          <= #1 dRe3;
                dIm0_2          <= #1 dIm0;
                dIm1_2          <= #1 dIm1;
                dIm2_2          <= #1 dIm2;
                dIm3_2          <= #1 dIm3;
                wRe0_2          <= #1 wRe0;
                wRe1_2          <= #1 wRe1;
                wRe2_2          <= #1 wRe2;
                wRe3_2          <= #1 wRe3;
                wIm0_2          <= #1 wIm0;
                wIm1_2          <= #1 wIm1;
                wIm2_2          <= #1 wIm2;
                wIm3_2          <= #1 wIm3;
        end

// The arithmetic units follow. In real code use named port connections!
// (positional connections are used here only to keep the listing readable)
// NOTE(review): fmul and fadd are defined outside this view; the meaning of the
// skipped (empty) port positions cannot be confirmed from here.

// -----------------------------------------------------------------------------
// Stage 3,4 - Data x Coef
// 16 multipliers for the complex multiplications; the first four could be
// omitted because their coefficient is fixed at (1.0, 0.0).
fmul mul_rr0 (vld_2,, dRe0_2, wRe0_2,, stall_4, rr0_4, reset, clk);
fmul mul_ri0 (vld_2,, dRe0_2, wIm0_2,, stall_4, ri0_4, reset, clk);
fmul mul_ir0 (vld_2,, dIm0_2, wRe0_2,, stall_4, ir0_4, reset, clk);
fmul mul_ii0 (vld_2,, dIm0_2, wIm0_2,, stall_4, ii0_4, reset, clk);

fmul mul_rr1 (vld_2,, dRe1_2, wRe1_2,, stall_4, rr1_4, reset, clk);
fmul mul_ri1 (vld_2,, dRe1_2, wIm1_2,, stall_4, ri1_4, reset, clk);
fmul mul_ir1 (vld_2,, dIm1_2, wRe1_2,, stall_4, ir1_4, reset, clk);
fmul mul_ii1 (vld_2,, dIm1_2, wIm1_2,, stall_4, ii1_4, reset, clk);

fmul mul_rr2 (vld_2,, dRe2_2, wRe2_2,, stall_4, rr2_4, reset, clk);
fmul mul_ri2 (vld_2,, dRe2_2, wIm2_2,, stall_4, ri2_4, reset, clk);
fmul mul_ir2 (vld_2,, dIm2_2, wRe2_2,, stall_4, ir2_4, reset, clk);
fmul mul_ii2 (vld_2,, dIm2_2, wIm2_2,, stall_4, ii2_4, reset, clk);

fmul mul_rr3 (vld_2,, dRe3_2, wRe3_2,, stall_4, rr3_4, reset, clk);
fmul mul_ri3 (vld_2,, dRe3_2, wIm3_2,, stall_4, ri3_4, reset, clk);
fmul mul_ir3 (vld_2,, dIm3_2, wRe3_2,, stall_4, ir3_4, reset, clk);
fmul mul_ii3 (vld_2,, dIm3_2, wIm3_2,, stall_4, ii3_4, reset, clk);

// -----------------------------------------------------------------------------
// Stage 5,6 - Re-Re/Im+Im
// 8 adders for the complex multiplications; the first two could be omitted
// because their coefficient is fixed at (1.0, 0.0).
// XORing an operand with NEG means that operand is subtracted.
fadd add_r0 (vld_4,, rr0_4, ii0_4 ^ NEG,, stall_6, re0_6, reset, clk);
fadd add_i0 (vld_4,, ri0_4, ir0_4,,       stall_6, im0_6, reset, clk);

fadd add_r1 (vld_4,, rr1_4, ii1_4 ^ NEG,, stall_6, re1_6, reset, clk);
fadd add_i1 (vld_4,, ri1_4, ir1_4,,       stall_6, im1_6, reset, clk);

fadd add_r2 (vld_4,, rr2_4, ii2_4 ^ NEG,, stall_6, re2_6, reset, clk);
fadd add_i2 (vld_4,, ri2_4, ir2_4,,       stall_6, im2_6, reset, clk);

fadd add_r3 (vld_4,, rr3_4, ii3_4 ^ NEG,, stall_6, re3_6, reset, clk);
fadd add_i3 (vld_4,, ri3_4, ir3_4,,       stall_6, im3_6, reset, clk);

// -----------------------------------------------------------------------------
// Stage 7,8 - Det0
// 8 adders for the pre-processing of the matrix operation:
// sums (p*) and differences (m*) of the pairs (0,2) and (1,3).
fadd add_pr02 (vld_6,, re0_6, re2_6,,       stall_8, pRe02_8, reset, clk);
fadd add_pi02 (vld_6,, im0_6, im2_6,,       stall_8, pIm02_8, reset, clk);

fadd add_mr02 (vld_6,, re0_6, re2_6 ^ NEG,, stall_8, mRe02_8, reset, clk);
fadd add_mi02 (vld_6,, im0_6, im2_6 ^ NEG,, stall_8, mIm02_8, reset, clk);

fadd add_pr13 (vld_6,, re1_6, re3_6,,       stall_8, pRe13_8, reset, clk);
fadd add_pi13 (vld_6,, im1_6, im3_6,,       stall_8, pIm13_8, reset, clk);

fadd add_mr13 (vld_6,, re1_6, re3_6 ^ NEG,, stall_8, mRe13_8, reset, clk);
fadd add_mi13 (vld_6,, im1_6, im3_6 ^ NEG,, stall_8, mIm13_8, reset, clk);

// -----------------------------------------------------------------------------
// Stage 9,10 - Det1
//     X0 = P02 + P13
//         Re(X0) = Re(P02) + Re(P13)
//         Im(X0) = Im(P02) + Im(P13)
//     X1 = M02 - jM13
//         Re(X1) = Re(M02) + Im(M13)
//         Im(X1) = Im(M02) - Re(M13)
//     X2 = P02 - P13
//         Re(X2) = Re(P02) - Re(P13)
//         Im(X2) = Im(P02) - Im(P13)
//     X3 = M02 + jM13
//         Re(X3) = Re(M02) - Im(M13)
//         Im(X3) = Im(M02) + Re(M13)
// 8 adders for the post-processing of the matrix operation
fadd add_xr0 (vld_8,, pRe02_8, pRe13_8,,       stall_10, xRe0_10, reset, clk);
fadd add_xi0 (vld_8,, pIm02_8, pIm13_8,,       stall_10, xIm0_10, reset, clk);

fadd add_xr1 (vld_8,, mRe02_8, mIm13_8,,       stall_10, xRe1_10, reset, clk);
fadd add_xi1 (vld_8,, mIm02_8, mRe13_8 ^ NEG,, stall_10, xIm1_10, reset, clk);

fadd add_xr2 (vld_8,, pRe02_8, pRe13_8 ^ NEG,, stall_10, xRe2_10, reset, clk);
fadd add_xi2 (vld_8,, pIm02_8, pIm13_8 ^ NEG,, stall_10, xIm2_10, reset, clk);

fadd add_xr3 (vld_8,, mRe02_8, mIm13_8 ^ NEG,, stall_10, xRe3_10, reset, clk);
fadd add_xi3 (vld_8,, mIm02_8, mRe13_8,,       stall_10, xIm3_10, reset, clk);

// -----------------------------------------------------------------------------
// SRAM Write Address Latch
// Applies the data-address transformation for each of the four ports one stage
// ahead (from the stage-9 values); the results are used as the SRAM write addresses.
always @(posedge clk)
        if (!stall_9) begin
                addr0_10        <= #1 addrOutFunc({index_9, 2'h0}, phase_9, radix_9);
                addr1_10        <= #1 addrOutFunc({index_9, 2'h1}, phase_9, radix_9);
                addr2_10        <= #1 addrOutFunc({index_9, 2'h2}, phase_9, radix_9);
                addr3_10        <= #1 addrOutFunc({index_9, 2'h3}, phase_9, radix_9);
        end

// -----------------------------------------------------------------------------
// Output
// oVld is raised only for the stage-10 entry that carries the fin flag.
assign oVld             = vld_10 & fin_10;

// -----------------------------------------------------------------------------
// SRAM Flip
// Flipping on every FFT execution selects the SRAM set in use, separately for
// read and for write.
// The "SRAM in use" condition (vld = 1) is folded into the outputs as well
// (the top module uses these signals to blend the data).
// rFlip toggles when the final entry (fin_0) leaves stage 0 unstalled.
always @(posedge clk)
        if (reset)
                rFlip           <= #1 1'b0;
        else if (vld_0 & fin_0 & !stall_0)
                rFlip           <= #1 ~rFlip;

// wFlip toggles when the final entry (fin_10) leaves stage 10 unstalled.
always @(posedge clk)
        if (reset)
                wFlip           <= #1 1'b0;
        else if (vld_10 & fin_10 & !stall_10)
                wFlip           <= #1 ~wFlip;

// The active bit is placed in bit 0 or bit 1 of the 2-bit select according to the flip flag.
assign ramRF            = {1'b0, vld_0 & !stall_0} << rFlip;
assign ramWF            = {1'b0, vld_10 & !stall_10} << wFlip;

// -----------------------------------------------------------------------------
// Coefficient
// The coefficient table is accessed one cycle after the SRAM access
// (because the SRAM latency differs), hence the stage-1 values are used.
assign wIndex           = index_1;
assign wPhase           = phase_1;

// -----------------------------------------------------------------------------
// SRAM Read
// Generates the SRAM read-side signals.
// ramRP indicates the region, but as written this only works for
// iRadix=64,1024 and is therefore not acceptable (see footnote [4] of the
// accompanying article).
assign ramRP            = phase_0[0];

// The stall is applied to the SRAM, and RE (read enable) is kept inactive when
// the port is unused, with lower power consumption in mind.
// All four banks share the same enable.
assign {ram0RE, ram1RE, ram2RE, ram3RE}
                        = {4{vld_0 & !stall_0}};

// Row address presented to one bank: the upper bits of whichever of the four
// port addresses has its 2 LSBs equal to that bank number (AND-OR select).
function [MRR-3:0] rdBankAddr;
        input   [1:0]           bank;
        input   [MRR-1:0]       port0, port1, port2, port3;

        begin
                rdBankAddr      = port0[MRR-1:2] & {MRR-2{port0[1:0] == bank}}
                                | port1[MRR-1:2] & {MRR-2{port1[1:0] == bank}}
                                | port2[MRR-1:2] & {MRR-2{port2[1:0] == bank}}
                                | port3[MRR-1:2] & {MRR-2{port3[1:0] == bank}};
        end
endfunction

assign ram0RA           = rdBankAddr(2'h0, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram1RA           = rdBankAddr(2'h1, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram2RA           = rdBankAddr(2'h2, addr0_0, addr1_0, addr2_0, addr3_0);
assign ram3RA           = rdBankAddr(2'h3, addr0_0, addr1_0, addr2_0, addr3_0);

// -----------------------------------------------------------------------------
// SRAM Write
// Generates the SRAM write-side signals.
// ramWP indicates the region, but as written this only works for
// iRadix=64,1024 and is therefore not acceptable (see footnote [4] of the
// accompanying article).
assign ramWP            = ~phase_10[0];

// The stall is applied to the SRAM, and WE (write enable) is kept inactive when
// the port is unused, with lower power consumption in mind.
// All four banks share the same enable.
assign {ram0WE, ram1WE, ram2WE, ram3WE}
                        = {4{vld_10 & !stall_10}};

// Row address presented to one bank: the upper bits of whichever of the four
// port addresses has its 2 LSBs equal to that bank number (AND-OR select).
function [MRR-3:0] wrBankAddr;
        input   [1:0]           bank;
        input   [MRR-1:0]       port0, port1, port2, port3;

        begin
                wrBankAddr      = port0[MRR-1:2] & {MRR-2{port0[1:0] == bank}}
                                | port1[MRR-1:2] & {MRR-2{port1[1:0] == bank}}
                                | port2[MRR-1:2] & {MRR-2{port2[1:0] == bank}}
                                | port3[MRR-1:2] & {MRR-2{port3[1:0] == bank}};
        end
endfunction

// Write data presented to one bank: the {Im, Re} word of whichever port
// address has its 2 LSBs equal to that bank number (AND-OR select).
function [31:0] wrBankData;
        input   [1:0]           bank;
        input   [MRR-1:0]       port0, port1, port2, port3;
        input   [31:0]          word0, word1, word2, word3;

        begin
                wrBankData      = word0 & {32{port0[1:0] == bank}}
                                | word1 & {32{port1[1:0] == bank}}
                                | word2 & {32{port2[1:0] == bank}}
                                | word3 & {32{port3[1:0] == bank}};
        end
endfunction

assign ram0WA           = wrBankAddr(2'h0, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram1WA           = wrBankAddr(2'h1, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram2WA           = wrBankAddr(2'h2, addr0_10, addr1_10, addr2_10, addr3_10);
assign ram3WA           = wrBankAddr(2'h3, addr0_10, addr1_10, addr2_10, addr3_10);

assign ram0WD           = wrBankData(2'h0,
                                addr0_10, addr1_10, addr2_10, addr3_10,
                                {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                                {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});

assign ram1WD           = wrBankData(2'h1,
                                addr0_10, addr1_10, addr2_10, addr3_10,
                                {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                                {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});

assign ram2WD           = wrBankData(2'h2,
                                addr0_10, addr1_10, addr2_10, addr3_10,
                                {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                                {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});

assign ram3WD           = wrBankData(2'h3,
                                addr0_10, addr1_10, addr2_10, addr3_10,
                                {xIm0_10, xRe0_10}, {xIm1_10, xRe1_10},
                                {xIm2_10, xRe2_10}, {xIm3_10, xRe3_10});

// **************************** FUNCTIONS and TASKS ****************************

// Input (read-side) SRAM address generation:
// first the counter bits are rotated according to the phase, giving the
// address ordering suited to the radix-4 input; then a scramble is XORed onto
// the 2 LSBs to undo the address scramble applied in the previous phase.
// The bit slices are written out for a 10-bit index.
function [MRR-1:0] addrInFunc;
        input   [MRR-1:0]       idx;
        input   [2:0]           phase;
        input   [3:0]           radix;
        reg     [MRR-1:0]       rotated;
        reg     [1:0]           scramble;

        begin
                // Rotation and scramble are both selected by the phase.
                casex (phase)
                        3'h1: begin
                                rotated         = {idx[9:4], idx[1:0], idx[3:2]};
                                scramble        = idx[1:0];
                        end
                        3'h2: begin
                                rotated         = {idx[9:6], idx[1:0], idx[5:2]};
                                scramble        = idx[1:0] ^ idx[5:4];
                        end
                        3'h3: begin
                                rotated         = {idx[9:8], idx[1:0], idx[7:2]};
                                scramble        = idx[1:0] ^ idx[7:6];
                        end
                        3'h4: begin
                                rotated         = {idx[1:0], idx[9:2]};
                                scramble        = idx[1:0] ^ idx[9:8];
                        end
                        default: begin
                                rotated         = idx;
                                // Address scramble matching LoadData
                                // (applying it twice restores the original)
                                case (radix)
                                        4'h4:           scramble = idx[3:2];
                                        4'h6:           scramble = idx[5:4];
                                        4'h8:           scramble = idx[7:6];
                                        default:        scramble = idx[MRR-1:MRR-2];
                                endcase
                        end
                endcase

                // The scramble only ever touches the 2 LSBs.
                addrInFunc      = rotated ^ {{MRR-2{1'b0}}, scramble};
        end
endfunction

// Output (write-side) SRAM address generation:
// first the counter bits are rotated according to the phase, giving the
// address ordering suited to the radix-4 output; then, unless this is the
// final phase (border), a scramble is XORed onto the 2 LSBs to undo the
// address scramble applied in the previous phase.
//   idx    : {pipeline index, 2-bit port number}
//   phase  : current phase number
//   radix  : selects which phase is the final one (4, 6, 8, otherwise MRR)
// The bit slices are written out for a 10-bit index.
function [MRR-1:0] addrOutFunc;
        input   [MRR-1:0]       idx;
        input   [2:0]           phase;
        input   [3:0]           radix;
        reg                     border;
        reg     [MRR-1:0]       result;
        reg     [MRR-1:0]       twid;

        begin
                // border = this is the last phase for the given radix
                case (radix)
                        4'h4:           border  = (phase == 3'h1);
                        4'h6:           border  = (phase == 3'h2);
                        4'h8:           border  = (phase == 3'h3);
                        default:        border  = (phase == 3'h4);
                endcase

                // Rotate
                case (phase)
                        3'h1:           result  = {idx[9:4], idx[1:0], idx[3:2]};
                        3'h2:           result  = {idx[9:6], idx[1:0], idx[5:2]};
                        3'h3:           result  = {idx[9:8], idx[1:0], idx[7:2]};
                        3'h4:           result  = {          idx[1:0], idx[9:2]};
                        default:        result  = idx;
                endcase

                // Twiddle & Twiddle Factor
                casex ({border, phase})
                        {1'b0, 3'h0}:   twid    = {{MRR-2{1'b0}},               result[3:2]};
                        {1'b0, 3'h1}:   twid    = {{MRR-2{1'b0}}, result[3:2] ^ result[5:4]};
                        {1'b0, 3'h2}:   twid    = {{MRR-2{1'b0}}, result[5:4] ^ result[7:6]};
                        {1'b0, 3'h3}:   twid    = {{MRR-2{1'b0}}, result[7:6] ^ result[9:8]};
                        default:
                        // Address scramble matching StoreData
                        // (applying it twice restores the original)
                        case (radix)
                                4'h4:   twid    = {{MRR-2{1'b0}}, result[3:2]};
                                4'h6:   twid    = {{MRR-2{1'b0}}, result[5:4]};
                                4'h8:   twid    = {{MRR-2{1'b0}}, result[7:6]};
                                default:
                                        twid    = {{MRR-2{1'b0}}, result[MRR-1:MRR-2]};
                        endcase
                endcase

                // Result
                // The return value must be assigned to this function's own
                // name; assigning addrInFunc here (as the code previously did)
                // leaves addrOutFunc without a defined return value.
                addrOutFunc     = result ^ twid;
        end
endfunction

endmodule       // bfCalc

// *****************************************************************************
        

コード(Coef RTL)

/* **************************** MODULE PREAMBLE ********************************

        Copyright (c) 2012, ArchiTek
        This document constitutes confidential and proprietary information
        of ArchiTek. All rights reserved.
*/

// ***************************** MODULE HEADER *********************************

// fftCoef: combinational twiddle-coefficient ROM for the radix-4 butterfly.
//
// For a given (wIndex, wPhase) it returns four complex coefficients
// (wRe0/wIm0 .. wRe3/wIm3) encoded as IEEE 754 binary16 values.
// Port 0 always uses angle 0; ports 1..3 use 1x, 2x and 3x the base angle
// derived from wIndex/wPhase.  There is no clock: every output is a pure
// function of the inputs.
//
// Unknown (X) selector bits propagate to the outputs as X instead of being
// silently matched by a wildcard case item.
module fftCoef (
        wIndex,
        wPhase,
        wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3
        );

// ************************* PARAMETER DECLARATIONS ****************************

        // Exponent of the largest supported point count; sets the angle width.
        // NOTE: the literal widths/shift amounts in the Prepare block and the
        // 256-entry table in coefFunc are written for MRR = 10.
        parameter               MRR     = 10;           // Max Radix Radix

        // Frequently used binary16 constants
        parameter               ZERO    = 16'h0000;     //  0.0
        parameter               ONEP    = 16'h3c00;     // +1.0
        parameter               ONEM    = 16'hbc00;     // -1.0

// *************************** I/O DECLARATIONS ********************************

        // This is a ROM (combinational logic), so there is no clock
        input   [MRR-3:0]       wIndex;
        input   [2:0]           wPhase;
        output  [15:0]          wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3;

// **************************** LOCAL DECLARATIONS *****************************

        // Rotation Primitive
        reg     [MRR-3:0]       index;                          // phase-masked copy of wIndex
        reg     [3:0]           div;                            // left shift applied to index
        wire    [MRR-1:0]       rot;                            // base angle

        // Index
        wire    [MRR-1:0]       index0, index1, index2, index3; // full angle per port
        wire    [MRR-3:0]       fIndex0, fIndex1, fIndex2, fIndex3;     // low bits of the angle
        wire    [MRR-3:0]       iIndex0, iIndex1, iIndex2, iIndex3;     // two's-complement of fIndex

        // Indicator
        wire                    z0, z1, z2, z3;                 // Zero
        wire    [15:0]          rs0, rs1, rs2, rs3;             // Re Sign
        wire    [15:0]          is0, is1, is2, is3;             // Im Sign

        // Temporary
        reg     [15:0]          wRe0, wIm0, wRe1, wIm1, wRe2, wIm2, wRe3, wIm3;

// ******************************** MODULE BODY ********************************

// -----------------------------------------------------------------------------
// Prepare
// The angular granularity used to access the table depends on the phase, so
// keep only the low 0/2/4/6/8 bits of wIndex and choose the matching shift.
always @(
        wIndex or
        wPhase
        )
        case (wPhase)
                3'h0: begin
                        index           = 8'd0;
                        div             = 4'h8;
                end
                3'h1: begin
                        index           = {6'd0, wIndex[1:0]};
                        div             = 4'h6;
                end
                3'h2: begin
                        index           = {4'd0, wIndex[3:0]};
                        div             = 4'h4;
                end
                3'h3: begin
                        index           = {2'd0, wIndex[5:0]};
                        div             = 4'h2;
                end
                default: begin
                        index           = wIndex;
                        div             = 4'h0;
                end
        endcase

assign rot              = {2'h0, index} << div;

// The first radix-4 coefficient is always angle 0; the others are multiples
// (1x, 2x, 3x) of the angle computed above, wrapping modulo 2^MRR.
assign index0           = {MRR{1'b0}};
assign index1           = rot;
assign index2           = rot << 1;
assign index3           = rot + (rot << 1);

// fIndex is the angle with its top two bits removed; iIndex is its
// two's-complement negation.  One of them addresses the table for sin(), the
// other for cos() (see the case tables below).
assign fIndex0          = index0[MRR-3:0];
assign fIndex1          = index1[MRR-3:0];
assign fIndex2          = index2[MRR-3:0];
assign fIndex3          = index3[MRR-3:0];

assign iIndex0          = ~fIndex0 + 1'b1;
assign iIndex1          = ~fIndex1 + 1'b1;
assign iIndex2          = ~fIndex2 + 1'b1;
assign iIndex3          = ~fIndex3 + 1'b1;

// Detect a zero low part (the table is bypassed with exact constants then)
assign z0               = ~|index0[MRR-3:0];
assign z1               = ~|index1[MRR-3:0];
assign z2               = ~|index2[MRR-3:0];
assign z3               = ~|index3[MRR-3:0];

// Sign masks derived from the top two angle bits.  Only bit 15 (the binary16
// sign bit) can be set; the mask is XORed onto the table value.
assign rs0              = {^index0[MRR-1:MRR-2], 15'd0};
assign rs1              = {^index1[MRR-1:MRR-2], 15'd0};
assign rs2              = {^index2[MRR-1:MRR-2], 15'd0};
assign rs3              = {^index3[MRR-1:MRR-2], 15'd0};

assign is0              = {~index0[MRR-1], 15'd0};
assign is1              = {~index1[MRR-1], 15'd0};
assign is2              = {~index2[MRR-1], 15'd0};
assign is3              = {~index3[MRR-1], 15'd0};

// -----------------------------------------------------------------------------
// Rotation #0
// First radix-4 port (always 1.0 and 0.0, but written out for generality)
// Selector = {zero flag, top two angle bits}.  casez with '?' is used so that
// only the literal wildcard positions are don't-care; an X selector falls
// through to the default and propagates X.
always @(
        index0 or
        fIndex0 or
        iIndex0 or
        rs0 or
        z0
        )
        casez ({z0, index0[MRR-1:MRR-2]})
                3'b100: wRe0    = ONEP;                                 //  1.0
                3'b110: wRe0    = ONEM;                                 // -1.0
                3'b1?1: wRe0    = ZERO;                                 //  0.0
                3'b0?0: wRe0    = coefFunc(iIndex0) ^ rs0;              // cos()
                3'b0?1: wRe0    = coefFunc(fIndex0) ^ rs0;              // sin()
                default: wRe0   = 16'hxxxx;                             // unknown selector
        endcase

always @(
        index0 or
        fIndex0 or
        iIndex0 or
        is0 or
        z0
        )
        casez ({z0, index0[MRR-1:MRR-2]})
                3'b1?0: wIm0    = ZERO;                                 //  0.0
                3'b101: wIm0    = ONEM;                                 // -1.0
                3'b111: wIm0    = ONEP;                                 //  1.0
                3'b0?0: wIm0    = coefFunc(fIndex0) ^ is0;              // sin()
                3'b0?1: wIm0    = coefFunc(iIndex0) ^ is0;              // cos()
                default: wIm0   = 16'hxxxx;                             // unknown selector
        endcase

// -----------------------------------------------------------------------------
// Rotation #1
// Second radix-4 port
always @(
        index1 or
        fIndex1 or
        iIndex1 or
        rs1 or
        z1
        )
        casez ({z1, index1[MRR-1:MRR-2]})
                3'b100: wRe1    = ONEP;                                 //  1.0
                3'b110: wRe1    = ONEM;                                 // -1.0
                3'b1?1: wRe1    = ZERO;                                 //  0.0
                3'b0?0: wRe1    = coefFunc(iIndex1) ^ rs1;              // cos()
                3'b0?1: wRe1    = coefFunc(fIndex1) ^ rs1;              // sin()
                default: wRe1   = 16'hxxxx;                             // unknown selector
        endcase

always @(
        index1 or
        fIndex1 or
        iIndex1 or
        is1 or
        z1
        )
        casez ({z1, index1[MRR-1:MRR-2]})
                3'b1?0: wIm1    = ZERO;                                 //  0.0
                3'b101: wIm1    = ONEM;                                 // -1.0
                3'b111: wIm1    = ONEP;                                 //  1.0
                3'b0?0: wIm1    = coefFunc(fIndex1) ^ is1;              // sin()
                3'b0?1: wIm1    = coefFunc(iIndex1) ^ is1;              // cos()
                default: wIm1   = 16'hxxxx;                             // unknown selector
        endcase

// -----------------------------------------------------------------------------
// Rotation #2
// Third radix-4 port
always @(
        index2 or
        fIndex2 or
        iIndex2 or
        rs2 or
        z2
        )
        casez ({z2, index2[MRR-1:MRR-2]})
                3'b100: wRe2    = ONEP;                                 //  1.0
                3'b110: wRe2    = ONEM;                                 // -1.0
                3'b1?1: wRe2    = ZERO;                                 //  0.0
                3'b0?0: wRe2    = coefFunc(iIndex2) ^ rs2;              // cos()
                3'b0?1: wRe2    = coefFunc(fIndex2) ^ rs2;              // sin()
                default: wRe2   = 16'hxxxx;                             // unknown selector
        endcase

always @(
        index2 or
        fIndex2 or
        iIndex2 or
        is2 or
        z2
        )
        casez ({z2, index2[MRR-1:MRR-2]})
                3'b1?0: wIm2    = ZERO;                                 //  0.0
                3'b101: wIm2    = ONEM;                                 // -1.0
                3'b111: wIm2    = ONEP;                                 //  1.0
                3'b0?0: wIm2    = coefFunc(fIndex2) ^ is2;              // sin()
                3'b0?1: wIm2    = coefFunc(iIndex2) ^ is2;              // cos()
                default: wIm2   = 16'hxxxx;                             // unknown selector
        endcase

// -----------------------------------------------------------------------------
// Rotation #3
// Fourth radix-4 port
always @(
        index3 or
        fIndex3 or
        iIndex3 or
        rs3 or
        z3
        )
        casez ({z3, index3[MRR-1:MRR-2]})
                3'b100: wRe3    = ONEP;                                 //  1.0
                3'b110: wRe3    = ONEM;                                 // -1.0
                3'b1?1: wRe3    = ZERO;                                 //  0.0
                3'b0?0: wRe3    = coefFunc(iIndex3) ^ rs3;              // cos()
                3'b0?1: wRe3    = coefFunc(fIndex3) ^ rs3;              // sin()
                default: wRe3   = 16'hxxxx;                             // unknown selector
        endcase

always @(
        index3 or
        fIndex3 or
        iIndex3 or
        is3 or
        z3
        )
        casez ({z3, index3[MRR-1:MRR-2]})
                3'b1?0: wIm3    = ZERO;                                 //  0.0
                3'b101: wIm3    = ONEM;                                 // -1.0
                3'b111: wIm3    = ONEP;                                 //  1.0
                3'b0?0: wIm3    = coefFunc(fIndex3) ^ is3;              // sin()
                3'b0?1: wIm3    = coefFunc(iIndex3) ^ is3;              // cos()
                default: wIm3   = 16'hxxxx;                             // unknown selector
        endcase

// **************************** FUNCTIONS and TASKS ****************************

// The table itself, in IEEE 754 binary16 format.
// Spot checks (0x01, 0x2a, 0x80, 0xfc) match sin(index * pi / 512); the
// remaining entries are carried over unchanged.
// Extend this table if the maximum point count (MRR) is increased.
// The default returns X for an unknown or out-of-table index; without it the
// static local 'tmp' would return the value left over from a previous call.
function [15:0] coefFunc;
        input   [MRR-3:0]       index;
        reg     [15:0]          tmp;

        begin
                case (index)
                        8'h00:          tmp     = ZERO;
                        8'h01:          tmp     = 16'h1e48;
                        8'h02:          tmp     = 16'h2248;
                        8'h03:          tmp     = 16'h24b6;
                        8'h04:          tmp     = 16'h2648;
                        8'h05:          tmp     = 16'h27da;
                        8'h06:          tmp     = 16'h28b6;
                        8'h07:          tmp     = 16'h297f;
                        8'h08:          tmp     = 16'h2a48;
                        8'h09:          tmp     = 16'h2b11;
                        8'h0a:          tmp     = 16'h2bd9;
                        8'h0b:          tmp     = 16'h2c51;
                        8'h0c:          tmp     = 16'h2cb5;
                        8'h0d:          tmp     = 16'h2d1a;
                        8'h0e:          tmp     = 16'h2d7e;
                        8'h0f:          tmp     = 16'h2de2;
                        8'h10:          tmp     = 16'h2e46;
                        8'h11:          tmp     = 16'h2eaa;
                        8'h12:          tmp     = 16'h2f0e;
                        8'h13:          tmp     = 16'h2f72;
                        8'h14:          tmp     = 16'h2fd6;
                        8'h15:          tmp     = 16'h301d;
                        8'h16:          tmp     = 16'h304e;
                        8'h17:          tmp     = 16'h3080;
                        8'h18:          tmp     = 16'h30b2;
                        8'h19:          tmp     = 16'h30e4;
                        8'h1a:          tmp     = 16'h3115;
                        8'h1b:          tmp     = 16'h3147;
                        8'h1c:          tmp     = 16'h3179;
                        8'h1d:          tmp     = 16'h31aa;
                        8'h1e:          tmp     = 16'h31db;
                        8'h1f:          tmp     = 16'h320d;
                        8'h20:          tmp     = 16'h323e;
                        8'h21:          tmp     = 16'h326f;
                        8'h22:          tmp     = 16'h32a1;
                        8'h23:          tmp     = 16'h32d2;
                        8'h24:          tmp     = 16'h3303;
                        8'h25:          tmp     = 16'h3334;
                        8'h26:          tmp     = 16'h3365;
                        8'h27:          tmp     = 16'h3396;
                        8'h28:          tmp     = 16'h33c6;
                        8'h29:          tmp     = 16'h33f7;
                        8'h2a:          tmp     = 16'h3414;
                        8'h2b:          tmp     = 16'h342c;
                        8'h2c:          tmp     = 16'h3444;
                        8'h2d:          tmp     = 16'h345d;
                        8'h2e:          tmp     = 16'h3475;
                        8'h2f:          tmp     = 16'h348d;
                        8'h30:          tmp     = 16'h34a5;
                        8'h31:          tmp     = 16'h34bd;
                        8'h32:          tmp     = 16'h34d5;
                        8'h33:          tmp     = 16'h34ed;
                        8'h34:          tmp     = 16'h3505;
                        8'h35:          tmp     = 16'h351d;
                        8'h36:          tmp     = 16'h3534;
                        8'h37:          tmp     = 16'h354c;
                        8'h38:          tmp     = 16'h3564;
                        8'h39:          tmp     = 16'h357c;
                        8'h3a:          tmp     = 16'h3593;
                        8'h3b:          tmp     = 16'h35ab;
                        8'h3c:          tmp     = 16'h35c2;
                        8'h3d:          tmp     = 16'h35da;
                        8'h3e:          tmp     = 16'h35f1;
                        8'h3f:          tmp     = 16'h3608;
                        8'h40:          tmp     = 16'h361f;
                        8'h41:          tmp     = 16'h3637;
                        8'h42:          tmp     = 16'h364e;
                        8'h43:          tmp     = 16'h3665;
                        8'h44:          tmp     = 16'h367c;
                        8'h45:          tmp     = 16'h3693;
                        8'h46:          tmp     = 16'h36aa;
                        8'h47:          tmp     = 16'h36c1;
                        8'h48:          tmp     = 16'h36d7;
                        8'h49:          tmp     = 16'h36ee;
                        8'h4a:          tmp     = 16'h3705;
                        8'h4b:          tmp     = 16'h371b;
                        8'h4c:          tmp     = 16'h3732;
                        8'h4d:          tmp     = 16'h3748;
                        8'h4e:          tmp     = 16'h375e;
                        8'h4f:          tmp     = 16'h3775;
                        8'h50:          tmp     = 16'h378b;
                        8'h51:          tmp     = 16'h37a1;
                        8'h52:          tmp     = 16'h37b7;
                        8'h53:          tmp     = 16'h37cd;
                        8'h54:          tmp     = 16'h37e3;
                        8'h55:          tmp     = 16'h37f9;
                        8'h56:          tmp     = 16'h3807;
                        8'h57:          tmp     = 16'h3812;
                        8'h58:          tmp     = 16'h381d;
                        8'h59:          tmp     = 16'h3828;
                        8'h5a:          tmp     = 16'h3832;
                        8'h5b:          tmp     = 16'h383d;
                        8'h5c:          tmp     = 16'h3848;
                        8'h5d:          tmp     = 16'h3852;
                        8'h5e:          tmp     = 16'h385d;
                        8'h5f:          tmp     = 16'h3867;
                        8'h60:          tmp     = 16'h3872;
                        8'h61:          tmp     = 16'h387c;
                        8'h62:          tmp     = 16'h3887;
                        8'h63:          tmp     = 16'h3891;
                        8'h64:          tmp     = 16'h389b;
                        8'h65:          tmp     = 16'h38a6;
                        8'h66:          tmp     = 16'h38b0;
                        8'h67:          tmp     = 16'h38ba;
                        8'h68:          tmp     = 16'h38c4;
                        8'h69:          tmp     = 16'h38ce;
                        8'h6a:          tmp     = 16'h38d8;
                        8'h6b:          tmp     = 16'h38e2;
                        8'h6c:          tmp     = 16'h38ec;
                        8'h6d:          tmp     = 16'h38f6;
                        8'h6e:          tmp     = 16'h3900;
                        8'h6f:          tmp     = 16'h3909;
                        8'h70:          tmp     = 16'h3913;
                        8'h71:          tmp     = 16'h391d;
                        8'h72:          tmp     = 16'h3927;
                        8'h73:          tmp     = 16'h3930;
                        8'h74:          tmp     = 16'h393a;
                        8'h75:          tmp     = 16'h3943;
                        8'h76:          tmp     = 16'h394d;
                        8'h77:          tmp     = 16'h3956;
                        8'h78:          tmp     = 16'h395f;
                        8'h79:          tmp     = 16'h3969;
                        8'h7a:          tmp     = 16'h3972;
                        8'h7b:          tmp     = 16'h397b;
                        8'h7c:          tmp     = 16'h3984;
                        8'h7d:          tmp     = 16'h398d;
                        8'h7e:          tmp     = 16'h3996;
                        8'h7f:          tmp     = 16'h399f;
                        8'h80:          tmp     = 16'h39a8;
                        8'h81:          tmp     = 16'h39b1;
                        8'h82:          tmp     = 16'h39ba;
                        8'h83:          tmp     = 16'h39c3;
                        8'h84:          tmp     = 16'h39cb;
                        8'h85:          tmp     = 16'h39d4;
                        8'h86:          tmp     = 16'h39dc;
                        8'h87:          tmp     = 16'h39e5;
                        8'h88:          tmp     = 16'h39ed;
                        8'h89:          tmp     = 16'h39f6;
                        8'h8a:          tmp     = 16'h39fe;
                        8'h8b:          tmp     = 16'h3a07;
                        8'h8c:          tmp     = 16'h3a0f;
                        8'h8d:          tmp     = 16'h3a17;
                        8'h8e:          tmp     = 16'h3a1f;
                        8'h8f:          tmp     = 16'h3a27;
                        8'h90:          tmp     = 16'h3a2f;
                        8'h91:          tmp     = 16'h3a37;
                        8'h92:          tmp     = 16'h3a3f;
                        8'h93:          tmp     = 16'h3a47;
                        8'h94:          tmp     = 16'h3a4f;
                        8'h95:          tmp     = 16'h3a56;
                        8'h96:          tmp     = 16'h3a5e;
                        8'h97:          tmp     = 16'h3a65;
                        8'h98:          tmp     = 16'h3a6d;
                        8'h99:          tmp     = 16'h3a74;
                        8'h9a:          tmp     = 16'h3a7c;
                        8'h9b:          tmp     = 16'h3a83;
                        8'h9c:          tmp     = 16'h3a8a;
                        8'h9d:          tmp     = 16'h3a92;
                        8'h9e:          tmp     = 16'h3a99;
                        8'h9f:          tmp     = 16'h3aa0;
                        8'ha0:          tmp     = 16'h3aa7;
                        8'ha1:          tmp     = 16'h3aae;
                        8'ha2:          tmp     = 16'h3ab5;
                        8'ha3:          tmp     = 16'h3abc;
                        8'ha4:          tmp     = 16'h3ac2;
                        8'ha5:          tmp     = 16'h3ac9;
                        8'ha6:          tmp     = 16'h3ad0;
                        8'ha7:          tmp     = 16'h3ad6;
                        8'ha8:          tmp     = 16'h3add;
                        8'ha9:          tmp     = 16'h3ae3;
                        8'haa:          tmp     = 16'h3ae9;
                        8'hab:          tmp     = 16'h3af0;
                        8'hac:          tmp     = 16'h3af6;
                        8'had:          tmp     = 16'h3afc;
                        8'hae:          tmp     = 16'h3b02;
                        8'haf:          tmp     = 16'h3b08;
                        8'hb0:          tmp     = 16'h3b0e;
                        8'hb1:          tmp     = 16'h3b14;
                        8'hb2:          tmp     = 16'h3b1a;
                        8'hb3:          tmp     = 16'h3b20;
                        8'hb4:          tmp     = 16'h3b25;
                        8'hb5:          tmp     = 16'h3b2b;
                        8'hb6:          tmp     = 16'h3b30;
                        8'hb7:          tmp     = 16'h3b36;
                        8'hb8:          tmp     = 16'h3b3b;
                        8'hb9:          tmp     = 16'h3b41;
                        8'hba:          tmp     = 16'h3b46;
                        8'hbb:          tmp     = 16'h3b4b;
                        8'hbc:          tmp     = 16'h3b50;
                        8'hbd:          tmp     = 16'h3b55;
                        8'hbe:          tmp     = 16'h3b5a;
                        8'hbf:          tmp     = 16'h3b5f;
                        8'hc0:          tmp     = 16'h3b64;
                        8'hc1:          tmp     = 16'h3b69;
                        8'hc2:          tmp     = 16'h3b6e;
                        8'hc3:          tmp     = 16'h3b72;
                        8'hc4:          tmp     = 16'h3b77;
                        8'hc5:          tmp     = 16'h3b7b;
                        8'hc6:          tmp     = 16'h3b80;
                        8'hc7:          tmp     = 16'h3b84;
                        8'hc8:          tmp     = 16'h3b88;
                        8'hc9:          tmp     = 16'h3b8c;
                        8'hca:          tmp     = 16'h3b91;
                        8'hcb:          tmp     = 16'h3b95;
                        8'hcc:          tmp     = 16'h3b99;
                        8'hcd:          tmp     = 16'h3b9d;
                        8'hce:          tmp     = 16'h3ba0;
                        8'hcf:          tmp     = 16'h3ba4;
                        8'hd0:          tmp     = 16'h3ba8;
                        8'hd1:          tmp     = 16'h3bab;
                        8'hd2:          tmp     = 16'h3baf;
                        8'hd3:          tmp     = 16'h3bb2;
                        8'hd4:          tmp     = 16'h3bb6;
                        8'hd5:          tmp     = 16'h3bb9;
                        8'hd6:          tmp     = 16'h3bbc;
                        8'hd7:          tmp     = 16'h3bc0;
                        8'hd8:          tmp     = 16'h3bc3;
                        8'hd9:          tmp     = 16'h3bc6;
                        8'hda:          tmp     = 16'h3bc9;
                        8'hdb:          tmp     = 16'h3bcb;
                        8'hdc:          tmp     = 16'h3bce;
                        8'hdd:          tmp     = 16'h3bd1;
                        8'hde:          tmp     = 16'h3bd4;
                        8'hdf:          tmp     = 16'h3bd6;
                        8'he0:          tmp     = 16'h3bd9;
                        8'he1:          tmp     = 16'h3bdb;
                        8'he2:          tmp     = 16'h3bdd;
                        8'he3:          tmp     = 16'h3be0;
                        8'he4:          tmp     = 16'h3be2;
                        8'he5:          tmp     = 16'h3be4;
                        8'he6:          tmp     = 16'h3be6;
                        8'he7:          tmp     = 16'h3be8;
                        8'he8:          tmp     = 16'h3bea;
                        8'he9:          tmp     = 16'h3bec;
                        8'hea:          tmp     = 16'h3bed;
                        8'heb:          tmp     = 16'h3bef;
                        8'hec:          tmp     = 16'h3bf1;
                        8'hed:          tmp     = 16'h3bf2;
                        8'hee:          tmp     = 16'h3bf4;
                        8'hef:          tmp     = 16'h3bf5;
                        8'hf0:          tmp     = 16'h3bf6;
                        8'hf1:          tmp     = 16'h3bf7;
                        8'hf2:          tmp     = 16'h3bf8;
                        8'hf3:          tmp     = 16'h3bf9;
                        8'hf4:          tmp     = 16'h3bfa;
                        8'hf5:          tmp     = 16'h3bfb;
                        8'hf6:          tmp     = 16'h3bfc;
                        8'hf7:          tmp     = 16'h3bfd;
                        8'hf8:          tmp     = 16'h3bfe;
                        8'hf9:          tmp     = 16'h3bfe;
                        8'hfa:          tmp     = 16'h3bff;
                        8'hfb:          tmp     = 16'h3bff;
                        8'hfc:          tmp     = 16'h3bff;
                        8'hfd:          tmp     = ONEP;
                        8'hfe:          tmp     = ONEP;
                        8'hff:          tmp     = ONEP;
                        default:        tmp     = 16'hxxxx;     // unknown / out-of-table index
                endcase

                coefFunc        = tmp;
        end
endfunction

endmodule       // coef

// *****************************************************************************
        

回路デザイン > 設計例 [FFT] > コーディング3    次のページ(テスト)   このページのTOP ▲

[1]
入力されるiRadixを見て、遷移するかしないかを自動的に切り替えるようにするのは簡単です。考えてみてください。

なお、64ポイントではPhase0→1での施策は必要ありません。Phase1→2でのみ必要です(64 Point FFTの図参照)。書込みと読込のタイミングが近くて、パイプライン長を超えるのが分かると思います。
[2]
前述しましたが、将来的にDCTに活用する、もしくは合成時に自動的に縮退するのであればこのまま残しておいた方がいいと思います。
[3]
テーブルは値をそのまま記述しているため回路規模が心配になりますが、合成すればかなりすっきりします。数によるとは思いますが、MRR=12くらいは平気だと思います。