pft-- source --
-- PFT: Parallel (Discrete) Fourier Transform
-- Claude 4.7 gives the following computational complexity analysis (given hypercubic parallelism)
--
-- Parallel time: O(log N)
--
-- At each of the log2(N) levels:
-- d_eo / c_lr: local, O(1)
-- #!corr: one neighbor exchange - on a hypercube each level's partner is one bit-dimension away, so O(1) per level
-- butterfly: one exp + two multiplies + two adds, O(1) local
--
-- Total parallel time: log2(N) levels x O(1) = O(log N)
--
-- Work (sum over all processors): N/2 butterflies x log2(N) levels = O(N log N) - same as sequential.
--
-- Speedup: O(N log N) / O(log N) = O(N) - linear in processor count.
--
-- Efficiency: O(N) speedup / N processors = O(1) - perfect. No wasted parallelism.
O = first @ other -- Partial DFT of Odds
E = first @ self -- Partial DFT of Evens
b = second @ self -- bend-count (= frequency)
N = third @ self -- length of previous output vector
A = fourth @ self -- angle to rotate sum of points
r = ap(rotor,b,A)
pft = !magnitude
@ !first
@ PDC(d_eo,c_lr,id, -- divide, combine, preadjust=noop (i.e., a post adjust algorithm)
((E+r*O, b,N*2,A/2), -- in L add rotated sum of Odds to sum of Evens
(O-r*E,b+N,N*2,A/2)) -- in R add rotated sum of (other-held) Odds to sum of Evens
@ -- apply each, updating vars, over each comm link
# ! corr, -- send node state to corresponding node bidirectionally
atom,
vector @ (self,0,1,pi))-- leaf: this sample, 0 bends, vec len 1, ready to apply pi rotation.
randarr = shuffled(3)
trace(pft, randarr)
trace(expected_fft,randarr) -- proves to be identical in output.
-- end source --
-- lib: pft_lib.py --
"""Helpers for pft.dc: Cooley-Tukey FFT as a PDC.
Loaded via: ./pydc dcsrc/pft.dc dcsrc/pft_lib.py
vector(e): wrap e in a vector
rotor(b,N): the twiddle factor of FFT
shuffled(N): a random permutation of [0..2**N - 1] - the test input.
expected_fft(V) N^3 time non-parallel implementation"""
import cmath, random
pi = cmath.pi
def vector(e):
return [e]
def rotor(b,angle):
return cmath.exp(-1j * b * angle)
def shuffled(N, seed=None):
if seed is not None:
random.seed(seed)
arr = list(range(2 ** N))
random.shuffle(arr)
return arr
# Discrete (unoptimized) Fourier Transform
#
# Time complexity: O(N^2)
def expected_fft(arr):
N = len(arr)
return [sum(arr[n] * cmath.exp(-2j*cmath.pi*k*n/N)
for n in range(N))
for k in range(N)]
-- end lib --
-- trace: pft([5,4,1,6,3,7,2,0]) --
f([5,4,1,6,3,7,2,0])
divide d_eo -> ([5,1,3,2], [4,6,7,0])
f([5,1,3,2])
divide d_eo -> ([5,3], [1,2])
f([5,3])
divide d_eo -> ([5], [3])
f([5])
⇣ atom; basef -> [(5, 0, 1, 3.14)]
f([3])
⇣ atom; basef -> [(3, 0, 1, 3.14)]
post #!corr -> ([((5, 0, 1, 3.14), (3, 0, 1, 3.14))], [((3, 0, 1, 3.14), (5, 0, 1, 3.14))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(8.0, 0, 2, 1.57)], [(2.0, 1, 2, 1.57)])
combine c_lr -> [(8.0, 0, 2, 1.57),(2.0, 1, 2, 1.57)]
f([1,2])
divide d_eo -> ([1], [2])
f([1])
⇣ atom; basef -> [(1, 0, 1, 3.14)]
f([2])
⇣ atom; basef -> [(2, 0, 1, 3.14)]
post #!corr -> ([((1, 0, 1, 3.14), (2, 0, 1, 3.14))], [((2, 0, 1, 3.14), (1, 0, 1, 3.14))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(3.0, 0, 2, 1.57)], [(-1.0, 1, 2, 1.57)])
combine c_lr -> [(3.0, 0, 2, 1.57),(-1.0, 1, 2, 1.57)]
post #!corr -> ([((8.0, 0, 2, 1.57), (3.0, 0, 2, 1.57)),((2.0, 1, 2, 1.57), (-1.0, 1, 2, 1.57))], [((3.0, 0, 2, 1.57), (8.0, 0, 2, 1.57)),((-1.0, 1, 2, 1.57), (2.0, 1, 2, 1.57))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(11.0, 0, 4, 0.785),(2.0+1.0j, 1, 4, 0.785)], [(5.0, 2, 4, 0.785),(2.0-1.0j, 3, 4, 0.785)])
combine c_lr -> [(11.0, 0, 4, 0.785),(2.0+1.0j, 1, 4, 0.785),(5.0, 2, 4, 0.785),(2.0-1.0j, 3, 4, 0.785)]
f([4,6,7,0])
divide d_eo -> ([4,7], [6,0])
f([4,7])
divide d_eo -> ([4], [7])
f([4])
⇣ atom; basef -> [(4, 0, 1, 3.14)]
f([7])
⇣ atom; basef -> [(7, 0, 1, 3.14)]
post #!corr -> ([((4, 0, 1, 3.14), (7, 0, 1, 3.14))], [((7, 0, 1, 3.14), (4, 0, 1, 3.14))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(11.0, 0, 2, 1.57)], [(-3.0, 1, 2, 1.57)])
combine c_lr -> [(11.0, 0, 2, 1.57),(-3.0, 1, 2, 1.57)]
f([6,0])
divide d_eo -> ([6], [0])
f([6])
⇣ atom; basef -> [(6, 0, 1, 3.14)]
f([0])
⇣ atom; basef -> [(0, 0, 1, 3.14)]
post #!corr -> ([((6, 0, 1, 3.14), (0, 0, 1, 3.14))], [((0, 0, 1, 3.14), (6, 0, 1, 3.14))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(6.0, 0, 2, 1.57)], [(6.0, 1, 2, 1.57)])
combine c_lr -> [(6.0, 0, 2, 1.57),(6.0, 1, 2, 1.57)]
post #!corr -> ([((11.0, 0, 2, 1.57), (6.0, 0, 2, 1.57)),((-3.0, 1, 2, 1.57), (6.0, 1, 2, 1.57))], [((6.0, 0, 2, 1.57), (11.0, 0, 2, 1.57)),((6.0, 1, 2, 1.57), (-3.0, 1, 2, 1.57))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(17.0, 0, 4, 0.785),(-3.0-6.0j, 1, 4, 0.785)], [(5.0, 2, 4, 0.785),(-3.0+6.0j, 3, 4, 0.785)])
combine c_lr -> [(17.0, 0, 4, 0.785),(-3.0-6.0j, 1, 4, 0.785),(5.0, 2, 4, 0.785),(-3.0+6.0j, 3, 4, 0.785)]
post #!corr -> ([((11.0, 0, 4, 0.785), (17.0, 0, 4, 0.785)),((2.0+1.0j, 1, 4, 0.785), (-3.0-6.0j, 1, 4, 0.785)),((5.0, 2, 4, 0.785), (5.0, 2, 4, 0.785)),((2.0-1.0j, 3, 4, 0.785), (-3.0+6.0j, 3, 4, 0.785))], [((17.0, 0, 4, 0.785), (11.0, 0, 4, 0.785)),((-3.0-6.0j, 1, 4, 0.785), (2.0+1.0j, 1, 4, 0.785)),((5.0, 2, 4, 0.785), (5.0, 2, 4, 0.785)),((-3.0+6.0j, 3, 4, 0.785), (2.0-1.0j, 3, 4, 0.785))])
post (((first @ self_+(<lambda>*first @ other)), second @ self_, (third @ self_*2), (fourth @ self_/2)), ((first @ other-(<lambda>*first @ self_)), (second @ self_+third @ self_), (third @ self_*2), (fourth @ self_/2))) -> ([(28.0, 0, 8, 0.393),(-4.364-1.121j, 1, 8, 0.393),(5.0-5.0j, 2, 8, 0.393),(8.364-3.121j, 3, 8, 0.393)], [(-6.0, 4, 8, 0.393),(8.364+3.121j, 5, 8, 0.393),(5.0+5.0j, 6, 8, 0.393),(-4.364+1.121j, 7, 8, 0.393)])
combine c_lr -> [(28.0, 0, 8, 0.393),(-4.364-1.121j, 1, 8, 0.393),(5.0-5.0j, 2, 8, 0.393),(8.364-3.121j, 3, 8, 0.393),(-6.0, 4, 8, 0.393),(8.364+3.121j, 5, 8, 0.393),(5.0+5.0j, 6, 8, 0.393),(-4.364+1.121j, 7, 8, 0.393)]
!first -> [28.0,-4.364-1.121j,5.0-5.0j,8.364-3.121j,-6.0,8.364+3.121j,5.0+5.0j,-4.364+1.121j]
!abs -> [28,4.51,7.07,8.93,6,8.93,7.07,4.51]
-- result: [28,4.51,7.07,8.93,6,8.93,7.07,4.51] --
expected_fft-- source: expected_fft --
def expected_fft(arr):
N = len(arr)
return [sum(arr[n] * cmath.exp(-2j*cmath.pi*k*n/N)
for n in range(N))
for k in range(N)]
-- end source --
-- trace: expected_fft([5,4,1,6,3,7,2,0]) --
expected_fft -> [28.0,-4.364-1.121j,5.0-5.0j,8.364-3.121j,-6.0,8.364+3.121j,5.0+5.0j,-4.364+1.121j]
-- result: [28.0,-4.364-1.121j,5.0-5.0j,8.364-3.121j,-6.0,8.364+3.121j,5.0+5.0j,-4.364+1.121j] --
|