In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Algorithm: random projection
# Input: a data matrix X of size n by d
# Output: a dimension reduction of X, f(X) which is n by k
# Random projection function f: R^d -> R^k

def random_projection(X, k):
    n, d = np.shape(X)
    U = np.random.normal(loc=0, scale=1/np.sqrt(k), size=(d,k))
    f_X = np.matmul(X, U)
    return f_X
In [3]:
# Algorithm: randomized SVD
# Input: a data matrix A of size n by d,
#        a rank parameter k,
#        and an exponent q = 0, 1, 2
# Output: an approximate SVD of rank k, A \approx U S V^\top
# See Halko Martinsson and Tropp, Algorithm 4.4

def randomized_SVD(A, k, q = 0):
    n, d = np.shape(A)
    m = min(2*k, d) # m ~ k << d
    # Stage A: generate d by 2k Gaussian text matrix G,
    # and reduce the problem to size n by 2k
    G = np.random.standard_normal(size=(d, m))
    Y = np.matmul(A, G) # n by m
    Q, R = np.linalg.qr(Y) # QR step to enforce orthogonality
    ## Iterative matrix exponent step to increase the spectral gap
    for i in range(q):
        Z = np.matmul(np.transpose(A), Q) # d by m
        Q, R = np.linalg.qr(Z) # QR step to enforce orthogonality
        Y = np.matmul(A, Q) # n by m
        Q, R = np.linalg.qr(Y)
    # Stage B
    B = np.matmul(np.transpose(Q), A) # m by d
    U_tilde, S, Vh = np.linalg.svd(B, full_matrices=False) # m by m, m by m, m by d
    U = np.matmul(Q, U_tilde) # n by m
    return U, S, Vh
In [4]:
import time

start = time.time()
A = np.random.standard_normal(size=(100, 1000))
U, S, Vh = np.linalg.svd(A)
end = time.time()
print(end - start)

print(S)
0.04959416389465332
[41.55523898 40.40863519 40.06282943 40.00978311 39.72713203 39.41757916
 39.37599718 39.12432979 38.67495792 38.41709024 38.09752037 37.98715845
 37.75094298 37.59339256 37.35752174 37.15591611 36.9601563  36.60927182
 36.38076938 36.12208638 35.81431044 35.80306808 35.67543994 35.54710498
 35.22484462 34.97212302 34.90815624 34.76005721 34.58663363 34.47909749
 34.35466146 34.11944889 33.90235772 33.74122556 33.62016804 33.55180017
 33.46271669 33.17779333 33.14091088 32.76190392 32.64594054 32.46823163
 32.39176633 32.15772705 31.94504555 31.90111946 31.7982669  31.43265164
 31.31934913 31.260201   30.95096991 30.73116035 30.72048037 30.56542583
 30.306598   30.21214629 30.0813099  29.95375235 29.90082095 29.58418185
 29.5536538  29.46849997 29.20886242 29.14196072 28.9625652  28.77016517
 28.55381709 28.46642755 27.99455985 27.88553838 27.81990853 27.72791428
 27.54882198 27.35785681 27.21984416 27.00858355 26.85656305 26.54532491
 26.43593829 26.16713868 26.0614975  25.81552984 25.54923955 25.51191742
 25.20821732 25.18849529 25.12376081 24.93620431 24.75878835 24.72310148
 24.47471855 24.40623828 24.08616192 23.92493469 23.5644604  23.35596372
 22.99910736 22.70834435 22.38729055 22.00166796]
In [5]:
start = time.time()
U, S, Vh = randomized_SVD(A, k=3, q=2)
end = time.time()
print(end - start)

print(S)
0.004445791244506836
[38.35518077 37.07682847 36.87339181 36.0104608  35.55542482 34.54294892]