import numpy as np
import matplotlib.pyplot as plt
# Algorithm: random projection
# Input: a data matrix X of size n by d
# Output: a dimension reduction of X, f(X) which is n by k
# Random projection function f: R^d -> R^k
def random_projection(X, k):
n, d = np.shape(X)
U = np.random.normal(loc=0, scale=1/np.sqrt(k), size=(d,k))
f_X = np.matmul(X, U)
return f_X
# Algorithm: randomized SVD
# Input: a data matrix A of size n by d,
# a rank parameter k,
# and an exponent q = 0, 1, 2
# Output: an approximate SVD of rank k, A \approx U S V^\top
# See Halko Martinsson and Tropp, Algorithm 4.4
def randomized_SVD(A, k, q = 0):
n, d = np.shape(A)
m = min(2*k, d) # m ~ k << d
# Stage A: generate d by 2k Gaussian text matrix G,
# and reduce the problem to size n by 2k
G = np.random.standard_normal(size=(d, m))
Y = np.matmul(A, G) # n by m
Q, R = np.linalg.qr(Y) # QR step to enforce orthogonality
## Iterative matrix exponent step to increase the spectral gap
for i in range(q):
Z = np.matmul(np.transpose(A), Q) # d by m
Q, R = np.linalg.qr(Z) # QR step to enforce orthogonality
Y = np.matmul(A, Q) # n by m
Q, R = np.linalg.qr(Y)
# Stage B
B = np.matmul(np.transpose(Q), A) # m by d
U_tilde, S, Vh = np.linalg.svd(B, full_matrices=False) # m by m, m by m, m by d
U = np.matmul(Q, U_tilde) # n by m
return U, S, Vh
import time
start = time.time()
A = np.random.standard_normal(size=(100, 1000))
U, S, Vh = np.linalg.svd(A)
end = time.time()
print(end - start)
print(S)
0.04959416389465332 [41.55523898 40.40863519 40.06282943 40.00978311 39.72713203 39.41757916 39.37599718 39.12432979 38.67495792 38.41709024 38.09752037 37.98715845 37.75094298 37.59339256 37.35752174 37.15591611 36.9601563 36.60927182 36.38076938 36.12208638 35.81431044 35.80306808 35.67543994 35.54710498 35.22484462 34.97212302 34.90815624 34.76005721 34.58663363 34.47909749 34.35466146 34.11944889 33.90235772 33.74122556 33.62016804 33.55180017 33.46271669 33.17779333 33.14091088 32.76190392 32.64594054 32.46823163 32.39176633 32.15772705 31.94504555 31.90111946 31.7982669 31.43265164 31.31934913 31.260201 30.95096991 30.73116035 30.72048037 30.56542583 30.306598 30.21214629 30.0813099 29.95375235 29.90082095 29.58418185 29.5536538 29.46849997 29.20886242 29.14196072 28.9625652 28.77016517 28.55381709 28.46642755 27.99455985 27.88553838 27.81990853 27.72791428 27.54882198 27.35785681 27.21984416 27.00858355 26.85656305 26.54532491 26.43593829 26.16713868 26.0614975 25.81552984 25.54923955 25.51191742 25.20821732 25.18849529 25.12376081 24.93620431 24.75878835 24.72310148 24.47471855 24.40623828 24.08616192 23.92493469 23.5644604 23.35596372 22.99910736 22.70834435 22.38729055 22.00166796]
start = time.time()
U, S, Vh = randomized_SVD(A, k=3, q=2)
end = time.time()
print(end - start)
print(S)
0.004445791244506836 [38.35518077 37.07682847 36.87339181 36.0104608 35.55542482 34.54294892]