# Build a "spicy" tensor that exercises the edge cases lovely() reports:
# extreme magnitudes plus the floating-point nasties (+inf, -inf, nan).
spicy = randoms[:12].clone()
spicy[0] *= 10000            # very large magnitude
spicy[1] /= 10000            # very small magnitude
spicy[3] = float('inf')      # flagged as +Inf! in the summary
spicy[4] = float('-inf')     # flagged as -Inf! in the summary
spicy[5] = float('nan')      # flagged as NaN! in the summary
spicy = spicy.reshape((2,6))
🧾 View as a summary
lovely
def lovely(
t:Tensor, # Tensor of interest
verbose:bool=False, # Whether to show the full tensor
plain:bool=False, # Just print if exactly as before
depth:int=0, # Show stats in depth
color:NoneType=None, # Force color (True/False) or auto.
show_histogram:NoneType=None, # Show the histogram: 'ββββββββββ'
):
Examples
print(lovely(randoms[0]))
print(lovely(randoms[:2]))
print(lovely(randoms[:6].view(2, 3))) # More than 2 elements -> show statistics
print(lovely(randoms[:11])) # More than 10 -> suppress data output
tensor 1.927
tensor[2] ΞΌ=1.707 Ο=0.311 [1.927, 1.487]
tensor[2, 3] n=6 xβ[-2.106, 1.927] ΞΌ=0.276 Ο=1.594 [[1.927, 1.487, 0.901], [-2.106, 0.678, -1.235]]
tensor[11] xβ[-2.106, 1.927] ΞΌ=0.046 Ο=1.384
Weird types
print(lovely(torch.tensor([1., 2., 3.], dtype=torch.float8_e4m3fn)))
print(lovely(torch.tensor([1., 2., 3.], dtype=torch.float8_e4m3fnuz)))
print(lovely(torch.tensor([1., 2., 3.], dtype=torch.float8_e5m2)))
print(lovely(torch.tensor([1., 2., 3.], dtype=torch.float8_e5m2fnuz)))
# Note: This one does positive powers of 2 only, since it has exponent only without mantissa or sign.
print(lovely(torch.tensor([1., 2., 3., -4., 5., 6.], dtype=torch.float8_e8m0fnu)))tensor[3] float8_e4m3fn xβ[1.000, 3.000] ΞΌ=2.000 Ο=1.000 [1.000, 2.000, 3.000]
tensor[3] float8_e4m3fnuz xβ[1.000, 3.000] ΞΌ=2.000 Ο=1.000 [1.000, 2.000, 3.000]
tensor[3] float8_e5m2 xβ[1.000, 3.000] ΞΌ=2.000 Ο=1.000 [1.000, 2.000, 3.000]
tensor[3] float8_e5m2fnuz xβ[1.000, 3.000] ΞΌ=2.000 Ο=1.000 [1.000, 2.000, 3.000]
tensor[6] float8_e8m0fnu xβ[1.000, 8.000] ΞΌ=3.833 Ο=2.401 [1.000, 2.000, 4.000, 4.000, 4.000, 8.000]
The gradient
# Fixed seed so the printed example values are reproducible.
torch.manual_seed(1)
grad = torch.randn((10, 10), requires_grad=True, dtype=torch.float64)
grad_plus_one = grad+1
print(f"Before .backward:\n{lovely(grad)}\n")
# We can't access .grad of non-leaf tensors
print(f"Before .backward, non-leaf node:\n{lovely(grad_plus_one)}\n")
# Backprop from a scalar to populate grad.grad.
grad_plus_one.prod().backward()
# Inject a -inf so lovely() flags it in the grad summary.
grad.grad[0,0] = float("-inf") # type: ignore
print(f"After .backward():\n{lovely(grad)}\n")
grad.grad.zero_() # type: ignore
print(f"After .zero_() on .grad:\n{lovely(grad)}")Before .backward:
tensor[10, 10] f64 n=100 xβ[-3.705 |β ββββββββ| 2.537] ΞΌ=0.105 Ο=1.066 grad=None
Before .backward, non-leaf node:
tensor[10, 10] f64 n=100 xβ[-2.705 |β ββββββββ| 3.537] ΞΌ=1.105 Ο=1.066 grad (non-leaf) AddBackward0
After .backward():
tensor[10, 10] f64 n=100 xβ[-3.705 |β ββββββββ| 2.537] ΞΌ=0.105 Ο=1.066 grad={ xβ[-1.351e-05 |ββββ ββββ| 4.236e-06] ΞΌ=-1.145e-07 Ο=2.284e-06 -Inf! }
After .zero_() on .grad:
tensor[10, 10] f64 n=100 xβ[-3.705 |β ββββββββ| 2.537] ΞΌ=0.105 Ο=1.066 grad={ all_zeros }
# The device is shown in the summary for non-CPU tensors (see output below).
if torch.cuda.is_available():
    print(lovely(torch.tensor(1., device=torch.device("cuda:0"))))
test_eq(str(lovely(torch.tensor(1., device=torch.device("cuda:0")))), "tensor cuda:0 1.000")tensor cuda:0 1.000
Do we have any floating point nasties? Is the tensor all zeros?
# Statistics and range are calculated on good values only, if there are at least 3 of them.
lovely(spicy)tensor[2, 6] n=12 xβ[-1.605, 1.927e+04] ΞΌ=2.141e+03 Ο=6.423e+03 +Inf! -Inf! NaN!
lovely(spicy, color=False)tensor[2, 6] n=12 xβ[-1.605, 1.927e+04] ΞΌ=2.141e+03 Ο=6.423e+03 +Inf! -Inf! NaN!
lovely(torch.tensor([float("nan")]*11))tensor[11] NaN!
lovely(torch.zeros(12))tensor[12] all_zeros
lovely(torch.randn([0,0,0], dtype=torch.float16))tensor[0, 0, 0] f16 empty
lovely(torch.tensor([1,2,3], dtype=torch.int32))tensor[3] i32 xβ[1, 3] ΞΌ=2.000 Ο=1.000 [1, 2, 3]
torch.set_printoptions(linewidth=120)
lovely(spicy, verbose=True)tensor[2, 6] n=12 xβ[-1.605, 1.927e+04] ΞΌ=2.141e+03 Ο=6.423e+03 +Inf! -Inf! NaN! tensor([[ 1.9269e+04, 1.4873e-04, 9.0072e-01, inf, -inf, nan], [-4.3067e-02, -1.6047e+00, -7.5214e-01, 1.6487e+00, -3.9248e-01, -1.4036e+00]])
lovely(spicy, plain=True)tensor([[ 1.9269e+04, 1.4873e-04, 9.0072e-01, inf, -inf, nan],
[-4.3067e-02, -1.6047e+00, -7.5214e-01, 1.6487e+00, -3.9248e-01, -1.4036e+00]])
image = torch.load("mysteryman.pt")
image[1,2,3] = float('nan')
lovely(image, depth=2) # Limited by set_config(deeper_lines=N)tensor[3, 196, 196] n=115248 (0.4Mb) xβ[-2.118 |ββ ββββββββ| 2.640] ΞΌ=-0.388 Ο=1.073 NaN! tensor[196, 196] n=38416 xβ[-2.118 |βββ βββββββ| 2.249] ΞΌ=-0.324 Ο=1.036 tensor[196] xβ[-1.912 |βββββ β| 2.249] ΞΌ=-0.673 Ο=0.522 tensor[196] xβ[-1.861 |βββββ β| 2.163] ΞΌ=-0.738 Ο=0.418 tensor[196] xβ[-1.758 |βββββ β| 2.198] ΞΌ=-0.806 Ο=0.397 tensor[196] xβ[-1.656 |βββββ β| 2.249] ΞΌ=-0.849 Ο=0.369 tensor[196] xβ[-1.673 |ββββββ β| 2.198] ΞΌ=-0.857 Ο=0.357 tensor[196] xβ[-1.656 |ββββββ β| 2.146] ΞΌ=-0.848 Ο=0.372 tensor[196] xβ[-1.433 |βββ ββββ β| 2.215] ΞΌ=-0.784 Ο=0.397 tensor[196] xβ[-1.279 |ββββββββ β| 2.249] ΞΌ=-0.695 Ο=0.486 tensor[196] xβ[-1.364 |ββββββββββ| 2.249] ΞΌ=-0.637 Ο=0.539 ... tensor[196, 196] n=38416 xβ[-1.966 |βββ βββββββ| 2.429] ΞΌ=-0.274 Ο=0.973 NaN! tensor[196] xβ[-1.861 |βββββ β| 2.411] ΞΌ=-0.529 Ο=0.556 tensor[196] xβ[-1.826 |ββββ β| 2.359] ΞΌ=-0.562 Ο=0.473 tensor[196] xβ[-1.756 |βββββ β| 2.376] ΞΌ=-0.622 Ο=0.459 NaN! tensor[196] xβ[-1.633 |ββββ β| 2.429] ΞΌ=-0.664 Ο=0.430 tensor[196] xβ[-1.651 |ββββ β| 2.376] ΞΌ=-0.669 Ο=0.399 tensor[196] xβ[-1.633 |βββββ β| 2.376] ΞΌ=-0.701 Ο=0.391 tensor[196] xβ[-1.563 |βββββ β| 2.429] ΞΌ=-0.670 Ο=0.380 tensor[196] xβ[-1.475 |βββββ β| 2.429] ΞΌ=-0.616 Ο=0.386 tensor[196] xβ[-1.511 |ββββββ β| 2.429] ΞΌ=-0.593 Ο=0.399 ... tensor[196, 196] n=38416 xβ[-1.804 |ββββββββββ| 2.640] ΞΌ=-0.567 Ο=1.178 tensor[196] xβ[-1.717 |βββ β| 2.396] ΞΌ=-0.982 Ο=0.350 tensor[196] xβ[-1.752 |βββ β| 2.326] ΞΌ=-1.034 Ο=0.314 tensor[196] xβ[-1.648 |ββββ β| 2.379] ΞΌ=-1.086 Ο=0.314 tensor[196] xβ[-1.630 |βββ β| 2.466] ΞΌ=-1.121 Ο=0.305 tensor[196] xβ[-1.717 |βββ β| 2.448] ΞΌ=-1.120 Ο=0.302 tensor[196] xβ[-1.717 |ββββ β| 2.431] ΞΌ=-1.166 Ο=0.314 tensor[196] xβ[-1.560 |ββββ β| 2.448] ΞΌ=-1.124 Ο=0.326 tensor[196] xβ[-1.421 |ββββ β β| 2.431] ΞΌ=-1.064 Ο=0.383 tensor[196] xβ[-1.526 |βββββ β β| 2.396] ΞΌ=-1.047 Ο=0.417 ...
t = torch.zeros(2, 3, 4, names=('N', 'C', None))
test_eq(str(lovely(t)), "tensor[N=2, C=3, 4] n=24 \x1b[38;2;127;127;127mall_zeros\x1b[0m")
lovely(t)/tmp/ipykernel_1044556/3561422158.py:1: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1971.)
t = torch.zeros(2, 3, 4, names=('N', 'C', None))
tensor[N=2, C=3, 4] n=24 all_zeros
Meta device
t = torch.empty(3,3, device="meta")
lovely(t)tensor[3, 3] n=9 meta meta
CUDA memory is not leaked
def memstats():
    """Return a short human-readable summary of CUDA memory usage.

    Reports the current and peak memory allocated by the PyTorch CUDA
    caching allocator, floor-divided into whole mebibytes.

    Returns:
        str: e.g. ``"Allocated: 12 MB, Max: 99 MB"``.
    """
    MIB = 1024 * 1024
    # Floor division of ints already yields an int; no int() cast needed.
    allocated = torch.cuda.memory_allocated() // MIB
    max_allocated = torch.cuda.max_memory_allocated() // MIB
    # Use the same "MB" label for both figures (the original mixed "MB"/"Mb").
    return f"Allocated: {allocated} MB, Max: {max_allocated} MB"
# Verify that holding/printing a lovely() summary does not leak the tensor:
# record baseline usage, allocate, summarize, delete, then compare.
if torch.cuda.is_available():
    cudamem = torch.cuda.memory_allocated()  # baseline before the allocation
    print(f"before allocation: {memstats()}")
    numbers = torch.randn((3, 1024, 1024), device="cuda") # 12Mb image
    torch.cuda.synchronize()
    print(f"after allocation: {memstats()}")
    # Note, the return value of lovely() is not a string, but a
    # StrProxy that holds reference to 'numbers'. You have to del
    # the references to it, but once it's gone, the reference to
    # the tensor is gone too.
    display(lovely(numbers) )
    print(f"after repr: {memstats()}")
    del numbers
    # torch.cuda.memory.empty_cache()
    print(f"after cleanup: {memstats()}")
test_eq(cudamem >= torch.cuda.memory_allocated(), True)before allocation: Allocated: 0 MB, Max: 0 Mb
after allocation: Allocated: 12 MB, Max: 12 Mb
tensor[3, 1024, 1024] n=3145728 (12Mb) xβ[-5.109 | βββββββββ| 5.141] ΞΌ=5.667e-05 Ο=1.000 cuda:0
after repr: Allocated: 12 MB, Max: 99 Mb
after cleanup: Allocated: 0 MB, Max: 99 Mb
# We don't really support complex numbers yet
c = torch.randn(5, dtype=torch.complex64)
lovely(c)tensor([ 0.6125-0.2495j, 0.2462+0.8040j, -0.2361-1.0412j, 0.5159-0.0928j, -0.4503+0.7375j])