openpilot/tinygrad_repo/extra/gemm/tinygrad_nv_matmul.py
Vehicle Researcher c5d5c5d1f3 openpilot v0.10.1 release
date: 2025-10-24T00:30:59
master commit: 405631baf9685e171a0dd19547cb763f1b163d18
2025-10-24 00:31:03 -07:00

38 lines
1.3 KiB
Python

from tinygrad import Tensor, dtypes, Device
from tinygrad.helpers import getenv, DEBUG
from tinygrad.codegen.opt.kernel import Kernel, Opt, OptOps
from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
from dataclasses import replace
# Problem size: both operands are N x N in the default path; in the GEMV path
# N is the inner dimension shared by the two operands.
N = 4096

if __name__ == "__main__":
  # The GEMV environment flag selects both the operand shapes and the
  # hand-written optimisation list further down.
  gemv_mode = getenv("GEMV")

  if gemv_mode:
    # (1, N) float row times the transpose of a (14336, N) float16 tensor.
    lhs = Tensor.empty(1, N, dtype=dtypes.float)
    rhs = Tensor.empty(14336, N, dtype=dtypes.float16).T
  else:
    # Square float16 x float16 matmul.
    lhs = Tensor.empty(N, N, dtype=dtypes.float16)
    rhs = Tensor.empty(N, N, dtype=dtypes.float16)

  product = lhs.matmul(rhs)

  # Only the last schedule item is compiled and run
  # (presumably the matmul kernel itself -- TODO confirm).
  sched_item = product.schedule()[-1]
  kernel = Kernel(sched_item.ast, opts=Device[Device.DEFAULT].renderer)

  # Optimisations are applied in list order.
  if gemv_mode:
    chosen_opts = [
      Opt(op=OptOps.UNROLL, axis=0, amt=8),
      Opt(op=OptOps.GROUP, axis=0, amt=32),
    ]
  else:
    chosen_opts = [
      Opt(op=OptOps.TC, axis=0, amt=0),
      Opt(op=OptOps.UPCAST, axis=0, amt=4),
      Opt(op=OptOps.UPCAST, axis=1, amt=8),
      Opt(op=OptOps.LOCAL, axis=0, amt=2),
      Opt(op=OptOps.LOCAL, axis=1, amt=2),
      Opt(op=OptOps.LOCAL, axis=0, amt=2),
    ]
  kernel.apply_opts(chosen_opts)

  program = get_program(kernel.ast, kernel.opts, kernel.applied_opts)

  # Hook for hand-editing the generated source: change `patched_src` before it
  # is written back into the program. Left as-is, the program is unchanged.
  patched_src = program.src
  program = replace(program, src=patched_src)

  # Build the runner first, then make sure every buffer of the schedule item
  # is allocated, and finally bundle both into an executable item.
  compiled = CompiledRunner(program)
  buffers = [buf.ensure_allocated() for buf in sched_item.bufs]
  exec_item = ExecItem(compiled, buffers, sched_item.metadata)

  # Run the kernel five times, waiting for each run to finish.
  for _ in range(5):
    exec_item.run(wait=True)