# vgpu_unlock

#!/bin/python3
#
# vGPU unlock script for consumer GPUs.
#
# Copyright 2021 Jonathan Johansson
# This file is part of the "vgpu_unlock" project, and is distributed under the MIT License.
# See the LICENSE file for more details.
#
# Contributions from Krutav Shah and the vGPU Unlocking community included :)
#

import errno
import frida
import os
import queue
import subprocess
import sys
import time

# JavaScript source of the Frida agent that instrument() loads into each
# target process via session.create_script(). The agent hooks ioctl() and, for
# requests equal to REQ_QUERY_GPU that succeeded, rewrites the data returned
# to the caller: it replaces the PCI device/subsystem ID with that of a
# vGPU-capable card of the same generation, forces the device type to "vGPU
# capable", and adjusts the status field for a few known op types.
# It is a raw string so the JavaScript is passed through without Python
# escape processing.
script_source = r"""
    var syslog_func = new NativeFunction(Module.getExportByName(null, "syslog"),
                                         "void",
                                         ["int", "pointer", "...", "pointer"]);

    var syslog = function(message) {
        var format_ptr = Memory.allocUtf8String("%s");
        var message_ptr = Memory.allocUtf8String(message);
        syslog_func(5, format_ptr, message_ptr);
    };


    // Value of the "request" argument used by nvidia-vgpud and nvidia-vgpu-mgr
    // when calling ioctl to read the PCI device ID and type (and possibly
    // other things) from the GPU.
    var REQ_QUERY_GPU = ptr("0xC020462A");

    // When issuing ioctl with REQ_QUERY_GPU then the "argp" argument is a
    // pointer to a structure something like this:
    //
    // struct arg {
    //    uint32_t unknown_1; // Initialized prior to call.
    //    uint32_t unknown_2; // Initialized prior to call.
    //    uint32_t op_type;   // Operation type, see comment below.
    //    uint32_t padding_1; // Always set to 0 prior to call.
    //    void*    result;    // Pointer initialized prior to call.
    //                        // Pointee initialized to 0 prior to call.
    //                        // Pointee is written by ioctl call.
    //    uint32_t unknown_4; // Set to 0x10 for READ_PCI_ID and set to 4 for
                              // READ_DEV_TYPE prior to call.
    //    uint32_t status;    // Written by ioctl call. See comment below.
    // }

    // These are the observed values for the op_type member.
    var OP_READ_DEV_TYPE = 0x800289; // *result type is uint64_t.
    var OP_READ_PCI_ID = 0x20801801; // *result type is uint16_t[4], the second
                                     // element (index 1) is the device ID, the
                                     // forth element (index 3) is the subsystem
                                     // ID.

    // nvidia-vgpu-mgr expects this value for a vGPU capable GPU.
    var DEV_TYPE_VGPU_CAPABLE = uint64(3);

    // When ioctl returns success (retval >= 0) but sets the status value of
    // the arg structure to 3 then nvidia-vgpud will sleep for a bit (first
    // 0.1s then 1s then 10s) then issue the same ioctl call again until the
    // status differs from 3. It will attempt this for up to 24h before giving
    // up.
    var STATUS_OK = 0;
    var STATUS_TRY_AGAIN = 3;

    Interceptor.attach(Module.getExportByName(null, "ioctl"), {
        onEnter(args) {
            this.request = args[1];
            this.argp = args[2];
        },
        onLeave(retVal) {
            if(!this.request.equals(REQ_QUERY_GPU)) {
                // Not a call we care about.
                return;
            }

            if(retVal.toInt32() < 0) {
                // Call failed.
                return;
            }

            // Lookup status value according to struct above.
            var status = this.argp.add(0x1C).readU32();

            if(status == STATUS_TRY_AGAIN) {
                // Driver will try again.
                return;
            }

            var op_type = this.argp.add(8).readU32();

            if(op_type == OP_READ_PCI_ID) {
                // Lookup address of the device and subsystem IDs.
                var devid_ptr = this.argp.add(0x10).readPointer().add(2);
                var subsysid_ptr = this.argp.add(0x10).readPointer().add(6);

                // Now we replace the device ID with a spoofed value that needs to
                // be determined such that the spoofed value represents a GPU with
                // vGPU support that uses the same GPU chip as our actual GPU.
                var actual_devid = devid_ptr.readU16();
                var spoofed_devid = actual_devid;
                var actual_subsysid = subsysid_ptr.readU16();
                var spoofed_subsysid = actual_subsysid;
                
                // Maxwell
                if(0x1340 <= actual_devid && actual_devid <= 0x13bd ||
                   0x174d <= actual_devid && actual_devid <= 0x179c) {
                    spoofed_devid = 0x13bd; // Tesla M10
                    spoofed_subsysid = 0x1160;
                }

                // Maxwell 2.0
                if(0x13c0 <= actual_devid && actual_devid <= 0x1436 ||
                   0x1617 <= actual_devid && actual_devid <= 0x1667 ||
                   0x17c2 <= actual_devid && actual_devid <= 0x17fd) {
                    spoofed_devid = 0x13f2; // Tesla M60
                }

                // Pascal
                if(0x15f0 <= actual_devid && actual_devid <= 0x15f1 ||
                   0x1b00 <= actual_devid && actual_devid <= 0x1d56 ||
                   0x1725 <= actual_devid && actual_devid <= 0x172f) {
                    spoofed_devid = 0x1b38; // Tesla P40
                }

                // GV100 Volta
                if(actual_devid == 0x1d81 || // TITAN V
                   actual_devid == 0x1dba) { // Quadro GV100 32GB
                    spoofed_devid = 0x1db6; // Tesla V100 32GB PCIE
                }

                // Turing
                if(0x1e02 <= actual_devid && actual_devid <= 0x1ff9 ||
                   0x2182 <= actual_devid && actual_devid <= 0x21d1) {
                    spoofed_devid = 0x1e30; // Quadro RTX 6000
                    spoofed_subsysid = 0x12ba;
                }

                // Ampere
                if(0x2200 <= actual_devid && actual_devid <= 0x2600) {
                    spoofed_devid = 0x2230; // RTX A6000
                }

                devid_ptr.writeU16(spoofed_devid);
                subsysid_ptr.writeU16(spoofed_subsysid);
            }
            
            if(op_type == OP_READ_DEV_TYPE) {
                // Set device type to vGPU capable.
                var dev_type_ptr = this.argp.add(0x10).readPointer();
                dev_type_ptr.writeU64(DEV_TYPE_VGPU_CAPABLE);
            }

            if(status != STATUS_OK) {
                // Things seems to work fine even if some operations that fail
                // result in failed assertions. So here we change the status
                // value for these cases to cleanup the logs for nvidia-vgpu-mgr.
                if(op_type == 0xA0820104 ||
                   op_type == 0x90960103) {
                    this.argp.add(0x1C).writeU32(STATUS_OK);
                } else {
                    syslog("op_type: 0x" + op_type.toString(16) + " failed.");
                }
            }

            // Workaround for some Maxwell cards not supporting reading inforom.
            if(op_type == 0x2080014b && status == 0x56) {
                this.argp.add(0x1C).writeU32(0x57);
            }
        }
    });

    syslog("vgpu_unlock loaded.");
"""

# Frida handle for the local machine; used below to spawn, attach to and
# resume processes and to subscribe to "child-added" events.
device = frida.get_local_device()
# PIDs of child processes reported by Frida; filled by on_child_added() and
# drained by main() so it can wait for each of them to exit.
child_processes = queue.Queue()

def instrument(pid):
    """Inject the unlock agent into a process, then let it run.

    Attaches to the process, turns on child gating for the session, loads
    ``script_source`` into it and finally resumes the process.

    :param pid: Process identifier
    """

    target_session = device.attach(pid)

    # nvidia-vgpud forks itself when initially launched, so the children
    # have to be gated in order to be instrumented as well.
    target_session.enable_child_gating()

    agent = target_session.create_script(script_source)
    agent.load()

    # Only let the process continue once the agent is in place.
    device.resume(pid)


def on_child_added(child):
    """Handle a "child-added" notification from the Frida device.

    Records the PID of the child in ``child_processes`` so that it can be
    waited on later, then instruments and resumes the child.

    :param child: The newly created child process.
    """

    child_pid = child.pid
    child_processes.put(child_pid)
    instrument(child_pid)


def wait_exit(pid):
    """Block until the process with the given PID no longer exists.

    Polls every 0.1 seconds by sending signal 0, which performs the
    existence and permission checks without delivering a signal.

    :param pid: Process ID of the target process.
    """

    poll_interval = .1
    still_running = True

    while still_running:
        time.sleep(poll_interval)

        try:
            os.kill(pid, 0)
        except OSError as err:
            # Only "no such process" means the target is gone; any other
            # error (e.g. lack of permission) implies it still exists.
            still_running = err.errno != errno.ESRCH


def main():
    """Entrypoint.

    Expected invocation: ``vgpu_unlock [-f] <command> [args...]``.

    Without ``-f`` as the first argument the script relaunches itself in the
    background with ``-f`` prepended and exits at once. With ``-f`` the
    remaining arguments are joined with spaces, run through ``/bin/bash -c``
    under Frida, instrumented, and waited on together with every child
    process recorded in ``child_processes``.

    :raises SystemExit: With a usage message when no arguments are given, or
                        with status 0 after relaunching in the background.
    """

    # Without this guard the sys.argv[1] lookup below raises IndexError when
    # the script is started with no arguments at all.
    if len(sys.argv) < 2:
        sys.exit("usage: {} [-f] <command> [args...]".format(sys.argv[0]))

    # Behave at least a little bit like a forking service.
    if sys.argv[1] != "-f":
        subprocess.Popen([sys.argv[0], "-f"] + sys.argv[1:])
        # Use sys.exit rather than the exit() helper injected by the site
        # module, which is not guaranteed to exist (e.g. "python -S").
        sys.exit(0)

    device.on("child-added", on_child_added)
    pid = device.spawn(["/bin/bash", "-c", ' '.join(sys.argv[2:])])
    instrument(pid)

    # Wait for everything to terminate before exiting.
    wait_exit(pid)

    while not child_processes.empty():
        wait_exit(child_processes.get_nowait())


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()