[{"data":1,"prerenderedAt":1678},["ShallowReactive",2],{"blog-en-proxmox-gpu-passthrough-ai-workloads":3,"blog-en-proxmox-gpu-passthrough-ai-workloads-alt":260},{"id":4,"title":5,"author":6,"body":7,"date":1661,"description":1662,"extension":1663,"image":215,"locale":1664,"meta":1665,"navigation":260,"path":1666,"seo":1667,"stem":1668,"tags":1669,"__hash__":1677},"blog\u002Fblog\u002Fen\u002Fproxmox-gpu-passthrough-ai-workloads.md","Proxmox GPU Passthrough: Configuring for AI\u002FML Workloads","Kubo Team",{"type":8,"value":9,"toc":1635},"minimark",[10,22,31,36,45,50,58,75,79,161,170,174,178,181,202,206,209,305,311,314,347,351,360,463,467,470,613,616,637,646,650,654,657,791,796,857,861,864,906,910,913,1018,1022,1026,1034,1152,1156,1165,1245,1249,1258,1344,1351,1355,1359,1446,1551,1559,1563,1566,1573,1582,1587,1631],[11,12,13,14,21],"p",{},"The rapid adoption of AI\u002FML workloads has driven explosive demand for GPU computing. ",[15,16,20],"a",{"href":17,"rel":18},"https:\u002F\u002Fwww.proxmox.com\u002Fen\u002Fproxmox-virtual-environment\u002Foverview",[19],"nofollow","Proxmox VE","'s GPU passthrough capability lets virtual machines access physical GPUs directly, delivering near-bare-metal performance for LLM inference, model training, and image generation. 
This article provides a detailed walkthrough from IOMMU\u002FVFIO configuration to running AI workloads.",[11,23,24,25,30],{},"For running AI\u002FML infrastructure on Kubernetes, ",[15,26,29],{"href":27,"rel":28},"https:\u002F\u002Fwww.hexabase.com\u002Fproduct\u002Fkubo\u002Fon-premise",[19],"Kubo On-Premise"," provides a fully managed K8s environment including GPU node management, with automated GPU resource scheduling and operations.",[32,33,35],"h2",{"id":34},"gpu-passthrough-fundamentals","GPU Passthrough Fundamentals",[11,37,38,39,44],{},"GPU passthrough is a virtualization technique that provides VMs with direct access to physical GPUs through ",[15,40,43],{"href":41,"rel":42},"https:\u002F\u002Fwww.kernel.org\u002Fdoc\u002Fhtml\u002Flatest\u002Fdriver-api\u002Fvfio.html",[19],"VFIO (Virtual Function I\u002FO)",", minimizing hypervisor overhead.",[46,47,49],"h3",{"id":48},"understanding-iommu","Understanding IOMMU",[11,51,52,57],{},[15,53,56],{"href":54,"rel":55},"https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FInput%E2%80%93output_memory_management_unit",[19],"IOMMU (Input-Output Memory Management Unit)"," is the hardware component that translates device addresses, creating isolated \"lanes\" for each device. 
This is the prerequisite for passthrough.",[59,60,61,69],"ul",{},[62,63,64,68],"li",{},[65,66,67],"strong",{},"Intel",": VT-d (Virtualization Technology for Directed I\u002FO)",[62,70,71,74],{},[65,72,73],{},"AMD",": AMD-Vi (AMD I\u002FO Virtualization Technology)",[46,76,78],{"id":77},"supported-gpus","Supported GPUs",[80,81,82,101],"table",{},[83,84,85],"thead",{},[86,87,88,92,95,98],"tr",{},[89,90,91],"th",{},"Category",[89,93,94],{},"GPU",[89,96,97],{},"VRAM",[89,99,100],{},"Recommended Use",[102,103,104,119,133,147],"tbody",{},[86,105,106,110,113,116],{},[107,108,109],"td",{},"Entry",[107,111,112],{},"RTX 3060 \u002F RTX 4060",[107,114,115],{},"8-12 GB",[107,117,118],{},"Small-scale LLM inference",[86,120,121,124,127,130],{},[107,122,123],{},"Mid-range",[107,125,126],{},"RTX 3080 \u002F RTX 4060 Ti",[107,128,129],{},"10-16 GB",[107,131,132],{},"Medium models, fine-tuning",[86,134,135,138,141,144],{},[107,136,137],{},"High-end",[107,139,140],{},"RTX 3090 \u002F RTX 4090",[107,142,143],{},"24 GB",[107,145,146],{},"Large LLMs, multi-model",[86,148,149,152,155,158],{},[107,150,151],{},"Datacenter",[107,153,154],{},"NVIDIA P40 \u002F A100",[107,156,157],{},"24-80 GB",[107,159,160],{},"Production AI workloads",[11,162,163,164,169],{},"The ",[15,165,168],{"href":166,"rel":167},"https:\u002F\u002Fforum.proxmox.com\u002Fthreads\u002F2025-proxmox-pcie-gpu-passthrough-with-nvidia.169543\u002F",[19],"Proxmox forum GPU passthrough guide"," notes that NVIDIA GPUs have the broadest support across AI stacks. 
AMD GPUs require ROCm but work with Ollama on supported distributions.",[32,171,173],{"id":172},"bios-and-proxmox-host-configuration","BIOS and Proxmox Host Configuration",[46,175,177],{"id":176},"bios-settings","BIOS Settings",[11,179,180],{},"Verify the following BIOS settings:",[182,183,184,190,196],"ol",{},[62,185,186,189],{},[65,187,188],{},"Disable Secure Boot",": Eliminates certificate registration requirements",[62,191,192,195],{},[65,193,194],{},"Enable VT-d (Intel) \u002F IOMMU (AMD)",": Activate in the motherboard BIOS menus",[62,197,198,201],{},[65,199,200],{},"Update firmware to latest version",": Improves passthrough compatibility",[46,203,205],{"id":204},"kernel-parameters","Kernel Parameters",[11,207,208],{},"Add IOMMU parameters to the GRUB command line:",[210,211,216],"pre",{"className":212,"code":213,"language":214,"meta":215,"style":215},"language-bash shiki shiki-themes tokyo-night","# Edit \u002Fetc\u002Fdefault\u002Fgrub\n# For Intel CPUs\nGRUB_CMDLINE_LINUX_DEFAULT=\"quiet intel_iommu=on iommu=pt\"\n\n# For AMD CPUs\nGRUB_CMDLINE_LINUX_DEFAULT=\"quiet amd_iommu=on iommu=pt\"\n\n# Update GRUB and reboot\nupdate-grub\nreboot\n","bash","",[217,218,219,228,234,255,262,268,282,287,293,299],"code",{"__ignoreMap":215},[220,221,224],"span",{"class":222,"line":223},"line",1,[220,225,227],{"class":226},"sbD-w","# Edit \u002Fetc\u002Fdefault\u002Fgrub\n",[220,229,231],{"class":222,"line":230},2,[220,232,233],{"class":226},"# For Intel CPUs\n",[220,235,237,241,245,248,252],{"class":222,"line":236},3,[220,238,240],{"class":239},"sE3pS","GRUB_CMDLINE_LINUX_DEFAULT",[220,242,244],{"class":243},"sAklC","=",[220,246,247],{"class":243},"\"",[220,249,251],{"class":250},"sPY7s","quiet intel_iommu=on iommu=pt",[220,253,254],{"class":243},"\"\n",[220,256,258],{"class":222,"line":257},4,[220,259,261],{"emptyLinePlaceholder":260},true,"\n",[220,263,265],{"class":222,"line":264},5,[220,266,267],{"class":226},"# For AMD 
CPUs\n",[220,269,271,273,275,277,280],{"class":222,"line":270},6,[220,272,240],{"class":239},[220,274,244],{"class":243},[220,276,247],{"class":243},[220,278,279],{"class":250},"quiet amd_iommu=on iommu=pt",[220,281,254],{"class":243},[220,283,285],{"class":222,"line":284},7,[220,286,261],{"emptyLinePlaceholder":260},[220,288,290],{"class":222,"line":289},8,[220,291,292],{"class":226},"# Update GRUB and reboot\n",[220,294,296],{"class":222,"line":295},9,[220,297,298],{"class":239},"update-grub\n",[220,300,302],{"class":222,"line":301},10,[220,303,304],{"class":239},"reboot\n",[11,306,163,307,310],{},[217,308,309],{},"iommu=pt"," (passthrough mode) tells the kernel to engage the IOMMU only for devices being passed through, improving overall performance.",[11,312,313],{},"After reboot, verify IOMMU is enabled:",[210,315,317],{"className":212,"code":316,"language":214,"meta":215,"style":215},"dmesg | grep -e DMAR -e IOMMU\n# Should display \"DMAR: IOMMU enabled\" or similar message\n",[217,318,319,342],{"__ignoreMap":215},[220,320,321,324,327,330,334,337,339],{"class":222,"line":223},[220,322,323],{"class":239},"dmesg",[220,325,326],{"class":243}," |",[220,328,329],{"class":239}," grep",[220,331,333],{"class":332},"sT800"," -e",[220,335,336],{"class":250}," DMAR",[220,338,333],{"class":332},[220,340,341],{"class":250}," IOMMU\n",[220,343,344],{"class":222,"line":230},[220,345,346],{"class":226},"# Should display \"DMAR: IOMMU enabled\" or similar message\n",[46,348,350],{"id":349},"vfio-module-configuration","VFIO Module Configuration",[11,352,353,354,359],{},"Load ",[15,355,358],{"href":356,"rel":357},"https:\u002F\u002Fproxmox.rdem-systems.com\u002Fen\u002Fblog\u002Fproxmox-hardware-passthrough-gpu-usb-pci\u002F",[19],"VFIO modules"," and blacklist host GPU drivers:",[210,361,363],{"className":212,"code":362,"language":214,"meta":215,"style":215},"# Add VFIO modules to \u002Fetc\u002Fmodules\ncat >> \u002Fetc\u002Fmodules 
\u003C\u003CEOF\nvfio\nvfio_iommu_type1\nvfio_pci\nvfio_virqfd\nEOF\n\n# Blacklist host GPU drivers\ncat > \u002Fetc\u002Fmodprobe.d\u002Fblacklist-gpu.conf \u003C\u003CEOF\nblacklist nouveau\nblacklist nvidia\nblacklist nvidiafb\nblacklist snd_hda_intel\nEOF\n",[217,364,365,370,387,392,397,402,407,411,415,420,434,440,446,452,458],{"__ignoreMap":215},[220,366,367],{"class":222,"line":223},[220,368,369],{"class":226},"# Add VFIO modules to \u002Fetc\u002Fmodules\n",[220,371,372,375,378,381,384],{"class":222,"line":230},[220,373,374],{"class":239},"cat",[220,376,377],{"class":243}," >>",[220,379,380],{"class":250}," \u002Fetc\u002Fmodules",[220,382,383],{"class":243}," \u003C\u003C",[220,385,386],{"class":243},"EOF\n",[220,388,389],{"class":222,"line":236},[220,390,391],{"class":250},"vfio\n",[220,393,394],{"class":222,"line":257},[220,395,396],{"class":250},"vfio_iommu_type1\n",[220,398,399],{"class":222,"line":264},[220,400,401],{"class":250},"vfio_pci\n",[220,403,404],{"class":222,"line":270},[220,405,406],{"class":250},"vfio_virqfd\n",[220,408,409],{"class":222,"line":284},[220,410,386],{"class":243},[220,412,413],{"class":222,"line":289},[220,414,261],{"emptyLinePlaceholder":260},[220,416,417],{"class":222,"line":295},[220,418,419],{"class":226},"# Blacklist host GPU drivers\n",[220,421,422,424,427,430,432],{"class":222,"line":301},[220,423,374],{"class":239},[220,425,426],{"class":243}," >",[220,428,429],{"class":250}," \u002Fetc\u002Fmodprobe.d\u002Fblacklist-gpu.conf",[220,431,383],{"class":243},[220,433,386],{"class":243},[220,435,437],{"class":222,"line":436},11,[220,438,439],{"class":250},"blacklist nouveau\n",[220,441,443],{"class":222,"line":442},12,[220,444,445],{"class":250},"blacklist nvidia\n",[220,447,449],{"class":222,"line":448},13,[220,450,451],{"class":250},"blacklist nvidiafb\n",[220,453,455],{"class":222,"line":454},14,[220,456,457],{"class":250},"blacklist 
snd_hda_intel\n",[220,459,461],{"class":222,"line":460},15,[220,462,386],{"class":243},[46,464,466],{"id":465},"binding-the-gpu-to-vfio","Binding the GPU to VFIO",[11,468,469],{},"Identify the GPU's PCI address and vendor IDs, then bind to the VFIO driver:",[210,471,473],{"className":212,"code":472,"language":214,"meta":215,"style":215},"# Find GPU PCI address\nlspci -v | grep -i nvidia\n# Example: 01:00.0 VGA compatible controller: NVIDIA Corporation ...\n# Example: 01:00.1 Audio device: NVIDIA Corporation ...\n\n# Get vendor IDs\nlspci -n -s 01:00\n# Example: 01:00.0 0300: 10de:2684 (rev a1)\n# Example: 01:00.1 0403: 10de:22ba (rev a1)\n\n# Bind GPU to VFIO\ncat > \u002Fetc\u002Fmodprobe.d\u002Fvfio.conf \u003C\u003CEOF\noptions vfio-pci ids=10de:2684,10de:22ba disable_vga=1\nsoftdep nvidia pre: vfio-pci\nsoftdep snd_hda_intel pre: vfio-pci\nEOF\n\n# Update initramfs and reboot\nupdate-initramfs -u -k all\nreboot\n",[217,474,475,480,498,503,508,512,517,530,535,540,544,549,562,567,572,577,582,587,593,608],{"__ignoreMap":215},[220,476,477],{"class":222,"line":223},[220,478,479],{"class":226},"# Find GPU PCI address\n",[220,481,482,485,488,490,492,495],{"class":222,"line":230},[220,483,484],{"class":239},"lspci",[220,486,487],{"class":332}," -v",[220,489,326],{"class":243},[220,491,329],{"class":239},[220,493,494],{"class":332}," -i",[220,496,497],{"class":250}," nvidia\n",[220,499,500],{"class":222,"line":236},[220,501,502],{"class":226},"# Example: 01:00.0 VGA compatible controller: NVIDIA Corporation ...\n",[220,504,505],{"class":222,"line":257},[220,506,507],{"class":226},"# Example: 01:00.1 Audio device: NVIDIA Corporation ...\n",[220,509,510],{"class":222,"line":264},[220,511,261],{"emptyLinePlaceholder":260},[220,513,514],{"class":222,"line":270},[220,515,516],{"class":226},"# Get vendor IDs\n",[220,518,519,521,524,527],{"class":222,"line":284},[220,520,484],{"class":239},[220,522,523],{"class":332}," -n",[220,525,526],{"class":332}," 
-s",[220,528,529],{"class":250}," 01:00\n",[220,531,532],{"class":222,"line":289},[220,533,534],{"class":226},"# Example: 01:00.0 0300: 10de:2684 (rev a1)\n",[220,536,537],{"class":222,"line":295},[220,538,539],{"class":226},"# Example: 01:00.1 0403: 10de:22ba (rev a1)\n",[220,541,542],{"class":222,"line":301},[220,543,261],{"emptyLinePlaceholder":260},[220,545,546],{"class":222,"line":436},[220,547,548],{"class":226},"# Bind GPU to VFIO\n",[220,550,551,553,555,558,560],{"class":222,"line":442},[220,552,374],{"class":239},[220,554,426],{"class":243},[220,556,557],{"class":250}," \u002Fetc\u002Fmodprobe.d\u002Fvfio.conf",[220,559,383],{"class":243},[220,561,386],{"class":243},[220,563,564],{"class":222,"line":448},[220,565,566],{"class":250},"options vfio-pci ids=10de:2684,10de:22ba disable_vga=1\n",[220,568,569],{"class":222,"line":454},[220,570,571],{"class":250},"softdep nvidia pre: vfio-pci\n",[220,573,574],{"class":222,"line":460},[220,575,576],{"class":250},"softdep snd_hda_intel pre: vfio-pci\n",[220,578,580],{"class":222,"line":579},16,[220,581,386],{"class":243},[220,583,585],{"class":222,"line":584},17,[220,586,261],{"emptyLinePlaceholder":260},[220,588,590],{"class":222,"line":589},18,[220,591,592],{"class":226},"# Update initramfs and reboot\n",[220,594,596,599,602,605],{"class":222,"line":595},19,[220,597,598],{"class":239},"update-initramfs",[220,600,601],{"class":332}," -u",[220,603,604],{"class":332}," -k",[220,606,607],{"class":250}," all\n",[220,609,611],{"class":222,"line":610},20,[220,612,304],{"class":239},[11,614,615],{},"Verify the binding after reboot:",[210,617,619],{"className":212,"code":618,"language":214,"meta":215,"style":215},"lspci -nnk -s 01:00\n# Should show \"Kernel driver in use: vfio-pci\"\n",[217,620,621,632],{"__ignoreMap":215},[220,622,623,625,628,630],{"class":222,"line":223},[220,624,484],{"class":239},[220,626,627],{"class":332}," 
-nnk",[220,629,526],{"class":332},[220,631,529],{"class":250},[220,633,634],{"class":222,"line":230},[220,635,636],{"class":226},"# Should show \"Kernel driver in use: vfio-pci\"\n",[11,638,639,640,645],{},"With ",[15,641,644],{"href":642,"rel":643},"https:\u002F\u002Fkubo.hexabase.io\u002F",[19],"Kubo",", integrating GPU nodes into Kubernetes and automating GPU resource scheduling via NVIDIA Device Plugin becomes straightforward.",[32,647,649],{"id":648},"creating-the-vm-and-attaching-the-gpu","Creating the VM and Attaching the GPU",[46,651,653],{"id":652},"vm-configuration","VM Configuration",[11,655,656],{},"Create a VM optimized for GPU passthrough:",[210,658,660],{"className":212,"code":659,"language":214,"meta":215,"style":215},"# Create VM via Web UI or CLI\nqm create 200 --name ai-workstation --memory 32768 --cores 8 \\\n  --machine q35 --bios ovmf \\\n  --cpu host \\\n  --scsihw virtio-scsi-single \\\n  --net0 virtio,bridge=vmbr0\n\n# Add EFI disk\nqm set 200 --efidisk0 local-lvm:1,efitype=4m\n\n# Add OS disk\nqm set 200 --scsi0 local-lvm:100,ssd=1,discard=on\n",[217,661,662,667,700,716,726,736,744,748,753,768,772,777],{"__ignoreMap":215},[220,663,664],{"class":222,"line":223},[220,665,666],{"class":226},"# Create VM via Web UI or CLI\n",[220,668,669,672,675,679,682,685,688,691,694,697],{"class":222,"line":230},[220,670,671],{"class":239},"qm",[220,673,674],{"class":250}," create",[220,676,678],{"class":677},"sOJ5S"," 200",[220,680,681],{"class":332}," --name",[220,683,684],{"class":250}," ai-workstation",[220,686,687],{"class":332}," --memory",[220,689,690],{"class":677}," 32768",[220,692,693],{"class":332}," --cores",[220,695,696],{"class":677}," 8",[220,698,699],{"class":243}," \\\n",[220,701,702,705,708,711,714],{"class":222,"line":236},[220,703,704],{"class":332},"  --machine",[220,706,707],{"class":250}," q35",[220,709,710],{"class":332}," --bios",[220,712,713],{"class":250}," 
ovmf",[220,715,699],{"class":243},[220,717,718,721,724],{"class":222,"line":257},[220,719,720],{"class":332},"  --cpu",[220,722,723],{"class":250}," host",[220,725,699],{"class":243},[220,727,728,731,734],{"class":222,"line":264},[220,729,730],{"class":332},"  --scsihw",[220,732,733],{"class":250}," virtio-scsi-single",[220,735,699],{"class":243},[220,737,738,741],{"class":222,"line":270},[220,739,740],{"class":332},"  --net0",[220,742,743],{"class":250}," virtio,bridge=vmbr0\n",[220,745,746],{"class":222,"line":284},[220,747,261],{"emptyLinePlaceholder":260},[220,749,750],{"class":222,"line":289},[220,751,752],{"class":226},"# Add EFI disk\n",[220,754,755,757,760,762,765],{"class":222,"line":295},[220,756,671],{"class":239},[220,758,759],{"class":250}," set",[220,761,678],{"class":677},[220,763,764],{"class":332}," --efidisk0",[220,766,767],{"class":250}," local-lvm:1,efitype=4m\n",[220,769,770],{"class":222,"line":301},[220,771,261],{"emptyLinePlaceholder":260},[220,773,774],{"class":222,"line":436},[220,775,776],{"class":226},"# Add OS disk\n",[220,778,779,781,783,785,788],{"class":222,"line":442},[220,780,671],{"class":239},[220,782,759],{"class":250},[220,784,678],{"class":677},[220,786,787],{"class":332}," --scsi0",[220,789,790],{"class":250}," local-lvm:100,ssd=1,discard=on\n",[11,792,793],{},[65,794,795],{},"Critical settings:",[80,797,798,811],{},[83,799,800],{},[86,801,802,805,808],{},[89,803,804],{},"Setting",[89,806,807],{},"Value",[89,809,810],{},"Reason",[102,812,813,824,835,846],{},[86,814,815,818,821],{},[107,816,817],{},"Machine type",[107,819,820],{},"q35",[107,822,823],{},"Required for PCIe passthrough",[86,825,826,829,832],{},[107,827,828],{},"BIOS",[107,830,831],{},"OVMF (UEFI)",[107,833,834],{},"Proper PCIe device initialization",[86,836,837,840,843],{},[107,838,839],{},"CPU type",[107,841,842],{},"host",[107,844,845],{},"Full CPU feature exposure",[86,847,848,851,854],{},[107,849,850],{},"SCSI Controller",[107,852,853],{},"VirtIO 
SCSI",[107,855,856],{},"Maximum disk performance",[46,858,860],{"id":859},"gpu-passthrough-configuration","GPU Passthrough Configuration",[11,862,863],{},"Add the GPU device to the VM configuration:",[210,865,867],{"className":212,"code":866,"language":214,"meta":215,"style":215},"# Add to \u002Fetc\u002Fpve\u002Fqemu-server\u002F200.conf\n# Pass through GPU (VGA + Audio)\nhostpci0: 01:00,pcie=1,x-vga=on\n\n# Or via Web UI:\n# Hardware → Add → PCI Device → Select GPU\n# Check \"All Functions\" and \"PCI-Express\"\n",[217,868,869,874,879,887,891,896,901],{"__ignoreMap":215},[220,870,871],{"class":222,"line":223},[220,872,873],{"class":226},"# Add to \u002Fetc\u002Fpve\u002Fqemu-server\u002F200.conf\n",[220,875,876],{"class":222,"line":230},[220,877,878],{"class":226},"# Pass through GPU (VGA + Audio)\n",[220,880,881,884],{"class":222,"line":236},[220,882,883],{"class":239},"hostpci0:",[220,885,886],{"class":250}," 01:00,pcie=1,x-vga=on\n",[220,888,889],{"class":222,"line":257},[220,890,261],{"emptyLinePlaceholder":260},[220,892,893],{"class":222,"line":264},[220,894,895],{"class":226},"# Or via Web UI:\n",[220,897,898],{"class":222,"line":270},[220,899,900],{"class":226},"# Hardware → Add → PCI Device → Select GPU\n",[220,902,903],{"class":222,"line":284},[220,904,905],{"class":226},"# Check \"All Functions\" and \"PCI-Express\"\n",[46,907,909],{"id":908},"installing-nvidia-drivers-in-the-guest","Installing NVIDIA Drivers in the Guest",[11,911,912],{},"Install NVIDIA drivers and CUDA toolkit inside the VM:",[210,914,916],{"className":212,"code":915,"language":214,"meta":215,"style":215},"# For Ubuntu 24.04\n# Install NVIDIA driver\nsudo apt update\nsudo apt install -y nvidia-driver-560\n\n# Install CUDA toolkit\nwget https:\u002F\u002Fdeveloper.download.nvidia.com\u002Fcompute\u002Fcuda\u002Frepos\u002Fubuntu2404\u002Fx86_64\u002Fcuda-keyring_1.1-1_all.deb\nsudo dpkg -i cuda-keyring_1.1-1_all.deb\nsudo apt update\nsudo apt install -y cuda-toolkit-12-6\n\n# Verify 
GPU operation\nnvidia-smi\n",[217,917,918,923,928,939,954,958,963,971,983,991,1004,1008,1013],{"__ignoreMap":215},[220,919,920],{"class":222,"line":223},[220,921,922],{"class":226},"# For Ubuntu 24.04\n",[220,924,925],{"class":222,"line":230},[220,926,927],{"class":226},"# Install NVIDIA driver\n",[220,929,930,933,936],{"class":222,"line":236},[220,931,932],{"class":239},"sudo",[220,934,935],{"class":250}," apt",[220,937,938],{"class":250}," update\n",[220,940,941,943,945,948,951],{"class":222,"line":257},[220,942,932],{"class":239},[220,944,935],{"class":250},[220,946,947],{"class":250}," install",[220,949,950],{"class":332}," -y",[220,952,953],{"class":250}," nvidia-driver-560\n",[220,955,956],{"class":222,"line":264},[220,957,261],{"emptyLinePlaceholder":260},[220,959,960],{"class":222,"line":270},[220,961,962],{"class":226},"# Install CUDA toolkit\n",[220,964,965,968],{"class":222,"line":284},[220,966,967],{"class":239},"wget",[220,969,970],{"class":250}," https:\u002F\u002Fdeveloper.download.nvidia.com\u002Fcompute\u002Fcuda\u002Frepos\u002Fubuntu2404\u002Fx86_64\u002Fcuda-keyring_1.1-1_all.deb\n",[220,972,973,975,978,980],{"class":222,"line":289},[220,974,932],{"class":239},[220,976,977],{"class":250}," dpkg",[220,979,494],{"class":332},[220,981,982],{"class":250}," cuda-keyring_1.1-1_all.deb\n",[220,984,985,987,989],{"class":222,"line":295},[220,986,932],{"class":239},[220,988,935],{"class":250},[220,990,938],{"class":250},[220,992,993,995,997,999,1001],{"class":222,"line":301},[220,994,932],{"class":239},[220,996,935],{"class":250},[220,998,947],{"class":250},[220,1000,950],{"class":332},[220,1002,1003],{"class":250}," cuda-toolkit-12-6\n",[220,1005,1006],{"class":222,"line":436},[220,1007,261],{"emptyLinePlaceholder":260},[220,1009,1010],{"class":222,"line":442},[220,1011,1012],{"class":226},"# Verify GPU 
operation\n",[220,1014,1015],{"class":222,"line":448},[220,1016,1017],{"class":239},"nvidia-smi\n",[32,1019,1021],{"id":1020},"running-aiml-workloads","Running AI\u002FML Workloads",[46,1023,1025],{"id":1024},"llm-inference-with-ollama","LLM Inference with Ollama",[11,1027,1028,1033],{},[15,1029,1032],{"href":1030,"rel":1031},"https:\u002F\u002Follama.com\u002F",[19],"Ollama"," is the easiest way to run LLMs in a GPU passthrough environment:",[210,1035,1037],{"className":212,"code":1036,"language":214,"meta":215,"style":215},"# Install Ollama\ncurl -fsSL https:\u002F\u002Follama.com\u002Finstall | sh\n\n# Verify service status\nsystemctl status ollama\n\n# Download and run a model\nollama pull llama3.1:70b\nollama run llama3.1:70b\n\n# API access (default: port 11434)\ncurl http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate -d '{\n  \"model\": \"llama3.1:70b\",\n  \"prompt\": \"Explain GPU passthrough in Proxmox\"\n}'\n",[217,1038,1039,1044,1060,1064,1069,1080,1084,1089,1100,1109,1113,1118,1134,1139,1144],{"__ignoreMap":215},[220,1040,1041],{"class":222,"line":223},[220,1042,1043],{"class":226},"# Install Ollama\n",[220,1045,1046,1049,1052,1055,1057],{"class":222,"line":230},[220,1047,1048],{"class":239},"curl",[220,1050,1051],{"class":332}," -fsSL",[220,1053,1054],{"class":250}," https:\u002F\u002Follama.com\u002Finstall",[220,1056,326],{"class":243},[220,1058,1059],{"class":239}," sh\n",[220,1061,1062],{"class":222,"line":236},[220,1063,261],{"emptyLinePlaceholder":260},[220,1065,1066],{"class":222,"line":257},[220,1067,1068],{"class":226},"# Verify service status\n",[220,1070,1071,1074,1077],{"class":222,"line":264},[220,1072,1073],{"class":239},"systemctl",[220,1075,1076],{"class":250}," status",[220,1078,1079],{"class":250}," ollama\n",[220,1081,1082],{"class":222,"line":270},[220,1083,261],{"emptyLinePlaceholder":260},[220,1085,1086],{"class":222,"line":284},[220,1087,1088],{"class":226},"# Download and run a 
model\n",[220,1090,1091,1094,1097],{"class":222,"line":289},[220,1092,1093],{"class":239},"ollama",[220,1095,1096],{"class":250}," pull",[220,1098,1099],{"class":250}," llama3.1:70b\n",[220,1101,1102,1104,1107],{"class":222,"line":295},[220,1103,1093],{"class":239},[220,1105,1106],{"class":250}," run",[220,1108,1099],{"class":250},[220,1110,1111],{"class":222,"line":301},[220,1112,261],{"emptyLinePlaceholder":260},[220,1114,1115],{"class":222,"line":436},[220,1116,1117],{"class":226},"# API access (default: port 11434)\n",[220,1119,1120,1122,1125,1128,1131],{"class":222,"line":442},[220,1121,1048],{"class":239},[220,1123,1124],{"class":250}," http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate",[220,1126,1127],{"class":332}," -d",[220,1129,1130],{"class":243}," '",[220,1132,1133],{"class":250},"{\n",[220,1135,1136],{"class":222,"line":448},[220,1137,1138],{"class":250},"  \"model\": \"llama3.1:70b\",\n",[220,1140,1141],{"class":222,"line":454},[220,1142,1143],{"class":250},"  \"prompt\": \"Explain GPU passthrough in Proxmox\"\n",[220,1145,1146,1149],{"class":222,"line":460},[220,1147,1148],{"class":250},"}",[220,1150,1151],{"class":243},"'\n",[46,1153,1155],{"id":1154},"high-performance-inference-with-vllm","High-Performance Inference with vLLM",[11,1157,1158,1159,1164],{},"For production environments requiring high throughput, ",[15,1160,1163],{"href":1161,"rel":1162},"https:\u002F\u002Fdocs.vllm.ai\u002F",[19],"vLLM"," is the optimal choice:",[210,1166,1168],{"className":212,"code":1167,"language":214,"meta":215,"style":215},"# Install vLLM\npip install vllm\n\n# Start OpenAI-compatible API server\npython -m vllm.entrypoints.openai.api_server \\\n  --model meta-llama\u002FLlama-3.1-70B-Instruct \\\n  --tensor-parallel-size 1 \\\n  --gpu-memory-utilization 0.9 \\\n  --port 8000\n",[217,1169,1170,1175,1185,1189,1194,1207,1217,1227,1237],{"__ignoreMap":215},[220,1171,1172],{"class":222,"line":223},[220,1173,1174],{"class":226},"# Install 
vLLM\n",[220,1176,1177,1180,1182],{"class":222,"line":230},[220,1178,1179],{"class":239},"pip",[220,1181,947],{"class":250},[220,1183,1184],{"class":250}," vllm\n",[220,1186,1187],{"class":222,"line":236},[220,1188,261],{"emptyLinePlaceholder":260},[220,1190,1191],{"class":222,"line":257},[220,1192,1193],{"class":226},"# Start OpenAI-compatible API server\n",[220,1195,1196,1199,1202,1205],{"class":222,"line":264},[220,1197,1198],{"class":239},"python",[220,1200,1201],{"class":332}," -m",[220,1203,1204],{"class":250}," vllm.entrypoints.openai.api_server",[220,1206,699],{"class":243},[220,1208,1209,1212,1215],{"class":222,"line":270},[220,1210,1211],{"class":332},"  --model",[220,1213,1214],{"class":250}," meta-llama\u002FLlama-3.1-70B-Instruct",[220,1216,699],{"class":243},[220,1218,1219,1222,1225],{"class":222,"line":284},[220,1220,1221],{"class":332},"  --tensor-parallel-size",[220,1223,1224],{"class":677}," 1",[220,1226,699],{"class":243},[220,1228,1229,1232,1235],{"class":222,"line":289},[220,1230,1231],{"class":332},"  --gpu-memory-utilization",[220,1233,1234],{"class":677}," 0.9",[220,1236,699],{"class":243},[220,1238,1239,1242],{"class":222,"line":295},[220,1240,1241],{"class":332},"  --port",[220,1243,1244],{"class":677}," 8000\n",[46,1246,1248],{"id":1247},"docker-nvidia-container-toolkit","Docker + NVIDIA Container Toolkit",[11,1250,1251,1252,1257],{},"For container-based AI workloads, use the ",[15,1253,1256],{"href":1254,"rel":1255},"https:\u002F\u002Fdocs.nvidia.com\u002Fdatacenter\u002Fcloud-native\u002Fcontainer-toolkit\u002Flatest\u002Finstall-guide.html",[19],"NVIDIA Container Toolkit",":",[210,1259,1261],{"className":212,"code":1260,"language":214,"meta":215,"style":215},"# Install NVIDIA Container Toolkit\ncurl -fsSL https:\u002F\u002Fnvidia.github.io\u002Flibnvidia-container\u002Fgpgkey | \\\n  sudo gpg --dearmor -o \u002Fusr\u002Fshare\u002Fkeyrings\u002Fnvidia-container-toolkit-keyring.gpg\n# Add repository and install\nsudo apt install -y 
nvidia-container-toolkit\n\n# Run GPU-accelerated container\ndocker run --gpus all nvidia\u002Fcuda:12.6.0-base-ubuntu24.04 nvidia-smi\n",[217,1262,1263,1268,1281,1298,1303,1316,1320,1325],{"__ignoreMap":215},[220,1264,1265],{"class":222,"line":223},[220,1266,1267],{"class":226},"# Install NVIDIA Container Toolkit\n",[220,1269,1270,1272,1274,1277,1279],{"class":222,"line":230},[220,1271,1048],{"class":239},[220,1273,1051],{"class":332},[220,1275,1276],{"class":250}," https:\u002F\u002Fnvidia.github.io\u002Flibnvidia-container\u002Fgpgkey",[220,1278,326],{"class":243},[220,1280,699],{"class":243},[220,1282,1283,1286,1289,1292,1295],{"class":222,"line":236},[220,1284,1285],{"class":239},"  sudo",[220,1287,1288],{"class":250}," gpg",[220,1290,1291],{"class":332}," --dearmor",[220,1293,1294],{"class":332}," -o",[220,1296,1297],{"class":250}," \u002Fusr\u002Fshare\u002Fkeyrings\u002Fnvidia-container-toolkit-keyring.gpg\n",[220,1299,1300],{"class":222,"line":257},[220,1301,1302],{"class":226},"# Add repository and install\n",[220,1304,1305,1307,1309,1311,1313],{"class":222,"line":264},[220,1306,932],{"class":239},[220,1308,935],{"class":250},[220,1310,947],{"class":250},[220,1312,950],{"class":332},[220,1314,1315],{"class":250}," nvidia-container-toolkit\n",[220,1317,1318],{"class":222,"line":270},[220,1319,261],{"emptyLinePlaceholder":260},[220,1321,1322],{"class":222,"line":284},[220,1323,1324],{"class":226},"# Run GPU-accelerated container\n",[220,1326,1327,1330,1332,1335,1338,1341],{"class":222,"line":289},[220,1328,1329],{"class":239},"docker",[220,1331,1106],{"class":250},[220,1333,1334],{"class":332}," --gpus",[220,1336,1337],{"class":250}," all",[220,1339,1340],{"class":250}," nvidia\u002Fcuda:12.6.0-base-ubuntu24.04",[220,1342,1343],{"class":250}," nvidia-smi\n",[11,1345,1346,1347,1350],{},"For running AI workloads at scale on Kubernetes, ",[15,1348,29],{"href":27,"rel":1349},[19]," provides a fully managed environment with automatic NVIDIA Device Plugin 
configuration, GPU resource quotas, and multi-tenant GPU sharing.",[32,1352,1354],{"id":1353},"troubleshooting","Troubleshooting",[46,1356,1358],{"id":1357},"common-issues-and-solutions","Common Issues and Solutions",[80,1360,1361,1374],{},[83,1362,1363],{},[86,1364,1365,1368,1371],{},[89,1366,1367],{},"Issue",[89,1369,1370],{},"Cause",[89,1372,1373],{},"Solution",[102,1375,1376,1391,1402,1417,1432],{},[86,1377,1378,1381,1384],{},[107,1379,1380],{},"IOMMU groups not isolated",[107,1382,1383],{},"No ACS support",[107,1385,1386,1387,1390],{},"Add ",[217,1388,1389],{},"pcie_acs_override=downstream,multifunction"," to GRUB",[86,1392,1393,1396,1399],{},[107,1394,1395],{},"Out of memory on VM start",[107,1397,1398],{},"PCIe memory pinning",[107,1400,1401],{},"Reduce VM RAM, configure hugepages",[86,1403,1404,1407,1410],{},[107,1405,1406],{},"NVIDIA Code 43 error",[107,1408,1409],{},"Driver detects virtual environment",[107,1411,1412,1413,1416],{},"Verify ",[217,1414,1415],{},"cpu: host",", use latest drivers",[86,1418,1419,1422,1425],{},[107,1420,1421],{},"GPU reset issues",[107,1423,1424],{},"Common with AMD GPUs",[107,1426,1427,1428,1431],{},"Use ",[217,1429,1430],{},"vendor-reset"," kernel module",[86,1433,1434,1437,1440],{},[107,1435,1436],{},"Audio device conflicts",[107,1438,1439],{},"GPU audio collision",[107,1441,1442,1443],{},"Blacklist ",[217,1444,1445],{},"snd_hda_intel",[210,1447,1449],{"className":212,"code":1448,"language":214,"meta":215,"style":215},"# Check IOMMU groups\nfor d in \u002Fsys\u002Fkernel\u002Fiommu_groups\u002F*\u002Fdevices\u002F*; do\n  n=${d#*\u002Fiommu_groups\u002F}; n=${n%%\u002F*}\n  printf 'IOMMU Group %s ' \"$n\"\n  lspci -nns \"${d##*\u002F}\"\ndone\n",[217,1450,1451,1456,1477,1508,1530,1546],{"__ignoreMap":215},[220,1452,1453],{"class":222,"line":223},[220,1454,1455],{"class":226},"# Check IOMMU 
groups\n",[220,1457,1458,1462,1465,1468,1471,1474],{"class":222,"line":230},[220,1459,1461],{"class":1460},"sd1Qi","for",[220,1463,1464],{"class":239}," d",[220,1466,1467],{"class":1460}," in",[220,1469,1470],{"class":250}," \u002Fsys\u002Fkernel\u002Fiommu_groups\u002F*\u002Fdevices\u002F*",[220,1472,1473],{"class":243},";",[220,1475,1476],{"class":1460}," do\n",[220,1478,1479,1482,1485,1488,1491,1494,1497,1500,1502,1505],{"class":222,"line":236},[220,1480,1481],{"class":239},"  n",[220,1483,1484],{"class":243},"=${",[220,1486,1487],{"class":239},"d",[220,1489,1490],{"class":243},"#*\u002F",[220,1492,1493],{"class":239},"iommu_groups",[220,1495,1496],{"class":243},"\u002F};",[220,1498,1499],{"class":239}," n",[220,1501,1484],{"class":243},[220,1503,1504],{"class":239},"n",[220,1506,1507],{"class":243},"%%\u002F*}\n",[220,1509,1510,1514,1516,1519,1522,1525,1528],{"class":222,"line":257},[220,1511,1513],{"class":1512},"sySf4","  printf",[220,1515,1130],{"class":243},[220,1517,1518],{"class":250},"IOMMU Group %s ",[220,1520,1521],{"class":243},"'",[220,1523,1524],{"class":243}," \"",[220,1526,1527],{"class":239},"$n",[220,1529,254],{"class":243},[220,1531,1532,1535,1538,1541,1543],{"class":222,"line":264},[220,1533,1534],{"class":239},"  lspci",[220,1536,1537],{"class":332}," -nns",[220,1539,1540],{"class":243}," \"${",[220,1542,1487],{"class":239},[220,1544,1545],{"class":243},"##*\u002F}\"\n",[220,1547,1548],{"class":222,"line":270},[220,1549,1550],{"class":1460},"done\n",[11,1552,163,1553,1558],{},[15,1554,1557],{"href":1555,"rel":1556},"https:\u002F\u002Fpve.proxmox.com\u002Fwiki\u002FPCI_Passthrough",[19],"Proxmox Wiki PCI Passthrough guide"," contains detailed troubleshooting procedures.",[32,1560,1562],{"id":1561},"conclusion","Conclusion",[11,1564,1565],{},"Proxmox VE GPU passthrough delivers near-bare-metal GPU performance for AI\u002FML workloads when IOMMU and VFIO are properly configured. 
Modern hardware and driver maturity have significantly simplified the setup process.",[11,1567,1568,1569,1572],{},"For running GPU-accelerated AI workloads at production scale on Kubernetes, ",[15,1570,29],{"href":27,"rel":1571},[19]," is the optimal platform. Combine Proxmox GPU passthrough with Kubo's fully managed K8s to build scalable AI infrastructure.",[11,1574,1575,1576,1581],{},"For consultation on GPU passthrough environment design, ",[15,1577,1580],{"href":1578,"rel":1579},"https:\u002F\u002Fwww.hexabase.com\u002Fcontact-us\u002F",[19],"contact us"," to discuss your requirements.",[11,1583,1584],{},[65,1585,1586],{},"Related Links:",[59,1588,1589,1595,1601,1608,1614,1620,1625],{},[62,1590,1591],{},[15,1592,1594],{"href":1555,"rel":1593},[19],"Proxmox PCI Passthrough Wiki",[62,1596,1597],{},[15,1598,1600],{"href":166,"rel":1599},[19],"Proxmox Forum: GPU Passthrough Guide",[62,1602,1603],{},[15,1604,1607],{"href":1605,"rel":1606},"https:\u002F\u002Fdeveloper.nvidia.com\u002Fcuda-toolkit",[19],"NVIDIA CUDA Toolkit",[62,1609,1610],{},[15,1611,1613],{"href":1030,"rel":1612},[19],"Ollama Official Site",[62,1615,1616],{},[15,1617,1619],{"href":1161,"rel":1618},[19],"vLLM Documentation",[62,1621,1622],{},[15,1623,1256],{"href":1254,"rel":1624},[19],[62,1626,1627],{},[15,1628,1630],{"href":642,"rel":1629},[19],"Kubo Blog",[1632,1633,1634],"style",{},"html pre.shiki code .sbD-w, html code.shiki .sbD-w{--shiki-default:#51597D;--shiki-default-font-style:italic}html pre.shiki code .sE3pS, html code.shiki .sE3pS{--shiki-default:#C0CAF5}html pre.shiki code .sAklC, html code.shiki .sAklC{--shiki-default:#89DDFF}html pre.shiki code .sPY7s, html code.shiki .sPY7s{--shiki-default:#9ECE6A}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: 
var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html pre.shiki code .sT800, html code.shiki .sT800{--shiki-default:#E0AF68}html pre.shiki code .sOJ5S, html code.shiki .sOJ5S{--shiki-default:#FF9E64}html pre.shiki code .sd1Qi, html code.shiki .sd1Qi{--shiki-default:#BB9AF7}html pre.shiki code .sySf4, html code.shiki .sySf4{--shiki-default:#0DB9D7}",{"title":215,"searchDepth":230,"depth":230,"links":1636},[1637,1641,1647,1652,1657,1660],{"id":34,"depth":230,"text":35,"children":1638},[1639,1640],{"id":48,"depth":236,"text":49},{"id":77,"depth":236,"text":78},{"id":172,"depth":230,"text":173,"children":1642},[1643,1644,1645,1646],{"id":176,"depth":236,"text":177},{"id":204,"depth":236,"text":205},{"id":349,"depth":236,"text":350},{"id":465,"depth":236,"text":466},{"id":648,"depth":230,"text":649,"children":1648},[1649,1650,1651],{"id":652,"depth":236,"text":653},{"id":859,"depth":236,"text":860},{"id":908,"depth":236,"text":909},{"id":1020,"depth":230,"text":1021,"children":1653},[1654,1655,1656],{"id":1024,"depth":236,"text":1025},{"id":1154,"depth":236,"text":1155},{"id":1247,"depth":236,"text":1248},{"id":1353,"depth":230,"text":1354,"children":1658},[1659],{"id":1357,"depth":236,"text":1358},{"id":1561,"depth":230,"text":1562},"2026-05-27","Complete guide to NVIDIA GPU passthrough on Proxmox VE for AI\u002FML workloads, covering IOMMU, VFIO configuration, and running Ollama and vLLM inference.","md","en",{},"\u002Fblog\u002Fen\u002Fproxmox-gpu-passthrough-ai-workloads",{"title":5,"description":1662},"blog\u002Fen\u002Fproxmox-gpu-passthrough-ai-workloads",[1670,94,1671,1672,1673,1674,1675,1676],"Proxmox","Passthrough","AI","Machine Learning","NVIDIA","VFIO","IOMMU","AlmRAcniHMU3oj7ie8Qs6Qe25yb1V2BnLLpYo1iP6d4",1779964619106]