[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [cdi-devel] [PATCH v2 7/9] ehci: Add EHCI driver



On Thu, Apr 16, 2015 at 12:13:56AM +0200, Max Reitz wrote:
> + This adds an EHCI USB host controller driver. It is missing support
>   for periodic schedule (interrupt and isochronous transfers),
>   hotplugging, suspend/resume, etc. pp..
> 
> Signed-off-by: Max Reitz <max@xxxxxxxxxx>
> ---
>  ehci/ehci.c | 661 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  ehci/ehci.h | 170 ++++++++++++++++
>  ehci/main.c |  68 +++++++
>  3 files changed, 899 insertions(+)
>  create mode 100644 ehci/ehci.c
>  create mode 100644 ehci/ehci.h
>  create mode 100644 ehci/main.c
> 
> diff --git a/ehci/ehci.c b/ehci/ehci.c
> new file mode 100644
> index 0000000..6da316e
> --- /dev/null
> +++ b/ehci/ehci.c
> @@ -0,0 +1,661 @@
> +/******************************************************************************
> + * Copyright (c) 2015 Max Reitz                                               *
> + *                                                                            *
> + * Permission is hereby granted,  free of charge,  to any person  obtaining a *
> + * copy of this software and associated documentation files (the "Software"), *
> + * to deal in the Software without restriction,  including without limitation *
> + * the rights to use, copy,  modify, merge, publish,  distribute, sublicense, *
> + * and/or  sell copies of the  Software,  and to permit  persons to whom  the *
> + * Software is furnished to do so, subject to the following conditions:       *
> + *                                                                            *
> + * The above copyright notice and this permission notice shall be included in *
> + * all copies or substantial portions of the Software.                        *
> + *                                                                            *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
> + * IMPLIED,  INCLUDING BUT NOT LIMITED TO THE WARRANTIES  OF MERCHANTABILITY, *
> + * FITNESS  FOR A PARTICULAR  PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL *
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
> + * LIABILITY,  WHETHER IN AN ACTION OF CONTRACT,  TORT OR OTHERWISE,  ARISING *
> + * FROM,  OUT OF  OR IN CONNECTION  WITH  THE SOFTWARE  OR THE  USE OR  OTHER *
> + * DEALINGS IN THE SOFTWARE.                                                  *
> + ******************************************************************************/
> +
> +#include <assert.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <cdi.h>
> +#include <cdi/lists.h>
> +#include <cdi/misc.h>
> +#include <cdi/pci.h>
> +#include <cdi/usb.h>
> +#include <cdi/usb_hcd.h>
> +
> +#include "ehci.h"
> +
> +
> +#define EHCI_TIMEOUT 1000 // ms
> +
> +
> +static bool bios_handoff(struct cdi_pci_device *pci_dev, struct ehci *hc);
> +static void irq_handler(struct cdi_device *dev);
> +
> +
> +struct cdi_device *ehci_init_device(struct cdi_bus_data *bus_data)
> +{
> +    struct cdi_pci_device *pci_dev = (struct cdi_pci_device *)bus_data;
> +    static int dev_counter = 0;
> +
> +    if (pci_dev->class_id     != 0x0c ||
> +        pci_dev->subclass_id  != 0x03 ||
> +        pci_dev->interface_id != 0x20)
> +    {
> +        return NULL;
> +    }

Magic numbers?

> +
> +    struct ehci *hc = calloc(1, sizeof(*hc));
> +
> +    hc->hc.dev.name = malloc(8);
> +    sprintf((char *)hc->hc.dev.name, "ehci%i", dev_counter++);
> +
> +    hc->retired_qhs = cdi_list_create();
> +
> +    // Enable bus mastering
> +    cdi_pci_config_writew(pci_dev, 4, cdi_pci_config_readw(pci_dev, 4) | 0x6);

This one as well.

I also thought we had specified somewhere that bus mastering must be
enabled by the CDI library, but I can't find it. Maybe we haven't.

> +
> +    cdi_pci_alloc_memory(pci_dev);
> +
> +    struct cdi_pci_resource *res;
> +    for (int i = 0; (res = cdi_list_get(pci_dev->resources, i)); i++) {
> +        if (res->type == CDI_PCI_MEMORY && !res->index) {
> +            hc->caps = res->address;
> +            hc->regs = (struct ehci_registers *)((uintptr_t)res->address +
> +                                                 hc->caps->caplength);
> +        }
> +    }

Check that hc->caps is set and fail otherwise.

> +
> +    if (!bios_handoff(pci_dev, hc)) {
> +        goto fail;
> +    }
> +
> +    // Stop operation
> +    hc->regs->usbcmd = (hc->regs->usbcmd & 0xff00ff00u) | 0x00080010u;

Right, because these are obvious and nice numbers. :-)

(Won't mention it everywhere, but there are _lots_ of magic numbers in
this patch that should be replaced by defines.)

> +    // Wait until HCHALTED becomes true
> +    int timeout_counter = 0;
> +    while (!(hc->regs->usbsts & (1 << 12)) && ++timeout_counter < 50) {
> +        cdi_sleep_ms(1);
> +    }
> +    if (timeout_counter >= 50) {
> +        goto fail;
> +    }

This is pretty common pattern. Worth providing a function/macro for it,
possiby even in the CDI headers?

> +    hc->regs->usbcmd |= (1 << 1); // Reset
> +    timeout_counter = 0;
> +    while ((hc->regs->usbcmd & (1 << 1)) && ++timeout_counter < 50) {
> +        cdi_sleep_ms(1);
> +    }
> +    if (timeout_counter >= 50) {
> +        goto fail;
> +    }
> +
> +    if (hc->caps->hccparams & 1) {
> +        // 64-bit addressing capability
> +        hc->regs->ctrldssegment = 0;
> +        mem_barrier();

Make it cdi_barrier() and move it to cdi/misc.h (I would avoid calling
it a "memory" barrier because that could imply that CPU caches are
flushed, which is not the case; it's just a compiler barrier). This is
definitely a function that other drivers can use.

That said, with regs being volatile, what does the barrier achieve here?
Except when it's really obvious, comments are appropriate to explain the
purpose of each barrier.

> +    }
> +
> +    // async advance doorbell
> +    hc->regs->usbintr = (1 << 5);
> +    hc->regs->usbsts  = hc->regs->usbsts;
> +    cdi_register_irq(pci_dev->irq, &irq_handler, &hc->hc.dev);
> +
> +    hc->periodic_list_mem = cdi_mem_alloc(4096, 12
> +                                                | CDI_MEM_PHYS_CONTIGUOUS
> +                                                | CDI_MEM_DMA_4G
> +                                                | CDI_MEM_NOINIT);
> +    hc->periodic_list = hc->periodic_list_mem->vaddr;
> +    for (int i = 0; i < 1024; i++) {
> +        hc->periodic_list[i] = EHCI_T;
> +    }
> +    mem_barrier();
> +    hc->regs->periodiclistbase = hc->periodic_list_mem->paddr.items[0].start;

The same question here because periodic_list is declared as volatile.
The difference is that I'm actually not sure whether it should be
volatile or whether relying on the barriers isn't better. 

> +
> +    // Create anchor
> +    struct cdi_mem_area *anchor_qh;
> +    anchor_qh = cdi_mem_alloc(sizeof(ehci_fat_qh_t), 6
> +                                                     | CDI_MEM_PHYS_CONTIGUOUS
> +                                                     | CDI_MEM_DMA_4G
> +                                                     | CDI_MEM_NOINIT);
> +
> +    hc->async_start = anchor_qh->vaddr;
> +    memset(hc->async_start, 0, sizeof(*hc->async_start));
> +
> +    hc->async_start->cdi_mem_area = anchor_qh;
> +    hc->async_start->paddr = anchor_qh->paddr.items[0].start
> +                           + offsetof(ehci_fat_qh_t, qh);
> +
> +    hc->async_start->next = hc->async_start;
> +    hc->async_start->previous = hc->async_start;
> +
> +    hc->async_start->qh.next_qh = hc->async_start->paddr | EHCI_QH;
> +    hc->async_start->qh.ep_state[0] = 0x0008e000u;
> +    hc->async_start->qh.ep_state[1] = 0x4000ff00u;
> +
> +    hc->async_start->qh.overlay.next_qtd = EHCI_T;
> +    hc->async_start->qh.overlay.alt_next_qtd = EHCI_T;
> +
> +    hc->regs->asynclistaddr = hc->async_start->paddr;
> +
> +    mem_barrier();
> +
> +    // Start operation
> +    hc->regs->usbcmd = (hc->regs->usbcmd & 0xff00ff00u) | 0x00080031u;
> +
> +    // Wait until HCHalted becomes false and the schedules are running
> +    timeout_counter = 0;
> +    while ((hc->regs->usbsts & 0xd000) != 0xc000 && ++timeout_counter < 50) {
> +        cdi_sleep_ms(1);
> +    }
> +    if (timeout_counter >= 50) {
> +        goto fail;
> +    }
> +
> +    // Route everything to this HC
> +    hc->regs->configflag |= 1;
> +
> +    mem_barrier();
> +    cdi_sleep_ms(5);
> +
> +    hc->hc.rh.ports = hc->caps->hcsparams & 0xf;
> +
> +    // Power up all ports
> +    for (int i = 0; i < hc->hc.rh.ports; i++) {
> +        hc->regs->portsc[i] |= EHCI_PSC_PP;
> +    }
> +
> +    mem_barrier();

This one looks really random.

> +
> +    return &hc->hc.dev;
> +
> +fail:
> +    if (hc->async_start) {
> +        cdi_mem_free(hc->async_start->cdi_mem_area);
> +    }
> +    if (hc->periodic_list_mem) {
> +        cdi_mem_free(hc->periodic_list_mem);
> +    }
> +    free((void *)hc->hc.dev.name);
> +    free(hc);
> +    cdi_pci_free_memory(pci_dev);

retired_qhs is leaked.

It seems that other drivers have the same problem, but it would be nice to
unregister the IRQ. (For microkernels, the problem probably isn't serious
because the process ends and the kernel should clean up, but for
monolithic ones a system crash might not be far away.)

> +    return NULL;
> +}
> +
> +
> +static bool bios_handoff(struct cdi_pci_device *pci_dev, struct ehci *hc)
> +{
> +    int eecp = (hc->caps->hccparams >> 8) & 0xff;
> +
> +    // Iterate through the list of capabilities
> +    while (eecp >= 0x40) {
> +        uint32_t cap = cdi_pci_config_readl(pci_dev, eecp);
> +
> +        // Look for USBLEGSUP
> +        if ((cap & 0xff) == 0x01) {
> +            // Set OS semaphore
> +            cdi_pci_config_writeb(pci_dev, eecp + 3, 1);
> +
> +            // Wait for BIOS semaphore to get unset
> +            int timeout_counter = 0;
> +            while ((cdi_pci_config_readb(pci_dev, eecp + 2) & 1) &&
> +                   ++timeout_counter < 1000)
> +            {
> +                cdi_sleep_ms(1);
> +            }
> +            if (timeout_counter >= 1000) {
> +                return false;
> +            }
> +        }
> +
> +        eecp = (cap >> 8) & 0xff;
> +    }
> +
> +    return true;
> +}
> +
> +
> +static void irq_handler(struct cdi_device *dev)
> +{
> +    struct cdi_usb_hc *cdi_hc = CDI_UPCAST(dev, dev, struct cdi_usb_hc);
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +
> +    uint32_t usbsts = hc->regs->usbsts;
> +    mem_barrier();
> +    hc->regs->usbsts = usbsts;
> +
> +    // async advance doorbell
> +    if (!(usbsts & (1 << 5))) {
> +        return;
> +    }
> +
> +    ehci_fat_qh_t *rqh;
> +    while ((rqh = cdi_list_pop(hc->retired_qhs))) {
> +        cdi_mem_free(rqh->cdi_mem_area);
> +    }
> +}

As always, I don't really know what I'm doing, so let me ask a stupid
question:

You use a list here, so I assume that multiple QHs can be retired. You
don't immediately free them but only after getting a doorbell IRQ, so I
assume that they are still in use by the hardware until then.

Does the hardware complete the use of all retired QHs at once, or what
does ensure that we don't free all retired QHs when the hardware is only
done with the first one?

(Disclaimer: I don't really know what a QH is.)

> +
> +void ehci_rh_port_down(struct cdi_usb_hub *hub, int index)
> +{
> +    struct cdi_usb_hc *cdi_hc = CDI_UPCAST(hub, rh, struct cdi_usb_hc);
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +
> +    hc->regs->portsc[index] &= ~EHCI_PSC_PED;
> +
> +    mem_barrier();
> +}
> +
> +
> +void ehci_rh_port_up(struct cdi_usb_hub *hub, int index)
> +{
> +    struct cdi_usb_hc *cdi_hc = CDI_UPCAST(hub, rh, struct cdi_usb_hc);
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +    uint32_t portsc = hc->regs->portsc[index];
> +
> +    if ((portsc & EHCI_PSC_LS_MASK) == EHCI_PSC_LS_K) {
> +        // Low-speed device, release ownership
> +        hc->regs->portsc[index] = portsc | EHCI_PSC_PO;
> +        mem_barrier();
> +        return;
> +    }
> +
> +    // Assert reset signal
> +    hc->regs->portsc[index] = ((portsc & ~(EHCI_PSC_PO | EHCI_PSC_SUS |
> +                                           EHCI_PSC_FPR | EHCI_PSC_PED))
> +                                       | EHCI_PSC_RES);
> +    mem_barrier();
> +    cdi_sleep_ms(50);
> +
> +    // De-assert reset signal
> +    int timeout_counter = 0;
> +    hc->regs->portsc[index] = portsc & ~EHCI_PSC_RES;
> +    do {
> +        portsc = hc->regs->portsc[index];
> +        cdi_sleep_ms(1);
> +    } while ((portsc & EHCI_PSC_RES) && ++timeout_counter < 50);
> +
> +    if (portsc & EHCI_PSC_RES) {
> +        // Disable port, route on to companion controller
> +        // (maybe it can do at least something with it)
> +        hc->regs->portsc[index] = (portsc & ~EHCI_PSC_PED) | EHCI_PSC_PO;
> +    } else if ((portsc & (EHCI_PSC_PED | EHCI_PSC_CCS)) == EHCI_PSC_CCS) {
> +        // Device connected but port disabled:
> +        // Full-speed device, release ownership
> +        hc->regs->portsc[index] = portsc | EHCI_PSC_PO;
> +    }
> +
> +    mem_barrier();
> +}
> +
> +
> +cdi_usb_port_status_t ehci_rh_port_status(struct cdi_usb_hub *hub, int index)
> +{
> +    struct cdi_usb_hc *cdi_hc = CDI_UPCAST(hub, rh, struct cdi_usb_hc);
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +    uint32_t portsc = hc->regs->portsc[index];
> +    cdi_usb_port_status_t status = 0;
> +
> +    if ((portsc & (EHCI_PSC_PED | EHCI_PSC_CCS)) ==
> +                  (EHCI_PSC_PED | EHCI_PSC_CCS))
> +    {
> +        // Device connected and port enabled
> +        status |= CDI_USB_PORT_CONNECTED | CDI_USB_HIGH_SPEED;
> +    }
> +
> +    return status;
> +}
> +
> +
> +static void enter_async_qh(struct ehci *hc, ehci_fat_qh_t *qh)
> +{
> +    // Enter the new QH as last
> +
> +    qh->next = hc->async_start;
> +    qh->previous = hc->async_start->previous;
> +
> +    qh->previous->next = qh;
> +    hc->async_start->previous = qh;

Shouldn't the barrier actually be here so that the hardware is
guaranteed to see the new links instead of some intermediate state when
qh->qh.next_qh is updated?

> +    qh->qh.next_qh = hc->async_start->paddr | EHCI_QH;
> +    mem_barrier();
> +
> +    qh->previous->qh.next_qh = qh->paddr | EHCI_QH;
> +    mem_barrier();
> +}
> +
> +
> +static void retire_qh(struct ehci *hc, ehci_fat_qh_t *qh)
> +{
> +    qh->previous->qh.next_qh = qh->qh.next_qh;
> +    mem_barrier();
> +
> +    qh->previous->next = qh->next;
> +    qh->next->previous = qh->previous;
> +
> +    cdi_list_push(hc->retired_qhs, qh);
> +    // Request async advance doorbell
> +    hc->regs->usbcmd |= (1 << 6);
> +}
> +
> +
> +static void retire_qtd(ehci_fat_qtd_t *qtd)
> +{
> +    ehci_fat_qh_t *qh = qtd->qh;
> +
> +    if (qtd->qtd.status & 0x7c) {
> +        if (qtd->qtd.status & (1 << 4)) {
> +            *qtd->result = CDI_USB_BABBLE;
> +        } else if (qtd->qtd.status & (1 << 3)) {
> +            *qtd->result = CDI_USB_BAD_RESPONSE;
> +        } else if (qtd->qtd.status & ((1 << 5) | (1 << 2))) {
> +            *qtd->result = CDI_USB_HC_ERROR;
> +        } else if (qtd->qtd.status & (1 << 6)) {
> +            *qtd->result = CDI_USB_STALL;
> +        } else {
> +            *qtd->result = CDI_USB_HC_ERROR;
> +        }
> +    }
> +
> +    if (qtd->write_back) {
> +        size_t rem = qtd->write_back_size;
> +        for (int i = 0; i < 5 && rem; i++) {
> +            size_t sz = rem > 4096 ? 4096 : rem;
> +            memcpy((void *)((uintptr_t)qtd->write_back + i * 4096),
> +                   qtd->buffers[i]->vaddr, sz);
> +            rem -= sz;
> +        }
> +        assert(!rem);
> +    }
> +
> +    qh->first_qtd = qtd->next;
> +    if (qh->last_qtd == qtd) {
> +        qh->last_qtd = NULL;
> +    }
> +
> +    for (int i = 0; i < 5; i++) {
> +        if (qtd->buffers[i]) {
> +            cdi_mem_free(qtd->buffers[i]);
> +        }
> +    }
> +
> +    cdi_mem_free(qtd->cdi_mem_area);
> +}
> +
> +
> +static void enter_qtd(ehci_fat_qh_t *qh, ehci_fat_qtd_t *qtd)
> +{
> +    bool first;
> +
> +    qtd->qh = qh;
> +    qtd->qtd.next_qtd = EHCI_T;
> +    qtd->qtd.alt_next_qtd = EHCI_T;
> +    mem_barrier();
> +
> +    while (qh->first_qtd) {
> +        /* Break if an active qTD is encountered */
> +        if (qh->first_qtd->qtd.status & (1 << 7)) {
> +            break;
> +        }
> +
> +        retire_qtd(qh->first_qtd);
> +    }
> +
> +    first = !qh->first_qtd;
> +
> +    // Note: Race condition (a qTD might retire here and we are thus appending
> +    // the one to be queued to an inactive qTD); it's fine as long as we are
> +    // constantly rescanning the queue (here and in ehci_wait_transaction())
> +
> +    if (first) {
> +        qh->first_qtd = qtd;
> +    } else {
> +        qh->last_qtd->next = qtd;
> +    }
> +    qh->last_qtd = qtd;
> +
> +    if (!first) {
> +        qh->last_qtd->qtd.next_qtd = qtd->paddr;
> +        mem_barrier();
> +    }
> +
> +    // If the QH is not active, it's done with its queue of qTDs and we should
> +    // continue from this qTD
> +    if (!(qh->qh.overlay.status & (1 << 7))) {
> +        qh->qh.overlay.next_qtd = qh->first_qtd->paddr;
> +        mem_barrier();
> +    }
> +}
> +
> +
> +cdi_usb_hc_transaction_t ehci_create_transaction(struct cdi_usb_hc *cdi_hc,
> +    const struct cdi_usb_hc_ep_info *target)
> +{
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +    struct cdi_mem_area *qh_mem;
> +
> +    qh_mem = cdi_mem_alloc(sizeof(ehci_fat_qh_t), 6
> +                                                  | CDI_MEM_PHYS_CONTIGUOUS
> +                                                  | CDI_MEM_DMA_4G
> +                                                  | CDI_MEM_NOINIT);
> +    ehci_fat_qh_t *qh = qh_mem->vaddr;
> +
> +    memset(qh, 0, sizeof(*qh));
> +
> +    int non_high_speed_control = target->speed != CDI_USB_HIGH_SPEED
> +                                 && target->ep_type == CDI_USB_CONTROL;
> +
> +    qh->qh.ep_state[0] = target->dev | (target->ep << 8) | (1 << 14)
> +                       | (target->mps << 16) | (non_high_speed_control << 27);
> +    switch (target->speed) {
> +        case CDI_USB_LOW_SPEED:  qh->qh.ep_state[0] |= (1 << 12); break;
> +        case CDI_USB_FULL_SPEED: qh->qh.ep_state[0] |= (0 << 12); break;
> +        case CDI_USB_HIGH_SPEED: qh->qh.ep_state[0] |= (2 << 12); break;
> +        case CDI_USB_SUPERSPEED:
> +        default:
> +            abort();
> +    }
> +
> +    qh->qh.ep_state[1] = (0xff << 8) | (1 << 30) | (target->tt_addr << 16)
> +                       | ((target->tt_port + 1) << 23);
> +
> +    qh->qh.overlay.next_qtd = EHCI_T;
> +    qh->qh.overlay.alt_next_qtd = EHCI_T;
> +
> +    qh->cdi_mem_area = qh_mem;
> +    qh->paddr = qh_mem->paddr.items[0].start + offsetof(ehci_fat_qh_t, qh);
> +
> +    mem_barrier();
> +
> +    enter_async_qh(hc, qh);
> +
> +    return qh;
> +}
> +
> +
> +static void create_qtd(ehci_fat_qh_t *qh, cdi_usb_transfer_token_t token,
> +                       int toggle, void *buffer, size_t size,
> +                       cdi_usb_transmission_result_t *result)
> +{
> +    struct cdi_mem_area *qtd_mem;
> +
> +    qtd_mem = cdi_mem_alloc(sizeof(ehci_fat_qtd_t), 6
> +                                                    | CDI_MEM_PHYS_CONTIGUOUS
> +                                                    | CDI_MEM_DMA_4G
> +                                                    | CDI_MEM_NOINIT);
> +    ehci_fat_qtd_t *qtd = qtd_mem->vaddr;
> +
> +    memset(qtd, 0, sizeof(*qtd));
> +
> +    qtd->qtd.status = (1 << 7) | (3 << 10) | (size << 16)
> +                    | ((uint32_t)toggle << 31);
> +
> +    switch (token) {
> +        case CDI_USB_IN:    qtd->qtd.status |= (1 << 8); break;
> +        case CDI_USB_OUT:   qtd->qtd.status |= (0 << 8); break;
> +        case CDI_USB_SETUP: qtd->qtd.status |= (2 << 8); break;
> +        default:
> +            abort();
> +    }
> +
> +    if (token == CDI_USB_IN) {
> +        qtd->write_back = buffer;
> +        qtd->write_back_size = size;
> +    }
> +
> +    for (int i = 0; i < 5 && size; i++) {
> +        size_t sz = size > 4096 ? 4096 : size;
> +
> +        qtd->buffers[i] = cdi_mem_alloc(4096, 12
> +                                              | CDI_MEM_PHYS_CONTIGUOUS
> +                                              | CDI_MEM_DMA_4G
> +                                              | CDI_MEM_NOINIT);
> +        qtd->qtd.buffers[i] = qtd->buffers[i]->paddr.items[0].start;
> +
> +        if (token == CDI_USB_OUT || token == CDI_USB_SETUP) {
> +            memcpy(qtd->buffers[i]->vaddr,
> +                   (void *)((uintptr_t)buffer + i * 4096), sz);
> +        }
> +
> +        size -= sz;
> +    }
> +
> +    qtd->cdi_mem_area = qtd_mem;
> +    qtd->paddr = qtd_mem->paddr.items[0].start + offsetof(ehci_fat_qtd_t, qtd);
> +
> +    qtd->result = result;
> +
> +    mem_barrier();
> +
> +    enter_qtd(qh, qtd);
> +}
> +
> +
> +void ehci_enqueue(struct cdi_usb_hc *cdi_hc,
> +                  const struct cdi_usb_hc_transmission *trans)
> +{
> +    (void)cdi_hc;
> +
> +    ehci_fat_qh_t *qh = trans->ta;
> +    bool first = true;
> +    size_t sz = trans->size;
> +    void *buf = trans->buffer;

Make it uint8_t* to save some casts?

> +    int toggle = trans->toggle;
> +    size_t mps = (qh->qh.ep_state[0] >> 16) & 0x7ff;
> +
> +    *trans->result = CDI_USB_OK;
> +
> +    while (sz || first) {
> +        first = false;
> +
> +        size_t iter_sz = sz > 4096 * 5 ? 4096 * 5 : sz;
> +
> +        create_qtd(qh, trans->token, toggle, buf, iter_sz, trans->result);
> +
> +        buf = (void *)((uintptr_t)buf + iter_sz);
> +        sz -= iter_sz;
> +
> +        toggle ^= ((iter_sz + mps - 1) / mps) & 1;
> +    }
> +}
> +
> +
> +void ehci_wait_transaction(struct cdi_usb_hc *cdi_hc,
> +                           cdi_usb_hc_transaction_t ta)
> +{
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +    ehci_fat_qh_t *qh = ta;
> +    bool had_timeout = false;
> +
> +    irq_handler(&cdi_hc->dev);
> +
> +    while (qh->first_qtd) {
> +        if (!had_timeout) {
> +            if (!(qh->qh.overlay.status & (1 << 7))) {
> +                qh->qh.overlay.next_qtd = qh->first_qtd->paddr;
> +                mem_barrier();
> +            }
> +
> +            int frame_counter = 0, lframe = (hc->regs->frindex >> 3) % 1024;
> +
> +            while ((qh->first_qtd->qtd.status & (1 << 7)) &&
> +                   frame_counter < EHCI_TIMEOUT)
> +            {
> +                int cframe = (hc->regs->frindex >> 3) % 1024;
> +                frame_counter += (cframe + 1024 - lframe) % 1024;
> +                lframe = cframe;
> +
> +                // I don't really know who's to blame for this (qemu or
> +                // týndur), and actually, I don't even want to know.
> +                // As a matter of fact, qemu will not work on the async
> +                // schedule if the async advance doorbell is set but the driver
> +                // has not acknowledged this; however, the interrupt handler
> +                // will always clear that bit and the interrupt handler is
> +                // active all the time, so something is apparently sometimes
> +                // blocking the handler from being executed. Force it here to
> +                // prevent qemu's HC from stalling.
> +                irq_handler(&cdi_hc->dev);
> +            }
> +
> +            had_timeout = frame_counter >= EHCI_TIMEOUT;
> +        }
> +
> +        if (had_timeout) {
> +            // Stop this queue
> +            qh->qh.overlay.next_qtd = EHCI_T;
> +            qh->qh.overlay.status &= ~(1u << 7);
> +
> +            *qh->first_qtd->result = CDI_USB_TIMEOUT;
> +        }
> +
> +        retire_qtd(qh->first_qtd);
> +    }
> +}
> +
> +
> +void ehci_destroy_transaction(struct cdi_usb_hc *cdi_hc,
> +                              cdi_usb_hc_transaction_t ta)
> +{
> +    struct ehci *hc = CDI_UPCAST(cdi_hc, hc, struct ehci);
> +    ehci_fat_qh_t *qh = ta;
> +
> +    // TODO: Does this work without having called ehci_wait_transaction()
> +    // before?

Is this TODO intentionally left unresolved?

> +    qh->qh.overlay.next_qtd = EHCI_T;
> +
> +    int frame_counter = 0, lframe = (hc->regs->frindex >> 3) % 1024;
> +
> +    while ((qh->qh.overlay.status & (1 << 7)) && frame_counter < EHCI_TIMEOUT) {
> +        int cframe = (hc->regs->frindex >> 3) % 1024;
> +        frame_counter += (cframe + 1024 - lframe) % 1024;
> +        lframe = cframe;
> +
> +        // See above.
> +        irq_handler(&cdi_hc->dev);
> +    }
> +
> +    if (frame_counter >= EHCI_TIMEOUT) {
> +        qh->qh.overlay.status &= ~(1 << 7);
> +        // Could probably be solved more elegantly, but we are in a timeout
> +        // already, so waiting a bit longer will not be that big of a problem
> +        // (the sleep is here to ensure that the HC is no longer trying to work
> +        // on the qTDs belonging to this QH)
> +        cdi_sleep_ms(100);
> +    }
> +
> +    while (qh->first_qtd) {
> +        retire_qtd(qh->first_qtd);
> +    }
> +    retire_qh(hc, qh);
> +}

Well, that's what I can say without actually understanding the hardware.
I hope it's better than nothing.

Kevin