Goto sanos source index

//
// tcp_output.c
//
// Transmission Control Protocol (TCP)
//
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
// Portions Copyright (C) 2001, Swedish Institute of Computer Science.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 
// 1. Redistributions of source code must retain the above copyright 
//    notice, this list of conditions and the following disclaimer.  
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.  
// 3. Neither the name of the project nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission. 
// 
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
// SUCH DAMAGE.
// 

#include <net/net.h>

#define MIN(x,y) ((x) < (y) ? (x): (y))

static void tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb);
static err_t tcp_send_ack(struct tcp_pcb *pcb);

err_t tcp_send_ctrl(struct tcp_pcb *pcb, int flags) {
  //kprintf("tcp_send_ctrl: sending flags (");
  //tcp_debug_print_flags(flags);
  //kprintf(")\n");

  return tcp_enqueue(pcb, NULL, 0, flags, NULL, 0);
}

err_t tcp_write(struct tcp_pcb *pcb, const void *data, int len, int opt) {
  int rc;

  if (pcb->state == SYN_SENT || pcb->state == SYN_RCVD || pcb->state == ESTABLISHED || pcb->state == CLOSE_WAIT) {
    if (len > 0) {
      rc = tcp_enqueue(pcb, (void *) data, len, 0, NULL, 0);
      if (rc < 0) return rc;
    }

    if (opt == TCP_WRITE_FLUSH) {
      tcp_output(pcb);
    } else if (opt == TCP_WRITE_NAGLE) {
      // This is the Nagle algorithm (RFC 896): inhibit the sending of new TCP
      // segments when new outgoing data arrives from the user if any
      // previously transmitted data on the connection remains
      // unacknowledged.
      if (pcb->unacked == NULL) tcp_output(pcb);
    }

    return 0;
  } else  {
    return -ENOTCONN;
  }
}

err_t tcp_enqueue(struct tcp_pcb *pcb, void *data, int len, int flags, unsigned char *optdata, int optlen) {
  struct pbuf *p;
  struct tcp_seg *seg, *useg, *queue;
  unsigned long seqno;
  int left;
  int buflen;
  int seglen;
  int size;
  void *ptr;
  int queuelen;

  left = len;
  ptr = data;
  
  if (len > pcb->snd_buf) {
    kprintf(KERN_ERR "tcp_enqueue: too much data %d\n", len);
    return -ENOMEM;
  }

  seqno = pcb->snd_lbb;
  
  queue = NULL;
  queuelen = pcb->snd_queuelen;
  if (queuelen >= TCP_SND_QUEUELEN) {
    kprintf(KERN_ERR "tcp_enqueue: too long queue %d (max %d)\n", queuelen, TCP_SND_QUEUELEN);
    goto memerr;
  }
  
  // Fill last pbuf of the last segment on the unsent queue
  if (optlen == 0 && flags == 0) {
    // Go to the last segment on the unsent queue
    if (pcb->unsent == NULL) {
      useg = NULL;
    } else {
      for (useg = pcb->unsent; useg->next != NULL; useg = useg->next);
    }

    if (useg != NULL) {
      p = useg->p;
      while (p->next) p = p->next;

      buflen = pbuf_spare(p);
      if (buflen > left) buflen = left;
      if (useg->len + buflen > pcb->mss) buflen = pcb->mss - useg->len;

      if (buflen > 0) {
        //kprintf("tcp_enqueue: add %d bytes to segment\n", buflen);
        memcpy((char *) p->payload + p->len, ptr, buflen);
        p->len += buflen;
        useg->p->tot_len += buflen;
        useg->len += buflen;
        seqno += buflen;
        left -= buflen;
        ptr = (void *) ((char *) ptr + buflen);
      }
    }
  }

  // Split rest of data into segments
  seg = NULL;
  seglen = 0;
  if (left > 0 || optlen > 0 || flags) {
    while (queue == NULL || left > 0) {
      seglen = (left > pcb->mss ? pcb->mss : left);

      // Allocate memory for tcp_seg, and fill in fields
      seg = (struct tcp_seg *) kmalloc(sizeof(struct tcp_seg));
      if (seg == NULL) {
        kprintf(KERN_ERR "tcp_enqueue: could not allocate memory for tcp_seg\n");
        goto memerr;
      }
      seg->next = NULL;
      seg->p = NULL;

      if (queue == NULL) {
        queue = seg;
      } else {
        for (useg = queue; useg->next != NULL; useg = useg->next);
        useg->next = seg;
      }
    
      // If copy is set, memory should be allocated
      // and data copied into pbuf, otherwise data comes from
      // ROM or other static memory, and need not be copied. If
      // optdata is != NULL, we have options instead of data
      if (optdata != NULL) {
        if ((seg->p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RW)) == NULL) goto memerr;
        queuelen++;
        seg->dataptr = (char *) seg->p->payload + optlen;
      } else {
        size = seglen;
        if (seglen < TCP_MIN_SEGLEN) {
          if (pcb->mss < TCP_MIN_SEGLEN) {
            size = pcb->mss;
          } else {
            size = TCP_MIN_SEGLEN;
          }
        }

        if ((seg->p = pbuf_alloc(PBUF_TRANSPORT, size, PBUF_RW)) == NULL) {
          kprintf(KERN_ERR "tcp_enqueue: could not allocate memory for pbuf copy\n");
          goto memerr;
        }
        pbuf_realloc(seg->p, seglen);

        queuelen++;

        if (data != NULL) memcpy(seg->p->payload, ptr, seglen);
        seg->dataptr = seg->p->payload;
      } 

      if (queuelen > TCP_SND_QUEUELEN) {
        kprintf(KERN_ERR "tcp_enqueue: queue too long %d (%d)\n", queuelen, TCP_SND_QUEUELEN);
        goto memerr;
      }
    
      seg->len = seglen;
    
      // Build TCP header
      if (pbuf_header(seg->p, TCP_HLEN) < 0) {
        kprintf(KERN_ERR "tcp_enqueue: no room for TCP header in pbuf.\n");
      
        stats.tcp.err++;
        goto memerr;
      }

      seg->tcphdr = seg->p->payload;
      seg->tcphdr->src = htons(pcb->local_port);
      seg->tcphdr->dest = htons(pcb->remote_port);
      seg->tcphdr->seqno = htonl(seqno);
      seg->tcphdr->urgp = 0;
      TCPH_FLAGS_SET(seg->tcphdr, flags);
      // Don't fill in tcphdr->ackno and tcphdr->wnd until later
    
      if (optdata == NULL) {
        TCPH_OFFSET_SET(seg->tcphdr, 5 << 4);
      } else {
        TCPH_OFFSET_SET(seg->tcphdr, (5 + optlen / 4) << 4);
      
        // Copy options into segment after fixed TCP header
        memcpy(seg->tcphdr + 1, optdata, optlen);
      }

      //kprintf("tcp_enqueue: queueing %lu:%lu (0x%x)\n", ntohl(seg->tcphdr->seqno), ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg), flags);

      left -= seglen;
      seqno += seglen;
      ptr = (void *) ((char *) ptr + seglen);
    }
  }

  if (queue != NULL) {
    // Go to the last segment on the unsent queue
    if (pcb->unsent == NULL) {
      useg = NULL;
    } else {
      for (useg = pcb->unsent; useg->next != NULL; useg = useg->next);
    }

    // If there is room in the last segment on the unsent queue
    // chain the first pbuf on the queue together with that
    if (useg != NULL && 
        TCP_TCPLEN(useg) != 0 && 
        !(TCPH_FLAGS(useg->tcphdr) & (TCP_SYN | TCP_FIN)) && 
        !(flags & (TCP_SYN | TCP_FIN)) && 
        useg->len + queue->len <= pcb->mss) {
      // Remove TCP header from first segment
      pbuf_header(queue->p, -TCP_HLEN);
      pbuf_chain(useg->p, queue->p);
      useg->len += queue->len;
      useg->next = queue->next;
    
      //kprintf("tcp_output: chaining, new len %u\n", useg->len);

      if (seg == queue) seg = NULL;
      kfree(queue);
    } else {      
      if (useg == NULL) {
        pcb->unsent = queue;
      } else {
        useg->next = queue;
      }
    }
  }

  if ((flags & TCP_SYN) || (flags & TCP_FIN)) len++;
  pcb->snd_lbb += len;
  pcb->snd_buf -= len;
  pcb->snd_queuelen = queuelen;

  // Set the PSH flag in the last segment that we enqueued, but only
  // if the segment has data (indicated by seglen > 0)
  if (seg != NULL && seglen > 0 && seg->tcphdr != NULL) {
    TCPH_FLAGS_SET(seg->tcphdr, TCPH_FLAGS(seg->tcphdr) | TCP_PSH);
  }

  return 0;

memerr:
  stats.tcp.memerr++;

  if (queue != NULL) tcp_segs_free(queue);
  return -ENOMEM;
}

err_t tcp_output(struct tcp_pcb *pcb) {
  struct tcp_seg *seg, *useg;
  unsigned long wnd;
    
  wnd = MIN(pcb->snd_wnd, pcb->cwnd);
  seg = pcb->unsent;
  //kprintf("tcp_output: wnd %d snd_wnd %d cwnd %d\n", wnd, pcb->snd_wnd, pcb->cwnd);
  
  while (seg != NULL && ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
    pcb->rtime = 0;
    pcb->unsent = seg->next;
    
    if (pcb->state != SYN_SENT) {
      TCPH_FLAGS_SET(seg->tcphdr, TCPH_FLAGS(seg->tcphdr) | TCP_ACK);
      pcb->flags &= ~(TF_ACK_DELAY | TF_ACK_NOW);
    }
    
    pcb->snd_nxt = ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
    if (TCP_SEQ_LT(pcb->snd_max, pcb->snd_nxt)) pcb->snd_max = pcb->snd_nxt;

    // Put segment on unacknowledged list if length > 0
    if (TCP_TCPLEN(seg) > 0) {
      seg->next = NULL;
      if (pcb->unacked == NULL) {
        pcb->unacked = seg;
      } else {
        for (useg = pcb->unacked; useg->next != NULL; useg = useg->next);
        useg->next = seg;
      }

      tcp_output_segment(seg, pcb);
    } else {
      tcp_output_segment(seg, pcb);
      tcp_seg_free(seg);
    }

    seg = pcb->unsent;
  } 
  
  // If no segments are enqueued but we should send an ACK, we
  // construct the ACK and send it
  if (pcb->flags & TF_ACK_NOW) {
    pcb->flags &= ~(TF_ACK_DELAY | TF_ACK_NOW);
    return tcp_send_ack(pcb);
  }
 
  return 0;
}

static err_t tcp_send_ack(struct tcp_pcb *pcb) {
  struct pbuf *p;
  struct tcp_hdr *tcphdr;
  struct netif *netif;
  int rc;

  // Find route for segment
  netif = ip_route(&pcb->remote_ip);
  if (netif == NULL) {
    kprintf(KERN_ERR "tcp_send_ack: No route to %a\n", &pcb->remote_ip);
    stats.ip.rterr++;
    return -EROUTE;
  }

  p = pbuf_alloc(PBUF_IP, TCP_HLEN, PBUF_RW);
  if (!p) {
    stats.tcp.memerr++;
    return -ENOMEM; 
  }
  
  tcphdr = p->payload;
  tcphdr->src = htons(pcb->local_port);
  tcphdr->dest = htons(pcb->remote_port);
  tcphdr->seqno = htonl(pcb->snd_nxt);
  tcphdr->ackno = htonl(pcb->rcv_nxt);
  TCPH_FLAGS_SET(tcphdr, TCP_ACK);
  tcphdr->wnd = (pcb->rcv_wnd < pcb->mss) ? 0 : htons(pcb->rcv_wnd);
  tcphdr->urgp = 0;
  TCPH_OFFSET_SET(tcphdr, 5 << 4);
  
  tcphdr->chksum = 0;
  if ((netif->flags & NETIF_TCP_TX_CHECKSUM_OFFLOAD) == 0) {
    tcphdr->chksum = inet_chksum_pseudo(p, &pcb->local_ip, &pcb->remote_ip, IP_PROTO_TCP, p->tot_len);
  }

  //kprintf("tcp_send_ack: seqno %lu ackno %lu wnd %d\n", htonl(tcphdr->seqno), htonl(tcphdr->ackno), ntohs(tcphdr->wnd));

  stats.tcp.xmit++;

  rc = ip_output_if(p, &pcb->local_ip, &pcb->remote_ip, TCP_TTL, IP_PROTO_TCP, netif);
  if (rc < 0) {
    pbuf_free(p);
    return rc;
  }

  return 0;
} 

static void tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb) {
  struct netif *netif;

  if (seg->p->ref > 1) {
    kprintf(KERN_ERR "tcp_output_segment: packet not transmitted, already in tx queue\n");
    return;
  }

  // The TCP header has already been constructed, but the ackno and wnd fields remain
  seg->tcphdr->ackno = htonl(pcb->rcv_nxt);

  // Silly window avoidance
  if (pcb->rcv_wnd < pcb->mss) {
    seg->tcphdr->wnd = 0;
  } else {
    seg->tcphdr->wnd = htons(pcb->rcv_wnd);
  }

  // If the buffer is still waiting to be sent, we do not retransmit it.
  // The packet buffer reference counter is used to determine if the
  // packet is still on the transmission queue.
  if (seg->p->ref > 1) {
    kprintf(KERN_ERR "tcp_output_segment: packet not retransmitted, still in tx queue\n");
    return;
  }

  // Find route for segment
  netif = ip_route(&pcb->remote_ip);
  if (netif == NULL) {
    kprintf(KERN_ERR "tcp_output_segment: No route to %a\n", &pcb->remote_ip);
    stats.ip.rterr++;
    return;
  }

  // If we don't have a local IP address, we get it from netif
  if (ip_addr_isany(&pcb->local_ip)) {
    ip_addr_set(&pcb->local_ip, &netif->ipaddr);
  }

  pcb->rtime = 0;

  if (pcb->rttest == 0) {
    pcb->rttest = tcp_ticks;
    pcb->rtseq = ntohl(seg->tcphdr->seqno);
  }

  pbuf_header(seg->p, (char *) seg->p->payload - (char *) seg->tcphdr);

  //kprintf("tcp_output_segment: seqno %lu ackno %lu len %d wnd %d ", htonl(seg->tcphdr->seqno), htonl(seg->tcphdr->ackno), seg->len, ntohs(seg->tcphdr->wnd));
  //tcp_debug_print_flags(TCPH_FLAGS(seg->tcphdr));
  //kprintf("\n");

  seg->tcphdr->chksum = 0;
  if ((netif->flags & NETIF_TCP_TX_CHECKSUM_OFFLOAD) == 0) {
    seg->tcphdr->chksum = inet_chksum_pseudo(seg->p, &pcb->local_ip, &pcb->remote_ip, IP_PROTO_TCP, seg->p->tot_len);
  }
  stats.tcp.xmit++;

  //kprintf("sending TCP segment:\n");
  //tcp_debug_print(seg->tcphdr);
 
  pbuf_ref(seg->p);
  if (ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, TCP_TTL, IP_PROTO_TCP, netif) < 0) pbuf_free(seg->p);
}

void tcp_rexmit(struct tcp_pcb *pcb) {
  struct tcp_seg *seg;

  if (pcb->unacked == NULL) return;

  // Move all unacked segments to the unsent queue
  for (seg = pcb->unacked; seg->next != NULL; seg = seg->next);

  seg->next = pcb->unsent;
  pcb->unsent = pcb->unacked;
  pcb->unacked = NULL;

  pcb->snd_nxt = ntohl(pcb->unsent->tcphdr->seqno);

  pcb->nrtx++;

  // Don't take any rtt measurements after retransmitting
  pcb->rttest = 0;

  // Do the actual retransmission
  tcp_output(pcb);
}

void tcp_rst(unsigned long seqno, unsigned long ackno, struct ip_addr *local_ip, struct ip_addr *remote_ip, unsigned short local_port, unsigned short remote_port) {
  struct pbuf *p;
  struct tcp_hdr *tcphdr;
  struct netif *netif;

  if ((netif = ip_route(remote_ip)) == NULL) {
    kprintf(KERN_ERR "tcp_rst: No route to %a\n", &remote_ip);
    stats.tcp.rterr++;
    return;
  }

  p = pbuf_alloc(PBUF_IP, TCP_HLEN, PBUF_RW);
  if (p == NULL) {
    // Reclaim memory here
    kprintf(KERN_ERR "tcp_rst: could not allocate memory for pbuf\n");
    stats.tcp.memerr++;
    return;
  }

  tcphdr = p->payload;
  tcphdr->src = htons(local_port);
  tcphdr->dest = htons(remote_port);
  tcphdr->seqno = htonl(seqno);
  tcphdr->ackno = htonl(ackno);
  TCPH_FLAGS_SET(tcphdr, TCP_RST | TCP_ACK);
  tcphdr->wnd = 0;
  tcphdr->urgp = 0;
  TCPH_OFFSET_SET(tcphdr, 5 << 4);
  
  tcphdr->chksum = 0;
  if ((netif->flags & NETIF_TCP_TX_CHECKSUM_OFFLOAD) == 0) {
    tcphdr->chksum = inet_chksum_pseudo(p, local_ip, remote_ip, IP_PROTO_TCP, p->tot_len);
  }

  stats.tcp.xmit++;

  //kprintf("sending TCP rst segment:\n");
  //tcp_debug_print(tcphdr);

  if (ip_output_if(p, local_ip, remote_ip, TCP_TTL, IP_PROTO_TCP, netif) < 0) pbuf_free(p);

  //kprintf("tcp_rst: seqno %lu ackno %lu.\n", seqno, ackno);
}