// Goto sanos source index

//
// tcp_input.c
//
// Transmission Control Protocol (TCP)
//
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
// Portions Copyright (C) 2001, Swedish Institute of Computer Science.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 
// 1. Redistributions of source code must retain the above copyright 
//    notice, this list of conditions and the following disclaimer.  
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.  
// 3. Neither the name of the project nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission. 
// 
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
// SUCH DAMAGE.
// 

#include <net/net.h>

// Evaluates to the larger of a and b. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define UMAX(a, b) ((a) > (b) ? (a) : (b))

// Forward declarations of the file-local helpers defined further down in this file
static err_t tcp_process(struct tcp_seg *seg, struct tcp_pcb *pcb);
static void tcp_receive(struct tcp_seg *seg, struct tcp_pcb *pcb);
static void tcp_parseopt(struct tcp_seg *seg, struct tcp_pcb *pcb);

//
// tcp_input
//
// The initial input processing of TCP. It verifies the TCP header, demultiplexes
// the segment between the PCBs and passes it on to tcp_process(), which implements
// the TCP finite state machine. This function is called by the IP layer in ip_input.
//
// Parameters:
//   p    pbuf holding the received datagram; on entry p->payload points to the IP header
//   inp  network interface the datagram arrived on
//
// Returns 0 when the segment has been consumed. In that case the pbuf has either
// been freed here or handed over to the PCB as received data. A negative error
// code (-EPROTO, -ECHKSUM or -EINVAL) is returned when the segment is rejected
// before demultiplexing; the pbuf is not freed by this function on those paths.
//

err_t tcp_input(struct pbuf *p, struct netif *inp) {
  struct tcp_hdr *tcphdr;
  struct tcp_pcb *pcb, *prev;
  struct ip_hdr *iphdr;
  int offset;
  err_t err;

  stats.tcp.recv++;
  
  // The TCP header starts right after the (variable length) IP header
  iphdr = p->payload;
  tcphdr = (struct tcp_hdr *)((char *) p->payload + IPH_HL(iphdr) * 4);

  //kprintf("receiving TCP segment:\n");
  //tcp_debug_print(tcphdr);

  // Remove header from payload
  if (pbuf_header(p, -(IPH_HL(iphdr) * 4)) < 0 || p->tot_len < sizeof(struct tcp_hdr)) {
    kprintf("tcp_input: short packet (%u bytes) discarded\n", p->tot_len);
    stats.tcp.lenerr++;
    stats.tcp.drop++;
    return -EPROTO;
  }

  // Don't even process incoming broadcasts/multicasts
  if (ip_addr_isbroadcast(&iphdr->dest, &inp->netmask) || ip_addr_ismulticast(&iphdr->dest)) {
    pbuf_free(p);
    return 0;
  }

#ifdef CHECK_TCP_CHECKSUM
  if ((inp->flags & NETIF_TCP_RX_CHECKSUM_OFFLOAD) == 0) {
    // Verify TCP checksum
    if (inet_chksum_pseudo(p, &iphdr->src, &iphdr->dest, IP_PROTO_TCP, p->tot_len) != 0) {
      kprintf("tcp_input: packet discarded due to failing checksum 0x%04x\n", inet_chksum_pseudo(p, &iphdr->src, &iphdr->dest, IP_PROTO_TCP, p->tot_len));

      stats.tcp.chkerr++;
      stats.tcp.drop++;

      return -ECHKSUM;
    }
  }
#endif

  // Move the payload pointer in the pbuf so that it points to 
  // the TCP data instead of the TCP header
  offset = TCPH_OFFSET(tcphdr) >> 4;
  if (pbuf_header(p, -(offset * 4)) < 0) return -EINVAL;

  // Convert fields in TCP header to host byte order
  tcphdr->src = ntohs(tcphdr->src);
  tcphdr->dest = ntohs(tcphdr->dest);
  tcphdr->seqno = ntohl(tcphdr->seqno);
  tcphdr->ackno = ntohl(tcphdr->ackno);
  tcphdr->wnd = ntohs(tcphdr->wnd);
  
  //kprintf("tcp_input: size %d seqno %lu ackno %lu wnd %d flags: ", p->tot_len, tcphdr->seqno, tcphdr->ackno, tcphdr->wnd);
  //tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
  //kprintf("\n");

  // Demultiplex an incoming segment. First, we check if it is destined for an active connection
  prev = NULL;  
  for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
    if (pcb->remote_port == tcphdr->src &&
        pcb->local_port == tcphdr->dest &&
        ip_addr_cmp(&pcb->remote_ip, &iphdr->src) &&
        ip_addr_cmp(&pcb->local_ip, &iphdr->dest)) {
      // Move this PCB to the front of the list so that subsequent
      // lookups will be faster (we exploit locality in TCP segment arrivals).
      if (prev != NULL) {
        prev->next = pcb->next;
        pcb->next = tcp_active_pcbs;
        tcp_active_pcbs = pcb; 
      }
      break;
    }

    prev = pcb;
  }

  // If it did not go to an active connection, we check the connections in the TIME-WAIT state
  if (pcb == NULL) {
    for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
      if (pcb->remote_port == tcphdr->src &&
          pcb->local_port == tcphdr->dest &&
          ip_addr_cmp(&pcb->remote_ip, &iphdr->src) &&
          ip_addr_cmp(&pcb->local_ip, &iphdr->dest)) {
        // We don't really care enough to move this PCB to the front
        // of the list since we are not very likely to receive that
        // many segments for connections in TIME-WAIT.
        break;
      }
    }  
  
    // Finally, if we still did not get a match, we check all PCBs that are LISTENing for incoming connections
    prev = NULL;  
    if (pcb == NULL) {
      for (pcb = (struct tcp_pcb *) tcp_listen_pcbs; pcb != NULL; pcb = pcb->next) {
        // A listener matches on the local port, and on the local address unless it is bound to "any"
        if ((ip_addr_isany(&pcb->local_ip) || ip_addr_cmp(&pcb->local_ip, &iphdr->dest)) &&
             pcb->local_port == tcphdr->dest) {
          // Move this PCB to the front of the list so that subsequent
          // lookups will be faster (we exploit locality in TCP segment
          // arrivals)          
          if (prev != NULL) {
            prev->next = pcb->next;
            pcb->next = (struct tcp_pcb *) tcp_listen_pcbs;
            tcp_listen_pcbs = (struct tcp_pcb_listen *) pcb; 
          }
          break;
        }
        prev = pcb;
      }
    }
  }
  
  if (pcb != NULL) {
    struct tcp_seg seg;
    int pcb_freed = 0;   // Set once the PCB memory has been released and must not be touched again

    // Set up a tcp_seg structure
    seg.next = NULL;
    seg.len = p->tot_len;
    seg.dataptr = p->payload;
    seg.p = p;
    seg.tcphdr = tcphdr;
    
    // Clear the per-segment results that tcp_receive() fills in
    if (pcb->state != LISTEN && pcb->state != TIME_WAIT) {
      pcb->recv_data = NULL;
      pcb->acked = 0;
    }

    pcb->flags |= TF_IN_RECV;

    err = tcp_process(&seg, pcb);

    // A return value of EABORT means that tcp_abort() was called and that the pcb has been freed.
    if (err == -EABORT) {
      pcb_freed = 1;
    } else {
      if (pcb->state != LISTEN) {
        if (pcb->flags & TF_RESET) {
          // The connection was reset by the peer: notify the application and drop the PCB
          if (pcb->errf != NULL) pcb->errf(pcb->callback_arg, -ERST);

          if (pcb->state == TIME_WAIT) { 
            tcp_pcb_remove(&tcp_tw_pcbs, pcb);
          } else {
            tcp_pcb_remove(&tcp_active_pcbs, pcb);
          }

          kfree(pcb);
          pcb_freed = 1;
        } else if (pcb->flags & TF_CLOSED) {
          // The connection has been closed down completely
          tcp_pcb_remove(&tcp_active_pcbs, pcb);
          kfree(pcb);
          pcb_freed = 1;
        } else {
          if (pcb->state < TIME_WAIT) {
            err = 0;
            
            // If the application has registered a "sent" function to be
            // called when new send buffer space is available, we call it now
            if (pcb->acked > 0 && pcb->sent != NULL) err = pcb->sent(pcb->callback_arg, pcb, pcb->acked);
            
            if (pcb->recv != NULL) {
              // Deliver any in-sequence data to the application
              if (pcb->recv_data != NULL) {
                err = pcb->recv(pcb->callback_arg, pcb, pcb->recv_data, 0);
              }

              // A NULL pbuf tells the application that the remote side has closed
              if (pcb->flags & TF_GOT_FIN) {
                err = pcb->recv(pcb->callback_arg, pcb, NULL, 0);
              }
            } else {
              // Nobody is interested in the data; discard it
              err = 0;
              pbuf_free(pcb->recv_data);
              if (pcb->flags & TF_GOT_FIN) tcp_close(pcb);
            }

            if (err == 0) {
              tcp_output(pcb);
            } else {
              pbuf_free(pcb->recv_data);
            }
          } else if (pcb->state == TIME_WAIT) {
            pbuf_free(pcb->recv_data);
            tcp_output(pcb);
          }
        }
      }
    }
    
    // Only touch the PCB if it still exists; writing to it after it has
    // been freed would corrupt the heap.
    if (!pcb_freed) pcb->flags &= ~TF_IN_RECV;

    // seg.p has been set to NULL if the pbuf was handed over as received data
    if (seg.p) pbuf_free(seg.p);
  } else {
    // If no matching PCB was found, send a TCP RST (reset) to the sender
    //kprintf("tcp_input: no PCB match found, resetting.\n");
    if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
      stats.tcp.proterr++;
      stats.tcp.drop++;

      // SYN and FIN each occupy one sequence number in addition to the payload
      tcp_rst(tcphdr->ackno, tcphdr->seqno + p->tot_len +
              ((TCPH_FLAGS(tcphdr) & TCP_FIN || TCPH_FLAGS(tcphdr) & TCP_SYN) ? 1: 0),
              &iphdr->dest, &iphdr->src, tcphdr->dest, tcphdr->src);
    }

    pbuf_free(p);
  }

  return 0;
}

//
// tcp_process
//
// Implements the TCP state machine. Called by tcp_input. In some
// states tcp_receive() is called to receive data.
//
// Parameters:
//   seg  the incoming segment; the fields of seg->tcphdr are read in host byte order
//   pcb  the protocol control block the segment was demultiplexed to
//
// Returns 0 in the normal case, -ERST when an acceptable RST was received
// (TF_RESET is then set in pcb->flags), -EABORT when tcp_abort() has been
// called on the pcb, or the result of tcp_output() after queuing a SYN|ACK
// for a new connection in the LISTEN state.
//

static err_t tcp_process(struct tcp_seg *seg, struct tcp_pcb *pcb) {
  struct tcp_pcb *npcb;
  struct ip_hdr *iphdr;
  struct tcp_hdr *tcphdr;
  unsigned long seqno, ackno;
  int flags;
  unsigned long optdata;
  struct tcp_seg *rseg;
  int acceptable = 0;
  
  // NOTE(review): the IP header is located by stepping back a fixed IP_HLEN
  // bytes from the TCP header, whereas tcp_input() locates the TCP header
  // using IPH_HL(iphdr) * 4. Confirm that datagrams carrying IP options
  // cannot reach this point.
  iphdr = (struct ip_hdr *) ((char *) seg->tcphdr - IP_HLEN);
  tcphdr = seg->tcphdr;
  flags = TCPH_FLAGS(tcphdr);
  seqno = tcphdr->seqno;
  ackno = tcphdr->ackno;
  
  // Process incoming RST segments
  if (flags & TCP_RST) {
    // First, determine if the reset is acceptable
    if (pcb->state != LISTEN) {
      if (pcb->state == SYN_SENT) {
        // In SYN-SENT the reset must acknowledge our SYN
        if (ackno == pcb->snd_nxt) acceptable = 1;
      } else {
        // Otherwise the reset must fall within the receive window
        if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) && TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) acceptable = 1;
      }
    }

    if (acceptable) {
      //kprintf("tcp_process: Connection RESET\n");
      pcb->flags |= TF_RESET;
      pcb->flags &= ~TF_ACK_DELAY;
      return -ERST;
    } else {
      //kprintf("tcp_process: unacceptable reset seqno %lu rcv_nxt %lu\n", seqno, pcb->rcv_nxt);
      return 0;
    }
  }

  // Update the PCB timer unless we are in the LISTEN state
  if (pcb->state != LISTEN) pcb->tmr = tcp_ticks;
  
  // Do different things depending on the TCP state
  switch (pcb->state) {
    case CLOSED:
      // Do nothing in the CLOSED state. In fact, this case should never occur
      // since PCBs in the CLOSED state are never found in the list of active PCBs
      break;

    case LISTEN:
      // In the LISTEN state, we check for incoming SYN segments,
      // create a new PCB, and respond with a SYN|ACK
      if (flags & TCP_ACK) {
        // For incoming segments with the ACK flag set, respond with a RST
        kprintf("tcp_process: ACK in LISTEN, sending reset\n");
        tcp_rst(ackno + 1, seqno + TCP_TCPLEN(seg), &iphdr->dest, &iphdr->src, tcphdr->dest, tcphdr->src);
      } else if (flags & TCP_SYN) {
        //kprintf("TCP connection request %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);
        npcb = tcp_new();

        // If a new PCB could not be created (probably due to lack of memory),
        // we don't do anything, but rely on the sender to retransmit the
        // SYN at a time when we have more memory available
        if (npcb == NULL) {
          stats.tcp.memerr++;
          break;
        }
        
        // Set up the new PCB
        ip_addr_set(&npcb->local_ip, &iphdr->dest);
        npcb->local_port = pcb->local_port;
        ip_addr_set(&npcb->remote_ip, &iphdr->src);
        npcb->remote_port = tcphdr->src;
        npcb->state = SYN_RCVD;
        npcb->rcv_nxt = seqno + 1;
        npcb->snd_wnd = tcphdr->wnd;
        npcb->ssthresh = npcb->snd_wnd;
        npcb->snd_wl1 = tcphdr->seqno - 1;
        npcb->accept = pcb->accept;
        npcb->callback_arg = pcb->callback_arg;

        // Register the new PCB so that we can begin receiving segments for it
        TCP_REG(&tcp_active_pcbs, npcb);
      
        // Parse any options in the SYN
        tcp_parseopt(seg, npcb);

        // Build an MSS option: kind 2, length 4, followed by the 16-bit MSS
        optdata = HTONL(((unsigned long) 2 << 24) | 
                        ((unsigned long) 4 << 16) | 
                        (((unsigned long) npcb->mss / 256) << 8) |
                        (npcb->mss & 255));

        // Send a SYN|ACK together with the MSS option
        tcp_enqueue(npcb, NULL, 0, TCP_SYN | TCP_ACK, (unsigned char *) &optdata, 4);
        return tcp_output(npcb);
      }
      break;

    case SYN_SENT:
      //kprintf("SYN-SENT: ackno %lu pcb->snd_nxt %lu unacked %lu\n", ackno, pcb->snd_nxt, pcb->unacked ? ntohl(pcb->unacked->tcphdr->seqno) : 0);

      // The connection is only established by a segment that carries both
      // SYN and ACK and that acknowledges our SYN. Testing the two flags with
      // a single mask would also accept a segment with just one of them set.
      if ((flags & TCP_ACK) && (flags & TCP_SYN) && pcb->unacked && ackno == ntohl(pcb->unacked->tcphdr->seqno) + 1) {
        pcb->rcv_nxt = seqno + 1;
        pcb->lastack = ackno;
        pcb->snd_wnd = tcphdr->wnd;
        pcb->snd_wl1 = seqno - 1;
        pcb->state = ESTABLISHED;
        pcb->cwnd = pcb->mss;

        // Our SYN has been acknowledged; take it off the unacked queue
        pcb->snd_queuelen--;
        rseg = pcb->unacked;
        pcb->unacked = rseg->next;
        tcp_seg_free(rseg);

        // Parse any options in the SYN|ACK
        tcp_parseopt(seg, pcb);

        // Call the user specified function to call when successfully connected
        if (pcb->connected != NULL) {
          pcb->connected(pcb->callback_arg, pcb, 0);
        }
        pcb->flags |= TF_ACK_DELAY;
      }    
      break;

    case SYN_RCVD:
      if (flags & TCP_ACK && !(flags & TCP_RST)) {
        // The ACK must acknowledge something new that we have actually sent
        if (TCP_SEQ_LT(pcb->lastack, ackno) && TCP_SEQ_LEQ(ackno, pcb->snd_nxt)) {
          pcb->state = ESTABLISHED;
          //kprintf("TCP connection established %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);

          // Call the accept function
          if (pcb->accept != NULL) {
            if (pcb->accept(pcb->callback_arg, pcb, 0) < 0) {
              // If the accept function returns with an error, we abort the connection
              tcp_abort(pcb);
              return -EABORT;
            }
          } else {
            // If a PCB does not have an accept function (i.e., no
            // application is connected to it), the connection would
            // linger in memory until the connection reset by the remote
            // peer (which might never happen). Therefore, we abort the
            // connection before it is too late
            tcp_abort(pcb);
            return -EABORT;
          }

          // If there was any data contained within this ACK,
          // we'd better pass it on to the application as well
          tcp_receive(seg, pcb);
          pcb->cwnd = pcb->mss;
        }
      }
      break;

    case CLOSE_WAIT:
    case ESTABLISHED:
      tcp_receive(seg, pcb);
      if (flags & TCP_FIN) {
        pcb->flags |= TF_ACK_NOW;
        pcb->state = CLOSE_WAIT;
      }
      break;

    case FIN_WAIT_1:
      tcp_receive(seg, pcb);
      if (flags & TCP_FIN) {
        if ((flags & TCP_ACK) && ackno == pcb->snd_nxt) {
          // The peer's FIN also acknowledges our FIN: go straight to TIME-WAIT
          //kprintf("TCP connection closed %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);
          pcb->flags |= TF_ACK_NOW;
          tcp_pcb_purge(pcb);
          TCP_RMV(&tcp_active_pcbs, pcb);
          pcb->state = TIME_WAIT;
          TCP_REG(&tcp_tw_pcbs, pcb);
        } else {
          // Both sides are closing at once and our FIN is not yet acknowledged
          pcb->flags |= TF_ACK_NOW;
          pcb->state = CLOSING;
        }
      } else if ((flags & TCP_ACK) && ackno == pcb->snd_nxt) {
        pcb->state = FIN_WAIT_2;
      }
      break;

    case FIN_WAIT_2:
      tcp_receive(seg, pcb);
      if (flags & TCP_FIN) {
        //kprintf("TCP connection closed %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);
        pcb->flags |= TF_ACK_NOW;
        tcp_pcb_purge(pcb);
        TCP_RMV(&tcp_active_pcbs, pcb);
        pcb->state = TIME_WAIT;
        TCP_REG(&tcp_tw_pcbs, pcb);
      }
      break;

    case CLOSING:
      tcp_receive(seg, pcb);
      if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
        //kprintf("TCP connection closed %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);
        pcb->flags |= TF_ACK_NOW;
        tcp_pcb_purge(pcb);
        TCP_RMV(&tcp_active_pcbs, pcb);
        pcb->state = TIME_WAIT;
        TCP_REG(&tcp_tw_pcbs, pcb);
      }
      break;

    case LAST_ACK:
      tcp_receive(seg, pcb);
      if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
        // Our FIN has been acknowledged; tcp_input() frees PCBs flagged TF_CLOSED
        //kprintf("TCP connection closed %d -> %d.\n", seg->tcphdr->src, seg->tcphdr->dest);
        pcb->state = CLOSED;
        pcb->flags |= TF_CLOSED;
      }
      break;

    case TIME_WAIT:
      // Keep rcv_nxt up to date and acknowledge anything that occupies sequence space
      if (TCP_SEQ_GT(seqno + TCP_TCPLEN(seg), pcb->rcv_nxt)) pcb->rcv_nxt = seqno + TCP_TCPLEN(seg);
      if (TCP_TCPLEN(seg) > 0) pcb->flags |= TF_ACK_NOW;
      break;
  }
  
  return 0;
}

//
// tcp_receive
//
// Called by tcp_process. Checks if the given segment is an ACK for outstanding
// data, and if so frees the memory of the buffered data. Next, it either hands
// the segment's pbuf over to pcb->recv_data (in-sequence data) or queues a copy
// of the segment, made with tcp_seg_copy(), on pcb->ooseq (out-of-sequence data).
// When the pbuf is handed over, seg->p is set to NULL so that the caller does
// not free it.
//
// If the incoming segment constitutes an ACK for a segment that was used for RTT
// estimation, the RTT is estimated here as well.
//
// Parameters:
//   seg  the incoming segment; the fields of seg->tcphdr are read in host byte order
//   pcb  the connection the segment belongs to
//

static void tcp_receive(struct tcp_seg *seg, struct tcp_pcb *pcb) {
  struct tcp_seg *next, *prev, *cseg;
  struct pbuf *p;
  unsigned long ackno, seqno;
  unsigned long right_wnd_edge;
  int off;
  int m;

  ackno = seg->tcphdr->ackno;
  seqno = seg->tcphdr->seqno;

  if (TCPH_FLAGS(seg->tcphdr) & TCP_ACK) {
    // Remember the old right window edge so that duplicate ACKs can be
    // told apart from window updates below
    right_wnd_edge = pcb->snd_wnd + pcb->snd_wl1;

    // Update window
    if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
        (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
        (pcb->snd_wl2 == ackno && seg->tcphdr->wnd > pcb->snd_wnd)) {
      pcb->snd_wnd = seg->tcphdr->wnd;
      pcb->snd_wl1 = seqno;
      pcb->snd_wl2 = ackno;

      //kprintf("tcp_receive: window update %lu\n", pcb->snd_wnd);
    }

    if (pcb->lastack == ackno) {
      // The ACK does not acknowledge anything new
      pcb->acked = 0;

      // Only count it as a duplicate ACK if the window edge did not move
      if (pcb->snd_wl1 + pcb->snd_wnd == right_wnd_edge) {
        pcb->dupacks++;
        if (pcb->dupacks >= 3 && pcb->unacked != NULL && pcb->state == ESTABLISHED) {
          if (!(pcb->flags & TF_INFR)) {
            // This is fast retransmit. Retransmit the first unacked segment          
            //kprintf("tcp_receive: dupacks %d (%lu), fast retransmit %lu\n", pcb->dupacks, pcb->lastack, ntohl(pcb->unacked->tcphdr->seqno));
            tcp_rexmit(pcb);

            // Set ssthresh to MAX(FlightSize / 2, 2 * SMSS)
            pcb->ssthresh = UMAX((unsigned long) (pcb->snd_max - pcb->lastack) / 2, (unsigned long) (2 * pcb->mss));
            pcb->cwnd = pcb->ssthresh + 3 * pcb->mss;
            pcb->flags |= TF_INFR;
          } else {         
            // Inflate the congestion window, but not if it means that the value overflows
            if ((unsigned short) (pcb->cwnd + pcb->mss) > pcb->cwnd) pcb->cwnd += pcb->mss;
          }
        }
      }
    } else if (TCP_SEQ_LT(pcb->lastack, ackno) && TCP_SEQ_LEQ(ackno, pcb->snd_max)) {
      // We come here when the ACK acknowledges new data

      // Reset the "IN Fast Retransmit" flag, since we are no longer
      // in fast retransmit. Also reset the congestion window to the
      // slow start threshold
      if (pcb->flags & TF_INFR) {
        pcb->flags &= ~TF_INFR;
        pcb->cwnd = pcb->ssthresh;
      }

      // Reset the number of retransmissions
      pcb->nrtx = 0;

      // Reset the retransmission time-out
      pcb->rto = (pcb->sa >> 3) + pcb->sv;
      
      // Update the send buffer space
      pcb->acked = (unsigned short) (ackno - pcb->lastack);
      pcb->snd_buf += pcb->acked;

      // Reset the fast retransmit variables
      pcb->dupacks = 0;
      pcb->lastack = ackno;
      
      // Update the congestion control variables (cwnd and ssthresh)
      if (pcb->state >= ESTABLISHED) {
        if (pcb->cwnd < pcb->ssthresh) {
          // Slow start: grow by one MSS per ACK unless the value would overflow
          if ((unsigned short) (pcb->cwnd + pcb->mss) > pcb->cwnd) pcb->cwnd += pcb->mss;
          //kprintf("tcp_receive: slow start cwnd %u\n", pcb->cwnd);
        } else {
          // Congestion avoidance: grow by mss * mss / cwnd per ACK unless the value would overflow
          unsigned short new_cwnd = (unsigned short) (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
          if (new_cwnd > pcb->cwnd) pcb->cwnd = new_cwnd;
          //kprintf("tcp_receive: congestion avoidance cwnd %u\n", pcb->cwnd);
        }
      }

      //kprintf("tcp_receive: ACK for %lu, unacked->seqno %lu:%lu\n",
      //       ackno,
      //       pcb->unacked != NULL ? ntohl(pcb->unacked->tcphdr->seqno): 0,
      //       pcb->unacked != NULL ? ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked): 0);

      // Remove segments from the unacknowledged list if the incoming ACK acknowledges them
      while (pcb->unacked != NULL && TCP_SEQ_LEQ(ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked), ackno)) {
        //kprintf("tcp_receive: removing %lu:%lu from pcb->unacked\n",
        //         ntohl(pcb->unacked->tcphdr->seqno),
        //         ntohl(pcb->unacked->tcphdr->seqno) +
        //         TCP_TCPLEN(pcb->unacked));

        next = pcb->unacked;
        pcb->unacked = pcb->unacked->next;
        pcb->snd_queuelen -= pbuf_clen(next->p);
        tcp_seg_free(next);
      }

      pcb->polltmr = 0;

      // We go through the unsent list to see if any of the segments
      // on the list are acknowledged by the ACK. This may seem
      // strange since an "unsent" segment shouldn't be acked. The
      // rationale is that we put all outstanding segments on the
      // unsent list after a retransmission, so these segments may
      // in fact have been sent once
      while (pcb->unsent != NULL && TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), ackno)) {
        //kprintf("tcp_receive: removing %lu:%lu from pcb->unsent\n",
        //         ntohl(pcb->unsent->tcphdr->seqno),
        //         ntohl(pcb->unsent->tcphdr->seqno) +
        //         TCP_TCPLEN(pcb->unsent));

        next = pcb->unsent;
        pcb->unsent = pcb->unsent->next;
        pcb->snd_queuelen -= pbuf_clen(next->p);
        tcp_seg_free(next);
        
        // Queued segment headers hold the sequence number in network byte order
        if (pcb->unsent != NULL) pcb->snd_nxt = ntohl(pcb->unsent->tcphdr->seqno);
      }
    }
    // End of ACK for new data processing
    
    //kprintf("tcp_receive: pcb->rttest %d rtseq %lu ackno %lu\n", pcb->rttest, pcb->rtseq, ackno);
    
    // RTT estimation calculations. This is done by checking if the
    // incoming segment acknowledges the segment we use to take a
    // round-trip time measurement
    if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
      m = tcp_ticks - pcb->rttest;

      //kprintf("tcp_receive: experienced rtt %d ticks (%d msec).\n", m, m * TCP_SLOW_INTERVAL);

      // This is taken directly from VJs original code in his paper
      m = m - (pcb->sa >> 3);
      pcb->sa += m;
      if (m < 0) m = -m;
      m = m - (pcb->sv >> 2);
      pcb->sv += m;
      pcb->rto = (pcb->sa >> 3) + pcb->sv;
      
      //kprintf("tcp_receive: RTO %d (%d miliseconds)\n", pcb->rto, pcb->rto * TCP_SLOW_INTERVAL);

      pcb->rttest = 0;
    } 
  }
  
  // If the incoming segment contains data, we must process it further
  if (TCP_TCPLEN(seg) > 0) {
    // This code basically does three things:
    //
    // +) If the incoming segment contains data that is the next
    //    in-sequence data, this data is passed to the application. This
    //    might involve trimming the first edge of the data. The rcv_nxt
    //    variable and the advertised window are adjusted.       
    //
    // +) If the incoming segment has data that is above the next
    //    sequence number expected (->rcv_nxt), the segment is placed on
    //    the ->ooseq queue. This is done by finding the appropriate
    //    place in the ->ooseq queue (which is ordered by sequence
    //    number) and trim the segment in both ends if needed. An
    //    immediate ACK is sent to indicate that we received an
    //    out-of-sequence segment.
    //
    // +) Finally, we check if the first segment on the ->ooseq queue
    //    now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
    //    rcv_nxt > ooseq->seqno, we must trim the first edge of the
    //    segment on ->ooseq before we adjust rcv_nxt. The data in the
    //    segments that are now on sequence are chained onto the
    //    incoming segment so that we only need to call the application
    //    once.
    //

    // First, we check if we must trim the first edge. We have to do
    // this if the sequence number of the incoming segment is less
    // than rcv_nxt, and the sequence number plus the length of the
    // segment is larger than rcv_nxt

    //kprintf("tcp_receive: seqno %lu rcv_nxt %lu tcplen %d\n", seqno, pcb->rcv_nxt, TCP_TCPLEN(seg));

    if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
      if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + TCP_TCPLEN(seg))) {
        // Trimming the first edge is done by pushing the payload
        // pointer in the pbuf downwards. This is somewhat tricky since
        // we do not want to discard the full contents of the pbuf up to
        // the new starting point of the data since we have to keep the
        // TCP header which is present in the first pbuf in the chain.
        //
        // What is done is really quite a nasty hack: the first pbuf in
        // the pbuf chain is pointed to by seg->p. Since we need to be
        // able to deallocate the whole pbuf, we cannot change this
        // seg->p pointer to point to any of the later pbufs in the
        // chain. Instead, we point the ->payload pointer in the first
        // pbuf to data in one of the later pbufs. We also set the
        // seg->dataptr pointer to point to the right place. This way, the
        // ->p pointer will still point to the first pbuf, but the
        // ->p->payload pointer will point to data in another pbuf.
        //
        // After we are done with adjusting the pbuf pointers we must
        // adjust the ->dataptr pointer in the seg and the segment
        // length.

        off = pcb->rcv_nxt - seqno;
        if (seg->p->len < off) {
          // The data to skip spans more than the first pbuf; empty the
          // leading pbufs until the remainder fits within one
          p = seg->p;
          while (p->len < off) {
            off -= p->len;
            seg->p->tot_len -= p->len;
            p->len = 0;
            p = p->next;
          }
          pbuf_header(p, -off);
        } else {
          pbuf_header(seg->p, -off);
        }

        seg->dataptr = seg->p->payload;
        seg->len -= pcb->rcv_nxt - seqno;
        seg->tcphdr->seqno = seqno = pcb->rcv_nxt;
      } else {
        // The whole segment is < rcv_nxt
        // Must be a duplicate of a packet that has already been correctly handled
        // or a keep-alive packet
        
        //kprintf("tcp_receive: duplicate seqno %lu\n", seqno);
        pcb->flags |= TF_ACK_NOW;
      }
    }

    // The sequence number must be within the window (above rcv_nxt
    // and below rcv_nxt + rcv_wnd) in order to be further
    // processed.
    
    if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) && TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
      if (pcb->rcv_nxt == seqno) {
        // The incoming segment is the next in sequence. We check if
        // we have to trim the end of the segment and update rcv_nxt
        // and pass the data to the application.
        if (pcb->ooseq != NULL && TCP_SEQ_LEQ(pcb->ooseq->tcphdr->seqno, seqno + seg->len)) {
          // We have to trim the second edge of the incoming segment
          seg->len = pcb->ooseq->tcphdr->seqno - seqno;
          pbuf_realloc(seg->p, seg->len);
        }
        
        pcb->rcv_nxt += TCP_TCPLEN(seg);        
        
        // Update the receiver's (our) window
        if (pcb->rcv_wnd < TCP_TCPLEN(seg)) {
          pcb->rcv_wnd = 0;
        } else {
          pcb->rcv_wnd -= TCP_TCPLEN(seg);
        }
        
        // If there is data in the segment, we make preparations to
        // pass this up to the application. The ->recv_data variable
        // is used for holding the pbuf that goes to the
        // application. The code for reassembling out-of-sequence data
        // chains its data on this pbuf as well.
        if (seg->p->tot_len > 0) {
          pcb->recv_data = seg->p;
          
          // Since this pbuf now is the responsibility of the
          // application, we delete our reference to it so that we won't
          // (mistakenly) deallocate it.
          seg->p = NULL;
        }

        // If the segment was a FIN, we set the TF_GOT_FIN flag that will
        // be used to indicate to the application that the remote side has
        // closed its end of the connection.
        if (TCPH_FLAGS(seg->tcphdr) & TCP_FIN) {
          //kprintf("tcp_receive: received FIN.\n");
          pcb->flags |= TF_GOT_FIN;
        }
        
        // We now check if we have segments on the ->ooseq queue that
        // are now in sequence.
        while (pcb->ooseq != NULL && pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
          cseg = pcb->ooseq;
          seqno = pcb->ooseq->tcphdr->seqno;
          
          pcb->rcv_nxt += TCP_TCPLEN(cseg);
          if (pcb->rcv_wnd < TCP_TCPLEN(cseg)) {
            pcb->rcv_wnd = 0;
          } else {
            pcb->rcv_wnd -= TCP_TCPLEN(cseg);
          }

          if (cseg->p->tot_len > 0) {
            // Chain this pbuf onto the pbuf that we will pass to the
            // application. If the incoming segment carried no payload
            // there is nothing to chain onto yet, so this pbuf becomes
            // the head of the received data.
            if (pcb->recv_data != NULL) {
              pbuf_chain(pcb->recv_data, cseg->p);
            } else {
              pcb->recv_data = cseg->p;
            }
            cseg->p = NULL;
          }
          
          if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
            //kprintf("tcp_receive: dequeued FIN.\n");
            pcb->flags |= TF_GOT_FIN;
          }         

          pcb->ooseq = cseg->next;
          tcp_seg_free(cseg);

          // Acknowledge immediately (MRI)
          pcb->flags |= TF_ACK_NOW;
        }

        // Acknowledge the segment(s): delay the first ACK, but send
        // immediately if a delayed ACK is already pending
        if (pcb->flags & TF_ACK_DELAY) {
          pcb->flags |= TF_ACK_NOW;
        } else {
          pcb->flags |= TF_ACK_DELAY;
        }
      } else {
        // We get here if the incoming segment is out-of-sequence.
        pcb->flags |= TF_ACK_NOW;
        //kprintf("tcp_receive: out-of-order segment received\n");

        // We queue the segment on the ->ooseq queue
        if (pcb->ooseq == NULL) {
          pcb->ooseq = tcp_seg_copy(seg);
        } else {
          // If the queue is not empty, we walk through the queue and
          // try to find a place where the sequence number of the
          // incoming segment is between the sequence numbers of the
          // previous and the next segment on the ->ooseq queue. That is
          // the place where we put the incoming segment. If needed, we
          // trim the second edges of the previous and the incoming
          // segment so that it will fit into the sequence.
          //
          // If the incoming segment has the same sequence number as a
          // segment on the ->ooseq queue, we discard the segment that
          // contains less data.
          prev = NULL;
          for (next = pcb->ooseq; next != NULL; next = next->next) {
            if (seqno == next->tcphdr->seqno) {
              // The sequence number of the incoming segment is the
              // same as the sequence number of the segment on
              // ->ooseq. We check the lengths to see which one to
              // discard.
              if (seg->len > next->len) {
                // The incoming segment is larger than the old
                // segment. We replace the old segment with the new
                // one.
                cseg = tcp_seg_copy(seg);
                if (cseg != NULL) {
                  cseg->next = next->next;
                  if (prev != NULL) {
                    prev->next = cseg;
                  } else {
                    pcb->ooseq = cseg;
                  }

                  // The old segment is no longer reachable from the
                  // queue, so it must be released here or it would leak
                  tcp_seg_free(next);
                }
                break;
              } else {
                // Either the lengths are the same or the incoming
                // segment was smaller than the old one; in either
                // case, we ditch the incoming segment.
                break;
              }
            } else {
              if (prev == NULL) {
                if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
                  // The sequence number of the incoming segment is lower
                  // than the sequence number of the first segment on the
                  // queue. We put the incoming segment first on the
                  // queue.
                  if (TCP_SEQ_GT(seqno + seg->len, next->tcphdr->seqno)) {
                    // We need to trim the incoming segment.
                    seg->len = next->tcphdr->seqno - seqno;
                    pbuf_realloc(seg->p, seg->len);
                  }
                  cseg = tcp_seg_copy(seg);
                  if (cseg != NULL) {
                    cseg->next = next;
                    pcb->ooseq = cseg;
                  }
                  break;
                }
              } else if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) && TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
                // The sequence number of the incoming segment is in
                // between the sequence numbers of the previous and
                // the next segment on ->ooseq. We trim and insert the
                // incoming segment and trim the previous segment, if
                // needed.
                if (TCP_SEQ_GT(seqno + seg->len, next->tcphdr->seqno)) {
                  // We need to trim the incoming segment
                  seg->len = next->tcphdr->seqno - seqno;
                  pbuf_realloc(seg->p, seg->len);
                }

                cseg = tcp_seg_copy(seg);
                if (cseg != NULL) {
                  cseg->next = next;
                  prev->next = cseg;
                  if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
                    // We need to trim the prev segment
                    prev->len = seqno - prev->tcphdr->seqno;
                    pbuf_realloc(prev->p, prev->len);
                  }
                }
                break;
              }

              // If the "next" segment is the last segment on the
              // ooseq queue, we add the incoming segment to the end
              // of the list.
              if (next->next == NULL && TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
                next->next = tcp_seg_copy(seg);
                if (next->next != NULL) {
                  if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
                    // We need to trim the last segment
                    next->len = seqno - next->tcphdr->seqno;
                    pbuf_realloc(next->p, next->len);
                  }
                }
                break;
              }
            }
            prev = next;
          }    
        } 
      }
    }
  } else {
    // Segments with length 0 are taken care of here. Segments that
    // fall out of the window are ACKed
    if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) || TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
      pcb->flags |= TF_ACK_NOW;
    }
  }
}

//
// tcp_parseopt
//
// Parses the options contained in the incoming segment. (Code taken
// from uIP with only small changes.)
//
// Only the MSS option (kind 2, length 4) is acted upon: pcb->mss is set to
// the announced value, limited to TCP_MSS. An announced MSS of zero is
// replaced by TCP_MSS. Parsing stops at the end-of-options marker, after
// the MSS option, or as soon as an option is malformed (length below 2 or
// extending beyond the option area).
//
// Parameters:
//   seg  segment whose TCP header options are parsed
//   pcb  PCB whose mss field is updated
//

static void tcp_parseopt(struct tcp_seg *seg, struct tcp_pcb *pcb) {
  unsigned char *opts;
  unsigned char opt;
  unsigned int optlen;
  unsigned int len;
  unsigned int c;
  unsigned short mss;

  // There are no options unless the data offset exceeds the five words of the fixed header
  if ((TCPH_OFFSET(seg->tcphdr) & 0xf0) <= 0x50) return;

  opts = (unsigned char *) (seg->tcphdr) + TCP_HLEN;
  optlen = ((TCPH_OFFSET(seg->tcphdr) >> 4) - 5) << 2;

  // The index is deliberately wider than a byte: adding an option length
  // of up to 255 to a byte-sized index could wrap around and loop forever.
  c = 0;
  while (c < optlen) {
    opt = opts[c];

    if (opt == 0x00) {
      // End of options
      break;
    }

    if (opt == 0x01) {
      // NOP option
      c++;
      continue;
    }

    // All other options have a length field. Stop if it is missing, too
    // small to cover the kind and length bytes, or runs past the option area.
    if (c + 1 >= optlen) break;
    len = opts[c + 1];
    if (len < 2 || c + len > optlen) break;

    if (opt == 0x02 && len == 0x04) {
      // An MSS option with the right option length
      mss = (unsigned short) ((opts[c + 2] << 8) | opts[c + 3]);
      pcb->mss = (mss > TCP_MSS || mss == 0) ? TCP_MSS : mss;

      // And we are done processing options
      break;
    }

    // Skip past the option
    c += len;
  }
}