Repository: wangyu-/udp2raw
Branch: unified
Commit: 4208db6e27c4
Files: 99
Total size: 1.5 MB

Directory structure:
gitextract_9hxphtn3/

├── .clang-format
├── .gitattributes
├── CMakeLists.txt
├── ISSUE_TEMPLATE.md
├── LICENSE.md
├── README.md
├── client.cpp
├── common.cpp
├── common.h
├── connection.cpp
├── connection.h
├── doc/
│   ├── README.zh-cn.md
│   ├── android_guide.md
│   ├── build_guide.md
│   ├── build_guide.zh-cn.md
│   ├── finalspeed_step_by_step/
│   │   └── 11
│   ├── finalspeed_step_by_step.md
│   ├── kcptun_step_by_step.md
│   └── openvpn_guide.md
├── encrypt.cpp
├── encrypt.h
├── example.conf
├── fd_manager.cpp
├── fd_manager.h
├── images/
│   ├── speedtest/
│   │   └── 111
│   └── wiki/
│       └── 111
├── lib/
│   ├── aes-common.h
│   ├── aes_acc/
│   │   ├── aesacc.c
│   │   ├── aesarm.c
│   │   ├── aesarm.h
│   │   ├── aesarm_table.h
│   │   ├── aesni.c
│   │   ├── aesni.h
│   │   └── asm/
│   │       ├── arm.S
│   │       ├── arm64.S
│   │       ├── arm_arch.h
│   │       ├── mips.S
│   │       ├── mips_be.S
│   │       ├── x64.S
│   │       └── x86.S
│   ├── aes_faster_c/
│   │   ├── aes.cpp
│   │   ├── aes.h
│   │   └── wrapper.cpp
│   ├── md5.cpp
│   ├── md5.h
│   ├── pbkdf2-sha1.cpp
│   ├── pbkdf2-sha1.h
│   ├── pbkdf2-sha256.cpp
│   └── pbkdf2-sha256.h
├── libev/
│   ├── CVS/
│   │   ├── Entries
│   │   ├── Repository
│   │   └── Root
│   ├── Changes
│   ├── LICENSE
│   ├── Makefile.am
│   ├── README
│   ├── README.embed
│   ├── Symbols.ev
│   ├── Symbols.event
│   ├── autogen.sh
│   ├── configure.ac
│   ├── ev++.h
│   ├── ev.3
│   ├── ev.c
│   ├── ev.h
│   ├── ev.pod
│   ├── ev_epoll.c
│   ├── ev_kqueue.c
│   ├── ev_poll.c
│   ├── ev_port.c
│   ├── ev_select.c
│   ├── ev_vars.h
│   ├── ev_win32.c
│   ├── ev_wrap.h
│   ├── event.c
│   ├── event.h
│   ├── event_compat.h
│   ├── import_libevent
│   ├── libev.m4
│   ├── update_ev_c
│   ├── update_ev_wrap
│   └── update_symbols
├── log.cpp
├── log.h
├── main.cpp
├── makefile
├── misc.cpp
├── misc.h
├── my_ev.cpp
├── my_ev.h
├── my_ev_common.h
├── network.cpp
├── network.h
├── pcap_wrapper.cpp
├── pcap_wrapper.h
├── server.cpp
└── third-party/
    ├── luci-app-udp2raw/
    │   └── moved_to_new_repo
    ├── udp2raw-cmake-makefile/
    │   └── CMakeLists.txt
    └── udp2raw-openwrt-makefile/
        └── moved_to_new_repo

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
# clang-format configuration for this repository.
# Keep #include lines in the order they were written.
SortIncludes: false
# Start from the Google style preset; the other keys in this file override it.
BasedOnStyle: Google
# 0 disables the column limit, so long lines are not re-wrapped.
ColumnLimit: 0
# Indent with 4 columns.
IndentWidth: 4


================================================
FILE: .gitattributes
================================================
# Mark everything directly under lib/aes_acc/asm/ as vendored for GitHub Linguist.
lib/aes_acc/asm/* linguist-vendored


================================================
FILE: CMakeLists.txt
================================================
#note: experimental
#      currently only used for generating `compile_commands.json` for clangd.
#      to build this project, it's suggested to use `makefile` instead

cmake_minimum_required(VERSION 3.7)
project(udp2raw)

# Emit compile_commands.json in the build directory (consumed by clangd).
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_CXX_STANDARD 11)
# Fail at configure time instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# AddressSanitizer + UBSan are enabled by default; pass
# -DUDP2RAW_ENABLE_SANITIZERS=OFF to build without them.
option(UDP2RAW_ENABLE_SANITIZERS "Build with -fsanitize=address,undefined" ON)

add_executable(udp2raw
        main.cpp
        lib/md5.cpp
        lib/pbkdf2-sha1.cpp
        lib/pbkdf2-sha256.cpp
        encrypt.cpp
        log.cpp
        network.cpp
        common.cpp
        connection.cpp
        misc.cpp
        fd_manager.cpp
        client.cpp
        server.cpp
        lib/aes_faster_c/aes.cpp
        lib/aes_faster_c/wrapper.cpp
        my_ev.cpp
)

# Attached to the target rather than assigned to CMAKE_CXX_FLAGS, so flags
# supplied by the user or a toolchain file are no longer overwritten.
# -O2 -g are applied regardless of CMAKE_BUILD_TYPE.
target_compile_options(udp2raw PRIVATE
        -Wall
        -Wextra
        -Wno-unused-variable
        -Wno-unused-parameter
        -Wno-missing-field-initializers
        -O2
        -g
)

if(UDP2RAW_ENABLE_SANITIZERS)
    target_compile_options(udp2raw PRIVATE -fsanitize=address,undefined)
    # The sanitizer runtime must be requested at link time as well.
    # target_link_options() needs CMake 3.13; an item starting with `-` in
    # target_link_libraries() is passed through as a linker flag on 3.7.
    target_link_libraries(udp2raw PRIVATE -fsanitize=address,undefined)
endif()

find_package(Threads REQUIRED)
target_link_libraries(udp2raw PRIVATE rt Threads::Threads)

# libev is bundled third-party code: SYSTEM keeps its headers out of -Wall/-Wextra.
target_include_directories(udp2raw SYSTEM PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/libev")
target_include_directories(udp2raw PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")


================================================
FILE: ISSUE_TEMPLATE.md
================================================
English Only.


================================================
FILE: LICENSE.md
================================================
MIT License

Copyright (c) 2017 Yu Wang (wangyucn at gmail.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# Udp2raw-tunnel


A Tunnel which turns UDP Traffic into Encrypted FakeTCP/UDP/ICMP Traffic by using Raw Socket, helps you Bypass UDP FireWalls(or Unstable UDP Environment).

When used alone, udp2raw tunnels only UDP traffic. Nevertheless, if you use udp2raw together with any UDP-based VPN, you can tunnel any traffic (including TCP/UDP/ICMP). Currently OpenVPN/L2TP/ShadowVPN and [tinyfecVPN](https://github.com/wangyu-/tinyfecVPN) are confirmed to be supported.


![image0](images/image0.PNG)

or

![image_vpn](images/udp2rawopenvpn.PNG)

[udp2raw wiki](https://github.com/wangyu-/udp2raw-tunnel/wiki)

[简体中文](/doc/README.zh-cn.md)


# Support Platforms
Linux host (including desktop Linux,Android phone/tablet,OpenWRT router,or Raspberry PI) with root account or cap_net_raw capability.

For Windows and MacOS users, use the udp2raw in [this repo](https://github.com/wangyu-/udp2raw-multiplatform).

# Features
### Send/Receive UDP Packets with ICMP/FakeTCP/UDP headers
ICMP/FakeTCP headers help you bypass UDP blocking, UDP QOS or improper UDP NAT behavior on some ISPs. In ICMP header mode,udp2raw works like an ICMP tunnel.

UDP headers are also supported. In UDP header mode, it behaves just like a normal UDP tunnel, and you can just make use of the other features (such as encryption, anti-replay, or connection stabilization).

### Simulated TCP with Real-time/Out-of-Order Delivery
In FakeTCP header mode, udp2raw simulates the 3-way handshake while establishing a connection and simulates seq and ack_seq while transferring data. It also simulates a few TCP options such as: `MSS`, `sackOk`, `TS`, `TS_ack`, `wscale`. Firewalls will regard FakeTCP as a TCP connection, but it's essentially UDP: it supports real-time/out-of-order delivery (just as normal UDP does), with no congestion control or re-transmission. So there won't be any TCP over TCP problem when using OpenVPN.

### Encryption, Anti-Replay
* Encrypt your traffic with AES-128-CBC.
* Protect data integrity by HMAC-SHA1 (or weaker MD5/CRC32).
* Defend against replay attacks with an anti-replay window.

[Notes on encryption](https://github.com/wangyu-/udp2raw-tunnel/wiki/Notes-on-encryption)

### Failure Detection & Stabilization (Connection Recovery)
Connection failures are detected by heartbeats. If timed-out, client will automatically change port number and reconnect. If reconnection is successful, the previous connection will be recovered, and all existing UDP conversations will stay valid.

For example, if you use udp2raw + OpenVPN, OpenVPN won't lose connection after any reconnect, **even if network cable is re-plugged or WiFi access point is changed**.

### Other Features
* **Multiplexing** One client can handle multiple UDP connections, all of which share the same raw connection.

* **Multiple Clients** One server can have multiple clients.

* **NAT Support** All of the 3 modes work in NAT environments.

* **OpenVZ Support** Tested on BandwagonHost VPS.

* **Easy to Build** No dependencies.To cross-compile udp2raw,all you need to do is just to download a toolchain,modify makefile to point at the toolchain,run `make cross` then everything is done.(Note:Pre-compiled binaries for Desktop,RaspberryPi,Android,some Openwrt Routers are already included in [Releases](https://github.com/wangyu-/udp2raw-tunnel/releases))

### Keywords
`Bypass UDP QoS` `Bypass UDP Blocking` `Bypass OpenVPN TCP over TCP problem` `OpenVPN over ICMP` `UDP to ICMP tunnel` `UDP to TCP tunnel` `UDP over ICMP` `UDP over TCP`

# Getting Started
### Installing
Download binary release from https://github.com/wangyu-/udp2raw-tunnel/releases

### Running
Assume your UDP is blocked or being QOS-ed or just poorly supported. Assume your server ip is 44.55.66.77, you have a service listening on udp port 7777.

```bash
# Run at server side:
./udp2raw_amd64 -s -l0.0.0.0:4096 -r 127.0.0.1:7777    -k "passwd" --raw-mode faketcp -a

# Run at client side
./udp2raw_amd64 -c -l0.0.0.0:3333  -r44.55.66.77:4096  -k "passwd" --raw-mode faketcp -a
```
(The above commands need to be run as root. For better security, with some extra steps, you can run udp2raw as non-root. Check [this link](https://github.com/wangyu-/udp2raw-tunnel/wiki/run-udp2raw-as-non-root) for more info  )

###### Server Output:
![](images/output_server.PNG)
###### Client Output:
![](images/output_client.PNG)

Now,an encrypted raw tunnel has been established between client and server through TCP port 4096. Connecting to UDP port 3333 at the client side is equivalent to connecting to port 7777 at the server side. No UDP traffic will be exposed.

### Note
To run on Android, check [Android_Guide](https://github.com/wangyu-/udp2raw/wiki/Android-Guide)

The `-a` option automatically adds an iptables rule (or a few iptables rules) for you; udp2raw relies on this iptables rule to work stably. Be careful not to forget `-a` (it's a common mistake). If you don't want udp2raw to add the iptables rule automatically, you can add it manually (take a look at the `-g` option) and omit `-a`.


# Advanced Topic
### Usage
```
udp2raw-tunnel
git version:4623f878e0    build date:Nov  3 2024 23:15:46
repository: https://github.com/wangyu-/udp2raw-tunnel

usage:
    run as client : ./this_program -c -l local_listen_ip:local_port -r server_address:server_port  [options]
    run as server : ./this_program -s -l server_listen_ip:server_port -r remote_address:remote_port  [options]

common options,these options must be same on both side:
    --raw-mode            <string>        available values:faketcp(default),udp,icmp and easy-faketcp
    -k,--key              <string>        password to gen symetric key,default:"secret key"
    --cipher-mode         <string>        available values:aes128cfb,aes128cbc(default),xor,none
    --auth-mode           <string>        available values:hmac_sha1,md5(default),crc32,simple,none
    -a,--auto-rule                        auto add (and delete) iptables rule
    -g,--gen-rule                         generate iptables rule then exit,so that you can copy and
                                          add it manually.overrides -a
    --disable-anti-replay                 disable anti-replay,not suggested
    --fix-gro                             try to fix huge packet caused by GRO. this option is at an early stage.
                                          make sure client and server are at same version.
client options:
    --source-ip           <ip>            force source-ip for raw socket
    --source-port         <port>          force source-port for raw socket,tcp/udp only
                                          this option disables port changing while re-connecting
other options:
    --conf-file           <string>        read options from a configuration file instead of command line.
                                          check example.conf in repo for format
    --fifo                <string>        use a fifo(named pipe) for sending commands to the running program,
                                          check readme.md in repository for supported commands.
    --log-level           <number>        0:never    1:fatal   2:error   3:warn
                                          4:info (default)     5:debug   6:trace
    --log-position                        enable file name,function name,line number in log
    --disable-color                       disable log color
    --disable-bpf                         disable the kernel space filter,most time its not necessary
                                          unless you suspect there is a bug
    --dev                 <string>        bind raw socket to a device, not necessary but improves performance
    --sock-buf            <number>        buf size for socket,>=10 and <=10240,unit:kbyte,default:1024
    --force-sock-buf                      bypass system limitation while setting sock-buf
    --seq-mode            <number>        seq increase mode for faketcp:
                                          0:static header,do not increase seq and ack_seq
                                          1:increase seq for every packet,simply ack last seq
                                          2:increase seq randomly, about every 3 packets,simply ack last seq
                                          3:simulate an almost real seq/ack procedure(default)
                                          4:similiar to 3,but do not consider TCP Option Window_Scale,
                                          maybe useful when firewall doesnt support TCP Option
    --lower-level         <string>        send packets at OSI level 2, format:'if_name#dest_mac_adress'
                                          ie:'eth0#00:23:45:67:89:b9'.or try '--lower-level auto' to obtain
                                          the parameter automatically,specify it manually if 'auto' failed
    --wait-lock                           wait for xtables lock while invoking iptables, need iptables v1.4.20+
    --gen-add                             generate iptables rule and add it permanently,then exit.overrides -g
    --keep-rule                           monitor iptables and auto re-add if necessary.implys -a
    --hb-len              <number>        length of heart-beat packet, >=0 and <=1500
    --mtu-warn            <number>        mtu warning threshold, unit:byte, default:1375
    --clear                               clear any iptables rules added by this program.overrides everything
    --retry-on-error                      retry on error, allow to start udp2raw before network is initialized
    -h,--help                             print this help message
```

### Iptables rules,`-a` and `-g`
This program sends packets via raw socket. In FakeTCP mode, Linux kernel TCP packet processing has to be blocked by an iptables rule on both sides, otherwise the kernel will automatically send RST for an unrecognized TCP packet and you will suffer from stability / performance problems. You can use the `-a` option to let the program automatically add / delete the iptables rule on start / exit. You can also use the `-g` option to generate the iptables rule and add it manually.

### `--cipher-mode` and `--auth-mode`
It is suggested to use `aes128cbc` + `hmac_sha1` to obtain maximum security. If you want to run the program on a router, you can try `xor` + `simple`, which can fool packet inspection by firewalls most of the time, but it cannot protect you from serious attacks. Mode `none` is only for debugging purposes. It is not recommended to set the cipher-mode or auth-mode to `none`.

### `--seq-mode`
The FakeTCP mode does not behave 100% like a real tcp connection. ISPs may be able to distinguish the simulated tcp traffic from the real TCP traffic (though it's costly). seq-mode can help you change the seq increase behavior slightly. If you experience connection problems, try to change the value.

### `--lower-level`
`--lower-level` allows you to send packets at OSI level 2 (link level), so that you can bypass any local iptables rules. If you have complicated iptables rules which conflict with udp2raw and you can't (or are too lazy to) edit them, `--lower-level` can be very useful. Try `--lower-level auto` to auto-detect the parameters; you can specify them manually if `auto` fails.

Manual format `if_name#dest_mac_adress`,ie:`eth0#00:23:45:67:89:b9`.

### `--keep-rule`
Monitor iptables and automatically re-add the iptables rules (for blocking kernel TCP processing) if necessary. Especially useful when iptables rules may be cleared by other programs (for example, if you are using OpenWrt, every time you change and commit a setting, the iptables rules may be cleared and reconstructed).

### `--conf-file`

You can also load options from a configuration file in order to keep secrets away from `ps` command.

For example, rewrite the options for the above `server` example (in Getting Started section) into configuration file:

`server.conf`

```
-s
# You can add comments like this
# Comments MUST occupy an entire line
# Or they will not work as expected
# Listen address
-l 0.0.0.0:4096
# Remote address
-r 127.0.0.1:7777
-a
-k passwd
--raw-mode faketcp
```

Pay attention to the `-k` parameter: In command line mode the quotes around the password will be removed by shell. In configuration files we do not remove quotes.

Then start the server with

```bash
./udp2raw_amd64 --conf-file server.conf
```

### `--fifo`
Use a fifo(named pipe) for sending commands to the running program. For example `--fifo fifo.file`.

At client side,you can use `echo reconnect >fifo.file` to force client to reconnect.Currently no command has been implemented for server.

# Performance Test
#### Test method:
iperf3 TCP via OpenVPN + udp2raw
(iperf3 UDP mode is not used because of a bug mentioned in this issue: https://github.com/esnet/iperf/issues/296 . Instead, we package the TCP traffic into UDP by OpenVPN to test the performance. Read [Application](https://github.com/wangyu-/udp2raw-tunnel#application) for details.)

#### iperf3 command:
```
iperf3 -c 10.222.2.1 -P40
iperf3 -c 10.222.2.1 -P40 -R
```
#### Environments
* **Client** Vultr $2.5/monthly plan (single core 2.4GHz cpu, 512MB RAM, Tokyo, Japan)
* **Server** BandwagonHost $3.99/annually plan (single core 2.0GHz cpu, 128MB RAM, Los Angeles, USA)

### Test1
raw_mode: faketcp  cipher_mode: xor  auth_mode: simple

![image4](images/image4.PNG)

(reverse speed was similar and not uploaded)

### Test2
raw_mode: faketcp  cipher_mode: aes128cbc  auth_mode: md5

![image5](images/image5.PNG)

(reverse speed was similar and not uploaded)

# wiki

Check wiki for more info:

https://github.com/wangyu-/udp2raw-tunnel/wiki


================================================
FILE: client.cpp
================================================
#include "common.h"
#include "network.h"
#include "connection.h"
#include "misc.h"
#include "log.h"
#include "lib/md5.h"
#include "encrypt.h"
#include "fd_manager.h"

#ifdef UDP2RAW_MP
// State for the probing done at the top of client_on_timer() while
// send_with_pcap is set and pcap_header_captured is still false.

// Minimum spacing between two probes; compared against differences of
// get_current_time() values (presumably milliseconds -- TODO confirm).
u32_t detect_interval = 1500;
// get_current_time() value recorded when the last probe was started; initially 0.
u64_t laste_detect_time = 0;

// Select which probes client_on_timer() sends: a UDP datagram and/or a
// non-blocking TCP connect() to a random port of the remote address.
int use_udp_for_detection = 0;
int use_tcp_for_detection = 1;

// Declared here, defined in another translation unit.
extern pcap_t *pcap_handle;

extern int pcap_captured_full_len;
#endif

/*
 * Periodic driver of the client-side connection state machine
 * (for client. called when a timer is ready in epoll).
 *
 * Each call inspects conn_info.state.client_current_state:
 *   - client_idle: reset anti-replay state and my_id, choose the source
 *     address/port, install the packet filter, then switch to
 *     client_handshake1 (icmp/udp), client_tcp_handshake or
 *     client_tcp_handshake_dummy (faketcp). Execution deliberately continues
 *     so the new state is handled in the same call.
 *   - client_tcp_handshake: (re)send the SYN every client_retry_interval.
 *   - client_tcp_handshake_dummy: only the handshake timeout is checked here.
 *   - client_handshake1 / client_handshake2: (re)send the handshake packet
 *     every client_retry_interval.
 *   - client_ready: detect a timeout in either direction and send a heartbeat
 *     every heartbeat_interval.
 * A handshake state that lasts longer than client_handshake_timeout goes back
 * to client_idle.
 *
 * Returns 0 normally and -1 when the source address cannot be determined (or,
 * in UDP2RAW_MP builds, when a probe socket cannot be created). Calls
 * myexit(-1) when max_fail_time is exceeded or the state is unknown.
 */
int client_on_timer(conn_info_t &conn_info)
{
    packet_info_t &send_info = conn_info.raw_info.send_info;
    packet_info_t &recv_info = conn_info.raw_info.recv_info;
    raw_info_t &raw_info = conn_info.raw_info;
    conn_info.blob->conv_manager.c.clear_inactive();
    mylog(log_trace, "timer!\n");

    mylog(log_trace, "roller my %d,oppsite %d,%lld\n", int(conn_info.my_roller), int(conn_info.oppsite_roller), conn_info.last_oppsite_roller_time);

    mylog(log_trace, "<client_on_timer,send_info.ts_ack= %u>\n", send_info.ts_ack);

#ifdef UDP2RAW_MP
    // Until a usable header has been captured by pcap, do nothing but send a
    // probe (at most once per detect_interval) towards a random remote port.
    // mylog(log_debug,"pcap cnt :%d\n",pcap_cnt);
    if (send_with_pcap && !pcap_header_captured) {
        if (get_current_time() - laste_detect_time > detect_interval) {
            laste_detect_time = get_current_time();
        } else {
            return 0;
        }
        /*
                        struct sockaddr_in remote_addr_in={0};

                        socklen_t slen = sizeof(sockaddr_in);
                        int port=get_true_random_number()%65534+1;
                        remote_addr_in.sin_family = AF_INET;
                        remote_addr_in.sin_port = htons(port);
                        remote_addr_in.sin_addr.s_addr = remote_ip_uint32;*/
        int port = get_true_random_number() % 65534 + 1;  // random port in [1, 65534]
        address_t tmp_addr = remote_addr;
        tmp_addr.set_port(port);

        if (use_udp_for_detection) {
            int new_udp_fd = socket(tmp_addr.get_type(), SOCK_DGRAM, IPPROTO_UDP);
            if (new_udp_fd < 0) {
                mylog(log_warn, "create new_udp_fd error\n");
                return -1;
            }
            setnonblocking(new_udp_fd);
            u64_t tmp = get_true_random_number();

            int ret = sendto(new_udp_fd, (char *)(&tmp), sizeof(tmp), 0, (struct sockaddr *)&tmp_addr.inner, tmp_addr.get_len());
            if (ret == -1) {
                mylog(log_warn, "sendto() failed\n");
            }
            sock_close(new_udp_fd);
        }

        if (use_tcp_for_detection) {
            // The socket of the previous probe stays open until the next probe
            // is started; it is closed below once the new connect() was issued.
            static int last_tcp_fd = -1;

            int new_tcp_fd = socket(tmp_addr.get_type(), SOCK_STREAM, IPPROTO_TCP);
            if (new_tcp_fd < 0) {
                mylog(log_warn, "create new_tcp_fd error\n");
                return -1;
            }
            setnonblocking(new_tcp_fd);
            connect(new_tcp_fd, (struct sockaddr *)&tmp_addr.inner, tmp_addr.get_len());
            if (last_tcp_fd != -1)
                sock_close(last_tcp_fd);
            last_tcp_fd = new_tcp_fd;
            // close(new_tcp_fd);
        }

        mylog(log_info, "waiting for a use-able packet to be captured\n");

        return 0;
    }
#endif
    // A disabled raw_info forces a restart from client_idle with a fresh id.
    if (raw_info.disabled) {
        conn_info.state.client_current_state = client_idle;
        conn_info.my_id = get_true_random_number_nz();

        mylog(log_info, "state back to client_idle\n");
    }

    if (conn_info.state.client_current_state == client_idle) {
        raw_info.rst_received = 0;
        raw_info.disabled = 0;

        // Every pass through client_idle counts as one failed attempt; the
        // counter is reset to 0 once client_ready is reached.
        fail_time_counter++;
        if (max_fail_time > 0 && fail_time_counter > max_fail_time) {
            mylog(log_fatal, "max_fail_time exceed\n");
            myexit(-1);
        }

        conn_info.blob->anti_replay.re_init();
        conn_info.my_id = get_true_random_number_nz();  /// todo no need to do this everytime

        address_t tmp_addr;
        // u32_t new_ip=0;
        if (!force_source_ip) {
            if (get_src_adress2(tmp_addr, remote_addr) != 0) {
                mylog(log_warn, "get_src_adress() failed\n");
                return -1;
            }
            // source_addr=new_addr;
            // source_addr.set_port(0);

            mylog(log_info, "source_addr is now %s\n", tmp_addr.get_ip());

            /*
            if(new_ip!=source_ip_uint32)
            {
                    mylog(log_info,"source ip changed from %s to ",my_ntoa(source_ip_uint32));
                    log_bare(log_info,"%s\n",my_ntoa(new_ip));
                    source_ip_uint32=new_ip;
                    send_info.src_ip=new_ip;
            }*/

        } else {
            tmp_addr = source_addr;
        }

        send_info.new_src_ip.from_address_t(tmp_addr);

        if (force_source_port == 0) {
            send_info.src_port = client_bind_to_a_new_port2(bind_fd, tmp_addr);
        } else {
            send_info.src_port = source_port;
        }

        if (raw_mode == mode_icmp) {
            send_info.dst_port = send_info.src_port;
        }

        mylog(log_info, "using port %d\n", send_info.src_port);
        init_filter(send_info.src_port);

        if (raw_mode == mode_icmp || raw_mode == mode_udp) {
            conn_info.state.client_current_state = client_handshake1;

            // The message names the state that is actually entered.
            mylog(log_info, "state changed from client_idle to client_handshake1\n");
        }
        if (raw_mode == mode_faketcp) {
            if (use_tcp_dummy_socket) {
                setnonblocking(bind_fd);
                int ret = connect(bind_fd, (struct sockaddr *)&remote_addr.inner, remote_addr.get_len());
                mylog(log_debug, "ret=%d,errno=%s, %d %s\n", ret, get_sock_error(), bind_fd, remote_addr.get_str());
                // mylog(log_info,"ret=%d,errno=,%d %s\n",ret,bind_fd,remote_addr.get_str());
                conn_info.state.client_current_state = client_tcp_handshake_dummy;
                mylog(log_info, "state changed from client_idle to client_tcp_handshake_dummy\n");
            } else {
                conn_info.state.client_current_state = client_tcp_handshake;
                mylog(log_info, "state changed from client_idle to client_tcp_handshake\n");
            }
        }
        conn_info.last_state_time = get_current_time();
        // 0 marks "nothing sent yet in this state"; the handshake branches
        // below use it to initialise seq/ack fields on the first send only.
        conn_info.last_hb_sent_time = 0;
        // dont return;
    }
    if (conn_info.state.client_current_state == client_tcp_handshake)  // send and resend syn
    {
        assert(raw_mode == mode_faketcp);
        if (get_current_time() - conn_info.last_state_time > client_handshake_timeout) {
            conn_info.state.client_current_state = client_idle;
            mylog(log_info, "state back to client_idle from client_tcp_handshake\n");
            return 0;

        } else if (get_current_time() - conn_info.last_hb_sent_time > client_retry_interval) {
            if (raw_mode == mode_faketcp) {
                // First SYN of this attempt: pick random seq/ack_seq; resends
                // reuse the same values.
                if (conn_info.last_hb_sent_time == 0) {
                    send_info.psh = 0;
                    send_info.syn = 1;
                    send_info.ack = 0;
                    send_info.ts_ack = 0;
                    send_info.seq = get_true_random_number();
                    send_info.ack_seq = get_true_random_number();
                }
            }

            send_raw0(raw_info, 0, 0);

            conn_info.last_hb_sent_time = get_current_time();
            mylog(log_info, "(re)sent tcp syn\n");
            return 0;
        } else {
            return 0;
        }
    } else if (conn_info.state.client_current_state == client_tcp_handshake_dummy) {
        assert(raw_mode == mode_faketcp);
        if (get_current_time() - conn_info.last_state_time > client_handshake_timeout) {
            conn_info.state.client_current_state = client_idle;
            mylog(log_info, "state back to client_idle from client_tcp_handshake_dummy\n");
            return 0;
        }
    } else if (conn_info.state.client_current_state == client_handshake1)  // send and resend handshake1
    {
        if (get_current_time() - conn_info.last_state_time > client_handshake_timeout) {
            conn_info.state.client_current_state = client_idle;
            mylog(log_info, "state back to client_idle from client_handshake1\n");
            return 0;

        } else if (get_current_time() - conn_info.last_hb_sent_time > client_retry_interval) {
            if (raw_mode == mode_faketcp) {
                if (conn_info.last_hb_sent_time == 0) {
                    send_info.seq++;
                    send_info.ack_seq = recv_info.seq + 1;
                    send_info.ts_ack = recv_info.ts;
                    raw_info.reserved_send_seq = send_info.seq;
                }
                // Every resend starts again from the seq reserved on the first send.
                send_info.seq = raw_info.reserved_send_seq;
                send_info.psh = 0;
                send_info.syn = 0;
                send_info.ack = 1;

                if (!use_tcp_dummy_socket)
                    send_raw0(raw_info, 0, 0);

                send_handshake(raw_info, conn_info.my_id, 0, const_id);

                send_info.seq += raw_info.send_info.data_len;
            } else {
                send_handshake(raw_info, conn_info.my_id, 0, const_id);
                if (raw_mode == mode_icmp)
                    send_info.my_icmp_seq++;
            }

            conn_info.last_hb_sent_time = get_current_time();
            mylog(log_info, "(re)sent handshake1\n");
            return 0;
        } else {
            return 0;
        }
    } else if (conn_info.state.client_current_state == client_handshake2) {
        if (get_current_time() - conn_info.last_state_time > client_handshake_timeout) {
            conn_info.state.client_current_state = client_idle;
            mylog(log_info, "state back to client_idle from client_handshake2\n");
            return 0;
        } else if (get_current_time() - conn_info.last_hb_sent_time > client_retry_interval) {
            if (raw_mode == mode_faketcp) {
                if (conn_info.last_hb_sent_time == 0) {
                    send_info.ack_seq = recv_info.seq + raw_info.recv_info.data_len;
                    send_info.ts_ack = recv_info.ts;
                    raw_info.reserved_send_seq = send_info.seq;
                }
                // Every resend starts again from the seq reserved on the first send.
                send_info.seq = raw_info.reserved_send_seq;
                send_handshake(raw_info, conn_info.my_id, conn_info.oppsite_id, const_id);
                send_info.seq += raw_info.send_info.data_len;

            } else {
                send_handshake(raw_info, conn_info.my_id, conn_info.oppsite_id, const_id);
                if (raw_mode == mode_icmp)
                    send_info.my_icmp_seq++;
            }
            conn_info.last_hb_sent_time = get_current_time();
            mylog(log_info, "(re)sent handshake2\n");
            return 0;

        } else {
            return 0;
        }
    } else if (conn_info.state.client_current_state == client_ready) {
        fail_time_counter = 0;
        mylog(log_trace, "time %llu,%llu\n", get_current_time(), conn_info.last_state_time);

        if (get_current_time() - conn_info.last_hb_recv_time > client_conn_timeout) {
            conn_info.state.client_current_state = client_idle;
            conn_info.my_id = get_true_random_number_nz();
            mylog(log_info, "state back to client_idle from  client_ready bc of server-->client direction timeout\n");
            return 0;
        }

        if (get_current_time() - conn_info.last_oppsite_roller_time > client_conn_uplink_timeout) {
            conn_info.state.client_current_state = client_idle;
            conn_info.my_id = get_true_random_number_nz();
            mylog(log_info, "state back to client_idle from  client_ready bc of client-->server direction timeout\n");
            // Return like the server-->client timeout branch does, so that no
            // heartbeat carrying the just-regenerated my_id is sent for a
            // connection that was declared dead a moment ago.
            return 0;
        }

        if (get_current_time() - conn_info.last_hb_sent_time < heartbeat_interval) {
            return 0;
        }

        mylog(log_debug, "heartbeat sent <%x,%x>\n", conn_info.oppsite_id, conn_info.my_id);

        // hb_mode 0 sends an empty heartbeat, any other mode sends hb_len bytes of hb_buf.
        if (hb_mode == 0)
            send_safer(conn_info, 'h', hb_buf, 0);  /////////////send
        else
            send_safer(conn_info, 'h', hb_buf, hb_len);
        conn_info.last_hb_sent_time = get_current_time();
        return 0;
    } else {
        mylog(log_fatal, "unknown state,this shouldnt happen.\n");
        myexit(-1);
    }
    return 0;
}
/*
 * Processes one (type, payload) item received on the raw side while the client
 * is in client_handshake2 or client_ready.
 *
 *  - The packet must come from the address/port we are sending to, otherwise
 *    it is rejected with -1.
 *  - The first accepted packet in client_handshake2 promotes the connection to
 *    client_ready and immediately runs client_on_timer().
 *  - 'h' items refresh the heartbeat receive time.
 *  - 'd' items carry a network-order conversation id followed by the payload,
 *    which is forwarded with sendto() on udp_fd to the address stored for that
 *    conversation.
 *
 * Returns 0 when the item was handled (including "unknown conversation",
 * which is ignored), -1 for an unexpected source or an unknown item type.
 */
int client_on_raw_recv_hs2_or_ready(conn_info_t &conn_info, char type, char *data, int data_len) {
    packet_info_t &tx = conn_info.raw_info.send_info;
    packet_info_t &rx = conn_info.raw_info.recv_info;

    const bool from_expected_peer = rx.new_src_ip.equal(tx.new_dst_ip) && rx.src_port == tx.dst_port;
    if (!from_expected_peer) {
        mylog(log_warn, "unexpected adress %s %s %d %d,this shouldnt happen.\n", rx.new_src_ip.get_str1(), tx.new_dst_ip.get_str2(), rx.src_port, tx.dst_port);
        return -1;
    }

    if (conn_info.state.client_current_state == client_handshake2) {
        mylog(log_info, "changed state from to client_handshake2 to client_ready\n");
        conn_info.state.client_current_state = client_ready;
        // Zeroing the last-sent time makes the timer call below send a heartbeat right away.
        conn_info.last_hb_sent_time = 0;
        conn_info.last_hb_recv_time = get_current_time();
        conn_info.last_oppsite_roller_time = conn_info.last_hb_recv_time;
        client_on_timer(conn_info);
    }

    // Heartbeat item.
    if (type == 'h' && data_len >= 0) {
        mylog(log_debug, "[hb]heart beat received,oppsite_roller=%d\n", int(conn_info.oppsite_roller));
        conn_info.last_hb_recv_time = get_current_time();
        return 0;
    }

    // Anything that is not a well-formed data item is rejected.
    const bool is_data_item = (type == 'd') && (data_len >= int(sizeof(u32_t)));
    if (!is_data_item) {
        mylog(log_warn, "unknown packet,this shouldnt happen.\n");
        return -1;
    }

    mylog(log_trace, "received a data from fake tcp,len:%d\n", data_len);

    // With hb_mode 0, data packets also count as proof that the peer is alive.
    if (hb_mode == 0) {
        conn_info.last_hb_recv_time = get_current_time();
    }

    // The first sizeof(u32_t) bytes are the conversation id in network byte order.
    u32_t conv_id;
    memcpy(&conv_id, data, sizeof(conv_id));
    conv_id = ntohl(conv_id);

    if (!conn_info.blob->conv_manager.c.is_conv_used(conv_id)) {
        mylog(log_info, "unknow conv %d,ignore\n", conv_id);
        return 0;
    }

    conn_info.blob->conv_manager.c.update_active_time(conv_id);

    address_t dest_addr = conn_info.blob->conv_manager.c.find_data_by_conv(conv_id);

    const size_t header_len = sizeof(u32_t);
    int sent = sendto(udp_fd, data + header_len, data_len - header_len, 0, (struct sockaddr *)&dest_addr.inner, dest_addr.get_len());
    if (sent < 0) {
        // A failed forward is only logged; the item still counts as handled.
        mylog(log_warn, "sento returned %d,%s,%02x,%s\n", sent, get_sock_error(), int(dest_addr.get_type()), dest_addr.get_str());
    }
    return 0;
}
/*
 * Handles one packet arriving on the raw side and drives the client state
 * machine accordingly:
 *
 *   client_idle                      -> packet is discarded
 *   client_tcp_handshake(_dummy)     -> expects an empty SYN+ACK, moves to client_handshake1
 *   client_handshake1                -> expects the handshake reply (three ids), moves to client_handshake2
 *   client_handshake2 / client_ready -> decodes heartbeat/data items and dispatches each one
 *
 * Every state transition zeroes last_hb_sent_time and calls client_on_timer()
 * right away.
 *
 * Returns 0 when the packet was consumed (or discarded in client_idle) and -1
 * when it was rejected. An unknown state is fatal (myexit).
 */
int client_on_raw_recv(conn_info_t &conn_info)  // called when raw fd received a packet.
{
    char *data;
    int data_len;
    packet_info_t &send_info = conn_info.raw_info.send_info;
    packet_info_t &recv_info = conn_info.raw_info.recv_info;

    raw_info_t &raw_info = conn_info.raw_info;

    mylog(log_trace, "<client_on_raw_recv,send_info.ts_ack= %u>\n", send_info.ts_ack);

#ifdef UDP2RAW_LINUX
    if (pre_recv_raw_packet() < 0) return -1;
#endif

    if (conn_info.state.client_current_state == client_idle) {
        discard_raw_packet();
    } else if (conn_info.state.client_current_state == client_tcp_handshake || conn_info.state.client_current_state == client_tcp_handshake_dummy)  // received syn ack
    {
        assert(raw_mode == mode_faketcp);
        if (recv_raw0(raw_info, data, data_len) < 0) {
            return -1;
        }
        if (data_len >= max_data_len + 1) {
            mylog(log_debug, "data_len=%d >= max_data_len+1,ignored\n", data_len);
            return -1;
        }
        if (!recv_info.new_src_ip.equal(send_info.new_dst_ip) || recv_info.src_port != send_info.dst_port) {
            mylog(log_debug, "unexpected adress %s %s %d %d\n", recv_info.new_src_ip.get_str1(), send_info.new_dst_ip.get_str2(), recv_info.src_port, send_info.dst_port);
            return -1;
        }
        if (data_len == 0 && raw_info.recv_info.syn == 1 && raw_info.recv_info.ack == 1) {
            if (conn_info.state.client_current_state == client_tcp_handshake) {
                // The SYN+ACK must acknowledge exactly the SYN we sent.
                if (recv_info.ack_seq != send_info.seq + 1) {
                    mylog(log_debug, "seq ack_seq mis match\n");
                    return -1;
                }
                mylog(log_info, "state changed from client_tcp_handshake to client_handshake1\n");
            } else {
                // Dummy handshake: adopt the sequence number the peer acknowledged.
                send_info.seq = recv_info.ack_seq - 1;
                mylog(log_info, "state changed from client_tcp_dummy to client_handshake1\n");
            }
            conn_info.state.client_current_state = client_handshake1;

            conn_info.last_state_time = get_current_time();
            conn_info.last_hb_sent_time = 0;
            client_on_timer(conn_info);
            return 0;
        } else {
            mylog(log_debug, "unexpected packet type,expected:syn ack\n");
            return -1;
        }
    } else if (conn_info.state.client_current_state == client_handshake1)  // recevied respond of handshake1
    {
        if (recv_bare(raw_info, data, data_len) != 0) {
            mylog(log_debug, "recv_bare failed!\n");
            return -1;
        }
        if (!recv_info.new_src_ip.equal(send_info.new_dst_ip) || recv_info.src_port != send_info.dst_port) {
            mylog(log_debug, "unexpected adress %s %s %d %d\n", recv_info.new_src_ip.get_str1(), send_info.new_dst_ip.get_str2(), recv_info.src_port, send_info.dst_port);
            return -1;
        }
        // The reply carries three ids back to back: peer id, our id, peer const id.
        if (data_len < int(3 * sizeof(my_id_t))) {
            mylog(log_debug, "too short to be a handshake\n");
            return -1;
        }
        my_id_t tmp_oppsite_id;
        memcpy(&tmp_oppsite_id, &data[0], sizeof(tmp_oppsite_id));
        tmp_oppsite_id = ntohl(tmp_oppsite_id);

        my_id_t tmp_my_id;
        memcpy(&tmp_my_id, &data[sizeof(my_id_t)], sizeof(tmp_my_id));
        tmp_my_id = ntohl(tmp_my_id);

        // Parsed for completeness; this function does not use the value further.
        my_id_t tmp_oppsite_const_id;
        memcpy(&tmp_oppsite_const_id, &data[sizeof(my_id_t) * 2], sizeof(tmp_oppsite_const_id));
        tmp_oppsite_const_id = ntohl(tmp_oppsite_const_id);

        if (tmp_my_id != conn_info.my_id) {
            mylog(log_debug, "tmp_my_id doesnt match\n");
            return -1;
        }

        if (raw_mode == mode_faketcp) {
            if (recv_info.ack_seq != send_info.seq) {
                mylog(log_debug, "seq ack_seq mis match\n");
                return -1;
            }
            if (recv_info.seq != send_info.ack_seq) {
                mylog(log_debug, "seq ack_seq mis match\n");
                return -1;
            }
        }
        conn_info.oppsite_id = tmp_oppsite_id;

        mylog(log_info, "changed state from to client_handshake1 to client_handshake2,my_id is %x,oppsite id is %x\n", conn_info.my_id, conn_info.oppsite_id);

        conn_info.state.client_current_state = client_handshake2;
        conn_info.last_state_time = get_current_time();
        conn_info.last_hb_sent_time = 0;
        client_on_timer(conn_info);

        return 0;
    } else if (conn_info.state.client_current_state == client_handshake2 || conn_info.state.client_current_state == client_ready)  // received heartbeat or data
    {
        vector<char> type_vec;
        vector<string> data_vec;
        recv_safer_multi(conn_info, type_vec, data_vec);
        if (data_vec.empty()) {
            mylog(log_debug, "recv_safer failed!\n");
            return -1;
        }

        // Both vectors are indexed in parallel; never walk past the shorter one.
        const int item_cnt = (int)(type_vec.size() < data_vec.size() ? type_vec.size() : data_vec.size());
        for (int i = 0; i < item_cnt; i++) {
            char *item_data = (char *)data_vec[i].c_str();  // be careful, do not append data to it
            int item_len = data_vec[i].length();
            // Per-item failures are logged by the callee and do not abort the batch.
            client_on_raw_recv_hs2_or_ready(conn_info, type_vec[i], item_data, item_len);
        }

        return 0;
    } else {
        mylog(log_fatal, "unknown state,this shouldnt happen.\n");
        myexit(-1);
    }
    return 0;
}
/*
 * Reads one datagram from the local UDP socket (udp_fd), maps its source
 * address to a conversation id (allocating a new one when the address has not
 * been seen before) and, if the tunnel is in client_ready, forwards the
 * payload with send_data_safer().
 *
 * Returns 0 on success (also when the payload is not forwarded because the
 * client is not ready), -1 when the read fails, the datagram is larger than
 * max_data_len, or no more conversations can be allocated.
 */
int client_on_udp_recv(conn_info_t &conn_info) {
    char buf[buf_len];
    address_t::storage_t peer_storage = {{0}};
    socklen_t peer_len = sizeof(address_t::storage_t);

    // Ask for one byte more than the limit so that oversized datagrams can be detected.
    int recv_len = recvfrom(udp_fd, buf, max_data_len + 1, 0,
                            (struct sockaddr *)&peer_storage, &peer_len);
    if (recv_len == -1) {
        mylog(log_debug, "recv_from error,%s\n", get_sock_error());
        return -1;
    }

    if (recv_len == max_data_len + 1) {
        mylog(log_warn, "huge packet, data_len > %d,dropped\n", max_data_len);
        return -1;
    }

    if (recv_len >= mtu_warn) {
        mylog(log_warn, "huge packet,data len=%d (>=%d).strongly suggested to set a smaller mtu at upper level,to get rid of this warn\n ", recv_len, mtu_warn);
    }

    address_t peer_addr;
    peer_addr.from_sockaddr((sockaddr *)&peer_storage, peer_len);

    auto &conv_table = conn_info.blob->conv_manager.c;
    u32_t conv;

    if (conv_table.is_data_used(peer_addr)) {
        conv = conv_table.find_conv_by_data(peer_addr);
    } else {
        if (conv_table.get_size() >= max_conv_num) {
            mylog(log_warn, "ignored new udp connect bc max_conv_num exceed\n");
            return -1;
        }
        conv = conv_table.get_new_conv();
        conv_table.insert_conv(conv, peer_addr);
        mylog(log_info, "new packet from %s,conv_id=%x\n", peer_addr.get_str(), conv);
    }

    conv_table.update_active_time(conv);

    // Payload is only forwarded once the tunnel is fully established.
    if (conn_info.state.client_current_state == client_ready) {
        send_data_safer(conn_info, buf, recv_len, conv);
    }
    return 0;
}
void udp_accept_cb(struct ev_loop *loop, struct ev_io *watcher, int revents) {
    conn_info_t &conn_info = *((conn_info_t *)watcher->data);
    client_on_udp_recv(conn_info);
}
void raw_recv_cb(struct ev_loop *loop, struct ev_io *watcher, int revents) {
    if (is_udp2raw_mp) assert(0 == 1);
    conn_info_t &conn_info = *((conn_info_t *)watcher->data);
    client_on_raw_recv(conn_info);
}
#ifdef UDP2RAW_MP
/*
 * libev async callback: drains my_queue (guarded by queue_mutex) and feeds
 * every queued packet to client_on_raw_recv().
 *
 * When send_with_pcap is set and no link-level header has been captured yet,
 * the first queued packet is consumed only to remember its header in
 * pcap_header_buf; nothing else is processed in that invocation.
 *
 * Otherwise each packet has its link-level header (pcap_link_header_len bytes)
 * stripped and is copied into g_packet_buf before client_on_raw_recv() runs.
 * Oversized packets (unless g_fix_gro is set) and packets shorter than the
 * link-level header are dropped, and draining continues with the next packet.
 */
void async_cb(struct ev_loop *loop, struct ev_async *watcher, int revents) {
    conn_info_t &conn_info = *((conn_info_t *)watcher->data);

    if (send_with_pcap && !pcap_header_captured) {
        int empty = 0;
        char *p = 0;
        int len = 0;
        pthread_mutex_lock(&queue_mutex);
        empty = my_queue.empty();
        if (!empty) {
            my_queue.peek_front(p, len);
            my_queue.pop_front();
        }
        pthread_mutex_unlock(&queue_mutex);
        if (empty) return;

        pcap_header_captured = 1;
        assert(pcap_link_header_len != -1);
        // NOTE(review): copies max_data_len bytes regardless of len; this relies on
        // the queue slot and pcap_header_buf both being at least that large - confirm.
        memcpy(pcap_header_buf, p, max_data_len);

        log_bare(log_info, "link level header captured:\n");
        unsigned char *tmp = (unsigned char *)pcap_header_buf;
        pcap_captured_full_len = len;
        for (int i = 0; i < pcap_link_header_len; i++)
            log_bare(log_info, "<%x>", (u32_t)tmp[i]);

        log_bare(log_info, "\n");
        return;
    }

    while (1) {
        int empty = 0;
        char *p = 0;
        int len = 0;
        pthread_mutex_lock(&queue_mutex);
        empty = my_queue.empty();
        if (!empty) {
            my_queue.peek_front(p, len);
            my_queue.pop_front();
        }
        pthread_mutex_unlock(&queue_mutex);

        if (empty) break;
        if (g_fix_gro == 0 && len > max_data_len) {
            mylog(log_warn, "huge packet %d > %d, dropped. maybe you need to turn down mtu at upper level, or maybe you need the --fix-gro option\n", len, max_data_len);
            // Drop only this packet; the rest of the queue must still be drained,
            // otherwise it would sit there until the next async wake-up.
            continue;
        }

        int new_len = len - pcap_link_header_len;
        if (new_len < 0) {
            // A negative length would be converted to a huge size_t by memcpy.
            mylog(log_warn, "packet shorter than link level header, %d < %d, dropped\n", len, pcap_link_header_len);
            continue;
        }
        memcpy(g_packet_buf, p + pcap_link_header_len, new_len);
        g_packet_buf_len = new_len;
        assert(g_packet_buf_cnt == 0);
        g_packet_buf_cnt++;
        client_on_raw_recv(conn_info);
    }
}
#endif
void clear_timer_cb(struct ev_loop *loop, struct ev_timer *watcher, int revents) {
    conn_info_t &conn_info = *((conn_info_t *)watcher->data);
    client_on_timer(conn_info);
}
/*
 * libev read callback for the control fifo. Reads one command, strips its
 * trailing newlines and executes it. The only command understood is
 * "reconnect", which puts the client back to client_idle with a fresh my_id.
 * Anything else is logged as an unknown command.
 */
void fifo_cb(struct ev_loop *loop, struct ev_io *watcher, int revents) {
    conn_info_t &conn_info = *((conn_info_t *)watcher->data);

    char buf[buf_len];
    int fifo_fd = watcher->fd;

    // Read at most sizeof(buf)-1 bytes so the terminating NUL below always fits.
    int len = read(fifo_fd, buf, sizeof(buf) - 1);
    if (len < 0) {
        mylog(log_warn, "fifo read failed len=%d,errno=%s\n", len, get_sock_error());
        return;
    }
    buf[len] = 0;

    // Strip every trailing newline, not just the last one.
    int text_len = len;
    while (text_len >= 1 && buf[text_len - 1] == '\n') {
        text_len--;
        buf[text_len] = 0;
    }

    mylog(log_info, "got data from fifo,len=%d,s=[%s]\n", len, buf);
    if (strcmp(buf, "reconnect") == 0) {
        mylog(log_info, "received command: reconnect\n");
        conn_info.state.client_current_state = client_idle;
        conn_info.my_id = get_true_random_number_nz();
    } else {
        mylog(log_info, "unknown command\n");
    }
}
int client_event_loop() {
    char buf[buf_len];

    conn_info_t conn_info;
    conn_info.my_id = get_true_random_number_nz();

    conn_info.prepare();
    packet_info_t &send_info = conn_info.raw_info.send_info;
    packet_info_t &recv_info = conn_info.raw_info.recv_info;

#ifdef UDP2RAW_LINUX
    if (lower_level) {
        if (lower_level_manual) {
            int index;
            init_ifindex(if_name, raw_send_fd, index);
            // init_ifindex(if_name);
            memset(&send_info.addr_ll, 0, sizeof(send_info.addr_ll));
            send_info.addr_ll.sll_family = AF_PACKET;
            send_info.addr_ll.sll_ifindex = index;
            send_info.addr_ll.sll_halen = ETHER_ADDR_LEN;
            send_info.addr_ll.sll_protocol = htons(ETH_P_IP);
            memcpy(&send_info.addr_ll.sll_addr, dest_hw_addr, ETHER_ADDR_LEN);
            mylog(log_info, "we are running at lower-level (manual) mode\n");
        } else {
            u32_t dest_ip;
            string if_name_string;
            string hw_string;
            assert(remote_addr.get_type() == AF_INET);

            if (retry_on_error == 0) {
                if (find_lower_level_info(remote_addr.inner.ipv4.sin_addr.s_addr, dest_ip, if_name_string, hw_string) != 0) {
                    mylog(log_fatal, "auto detect lower-level info failed for %s,specific it manually\n", remote_addr.get_ip());
                    myexit(-1);
                }
            } else {
                int ok = 0;
                while (!ok) {
                    if (find_lower_level_info(remote_addr.inner.ipv4.sin_addr.s_addr, dest_ip, if_name_string, hw_string) != 0) {
                        mylog(log_warn, "auto detect lower-level info failed for %s,retry in %d seconds\n", remote_addr.get_ip(), retry_on_error_interval);
                        sleep(retry_on_error_interval);
                    } else {
                        ok = 1;
                    }
                }
            }
            mylog(log_info, "we are running at lower-level (auto) mode,%s %s %s\n", my_ntoa(dest_ip), if_name_string.c_str(), hw_string.c_str());

            u32_t hw[6];
            memset(hw, 0, sizeof(hw));
            sscanf(hw_string.c_str(), "%x:%x:%x:%x:%x:%x", &hw[0], &hw[1], &hw[2],
                   &hw[3], &hw[4], &hw[5]);

            mylog(log_warn,
                  "make sure this is correct:   if_name=<%s>  dest_mac_adress=<%02x:%02x:%02x:%02x:%02x:%02x>  \n",
                  if_name_string.c_str(), hw[0], hw[1], hw[2], hw[3], hw[4], hw[5]);
            for (int i = 0; i < 6; i++) {
                dest_hw_addr[i] = uint8_t(hw[i]);
            }

            // mylog(log_fatal,"--lower-level auto for client hasnt been implemented\n");
            int index;
            init_ifindex(if_name_string.c_str(), raw_send_fd, index);

            memset(&send_info.addr_ll, 0, sizeof(send_info.addr_ll));
            send_info.addr_ll.sll_family = AF_PACKET;
            send_info.addr_ll.sll_ifindex = index;
            send_info.addr_ll.sll_halen = ETHER_ADDR_LEN;
            send_info.addr_ll.sll_protocol = htons(ETH_P_IP);
            memcpy(&send_info.addr_ll.sll_addr, dest_hw_addr, ETHER_ADDR_LEN);
            // mylog(log_info,"we are running at lower-level (manual) mode\n");
        }
    }
#endif

#ifdef UDP2RAW_MP

    address_t tmp_addr;
    if (get_src_adress2(tmp_addr, remote_addr) != 0) {
        mylog(log_error, "get_src_adress() failed\n");
        myexit(-1);
    }
    if (strcmp(dev, "") == 0) {
        mylog(log_info, "--dev have not been set, trying to detect automatically, available devices:\n");

        mylog(log_info, "available device(device name: ip address ; description):\n");

        char errbuf[PCAP_ERRBUF_SIZE];

        int found = 0;

        pcap_if_t *interfaces, *d;
        if (pcap_findalldevs(&interfaces, errbuf) == -1) {
            mylog(log_fatal, "error in pcap_findalldevs(),%s\n", errbuf);
            myexit(-1);
        }

        for (pcap_if_t *d = interfaces; d != NULL; d = d->next) {
            log_bare(log_warn, "%s:", d->name);
            int cnt = 0;
            for (pcap_addr_t *a = d->addresses; a != NULL; a = a->next) {
                if (a->addr == NULL) {
                    log_bare(log_debug, " [a->addr==NULL]");
                    continue;
                }
                if (a->addr->sa_family == AF_INET || a->addr->sa_family == AF_INET6) {
                    cnt++;

                    if (a->addr->sa_family == AF_INET) {
                        char s[max_addr_len];
                        inet_ntop(AF_INET, &((struct sockaddr_in *)a->addr)->sin_addr, s, max_addr_len);
                        log_bare(log_warn, " [%s]", s);

                        if (a->addr->sa_family == raw_ip_version) {
                            if (((struct sockaddr_in *)a->addr)->sin_addr.s_addr == tmp_addr.inner.ipv4.sin_addr.s_addr) {
                                found++;
                                strcpy(dev, d->name);
                            }
                        }
                    } else {
                        assert(a->addr->sa_family == AF_INET6);

                        char s[max_addr_len];
                        inet_ntop(AF_INET6, &((struct sockaddr_in6 *)a->addr)->sin6_addr, s, max_addr_len);
                        log_bare(log_warn, " [%s]", s);

                        if (a->addr->sa_family == raw_ip_version) {
                            if (memcmp(&((struct sockaddr_in6 *)a->addr)->sin6_addr, &tmp_addr.inner.ipv6.sin6_addr, sizeof(struct in6_addr)) == 0) {
                                found++;
                                strcpy(dev, d->name);
                            }
                        }
                    }
                } else {
                    log_bare(log_debug, " [unknow:%d]", int(a->addr->sa_family));
                }
            }
            if (cnt == 0) log_bare(log_warn, " [no ip found]");
            if (d->description == 0) {
                log_bare(log_warn, "; (no description available)");
            } else {
                log_bare(log_warn, "; %s", d->description);
            }
            log_bare(log_warn, "\n");
        }

        if (found == 0) {
            mylog(log_fatal, "no matched device found for ip: [%s]\n", tmp_addr.get_ip());
            myexit(-1);
        } else if (found == 1) {
            mylog(log_info, "using device:[%s], ip: [%s]\n", dev, tmp_addr.get_ip());
        } else {
            mylog(log_fatal, "more than one devices found for ip: [%s] , you need to use --dev manually\n", tmp_addr.get_ip());
            myexit(-1);
        }
    } else {
        mylog(log_info, "--dev has been manually set, using device:[%s]\n", dev);
    }
#endif

    send_info.src_port = 0;
    memset(&send_info.new_src_ip, 0, sizeof(send_info.new_src_ip));

    int i, j, k;
    int ret;

    send_info.new_dst_ip.from_address_t(remote_addr);
    send_info.dst_port = remote_addr.get_port();

    udp_fd = socket(local_addr.get_type(), SOCK_DGRAM, IPPROTO_UDP);
    set_buf_size(udp_fd, socket_buf_size);

    if (::bind(udp_fd, (struct sockaddr *)&local_addr.inner, local_addr.get_len()) == -1) {
        mylog(log_fatal, "socket bind error\n");
        // perror("socket bind error");
        myexit(1);
    }
    setnonblocking(udp_fd);

    // epollfd = epoll_create1(0);

    // const int max_events = 4096;
    // struct epoll_event ev, events[max_events];
    // if (epollfd < 0) {
    //	mylog(log_fatal,"epoll return %d\n", epollfd);
    //	myexit(-1);
    // }

    struct ev_loop *loop = ev_default_loop(0);
    assert(loop != NULL);

    // ev.events = EPOLLIN;
    // ev.data.u64 = udp_fd;
    // ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, udp_fd, &ev);
    // if (ret!=0) {
    //	mylog(log_fatal,"add  udp_listen_fd error\n");
    //	myexit(-1);
    // }

    struct ev_io udp_accept_watcher;

    udp_accept_watcher.data = &conn_info;
    ev_io_init(&udp_accept_watcher, udp_accept_cb, udp_fd, EV_READ);
    ev_io_start(loop, &udp_accept_watcher);

    // ev.events = EPOLLIN;
    // ev.data.u64 = raw_recv_fd;

    // ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, raw_recv_fd, &ev);
    // if (ret!= 0) {
    //	mylog(log_fatal,"add raw_fd error\n");
    //	myexit(-1);
    // }

#ifdef UDP2RAW_LINUX
    struct ev_io raw_recv_watcher;

    raw_recv_watcher.data = &conn_info;
    ev_io_init(&raw_recv_watcher, raw_recv_cb, raw_recv_fd, EV_READ);
    ev_io_start(loop, &raw_recv_watcher);
#endif

#ifdef UDP2RAW_MP
    g_default_loop = loop;
    async_watcher.data = &conn_info;
    ev_async_init(&async_watcher, async_cb);
    ev_async_start(loop, &async_watcher);

    init_raw_socket();  // must be put after dev detection
#endif

    // set_timer(epollfd,timer_fd);
    struct ev_timer clear_timer;

    clear_timer.data = &conn_info;
    ev_timer_init(&clear_timer, clear_timer_cb, 0, timer_interval / 1000.0);
    ev_timer_start(loop, &clear_timer);

    mylog(log_debug, "send_raw : from %s %d  to %s %d\n", send_info.new_src_ip.get_str1(), send_info.src_port, send_info.new_dst_ip.get_str2(), send_info.dst_port);

    int fifo_fd = -1;

    struct ev_io fifo_watcher;
    fifo_watcher.data = &conn_info;

    if (fifo_file[0] != 0) {
        fifo_fd = create_fifo(fifo_file);

        ev_io_init(&fifo_watcher, fifo_cb, fifo_fd, EV_READ);
        ev_io_start(loop, &fifo_watcher);

        mylog(log_info, "fifo_file=%s\n", fifo_file);
    }

    ev_run(loop, 0);
    return 0;
}


================================================
FILE: common.cpp
================================================
/*
 * common.cpp
 *
 *  Created on: Jul 29, 2017
 *      Author: wangyu
 */

#include "common.h"
#include "log.h"
#include "misc.h"

#include <random>
#include <cmath>

// static int random_number_fd=-1;
int force_socket_buf = 0;

/*
 * Parses "ip:port" (IPv4) or "[ip]:port" (IPv6) into this address.
 *
 * The address is cleared first. Any parse error, a port above 65535 or an IP
 * that inet_pton() rejects is fatal: the error is logged and myexit(-1) is
 * called. Returns 0 on success.
 */
int address_t::from_str(char *str) {
    clear();

    char ip_addr_str[100];
    u32_t port;
    mylog(log_info, "parsing address: %s\n", str);
    int is_ipv6 = 0;
    // The field widths (99) keep the scansets from writing past ip_addr_str
    // when the input is longer than the buffer.
    if (sscanf(str, "[%99[^]]]:%u", ip_addr_str, &port) == 2) {
        mylog(log_info, "its an ipv6 adress\n");
        inner.ipv6.sin6_family = AF_INET6;
        is_ipv6 = 1;
    } else if (sscanf(str, "%99[^:]:%u", ip_addr_str, &port) == 2) {
        mylog(log_info, "its an ipv4 adress\n");
        inner.ipv4.sin_family = AF_INET;
    } else {
        mylog(log_error, "failed to parse\n");
        myexit(-1);
    }

    mylog(log_info, "ip_address is {%s}, port is {%u}\n", ip_addr_str, port);

    if (port > 65535) {
        // port is unsigned, so print it as such.
        mylog(log_error, "invalid port: %u\n", port);
        myexit(-1);
    }

    int ret = -100;
    if (is_ipv6) {
        ret = inet_pton(AF_INET6, ip_addr_str, &(inner.ipv6.sin6_addr));
        inner.ipv6.sin6_port = htons(port);
        if (ret == 0)  // 0 if address type doesnt match
        {
            mylog(log_error, "ip_addr %s is not an ipv6 address, %d\n", ip_addr_str, ret);
            myexit(-1);
        } else if (ret == 1)  // inet_pton returns 1 on success
        {
            // okay
        } else {
            mylog(log_error, "ip_addr %s is invalid, %d\n", ip_addr_str, ret);
            myexit(-1);
        }
    } else {
        ret = inet_pton(AF_INET, ip_addr_str, &(inner.ipv4.sin_addr));
        inner.ipv4.sin_port = htons(port);

        if (ret == 0) {
            mylog(log_error, "ip_addr %s is not an ipv4 address, %d\n", ip_addr_str, ret);
            myexit(-1);
        } else if (ret == 1) {
            // okay
        } else {
            mylog(log_error, "ip_addr %s is invalid, %d\n", ip_addr_str, ret);
            myexit(-1);
        }
    }

    return 0;
}

/*
 * Parses a bare IP string (no port) into this address. A string containing
 * ':' is treated as IPv6, anything else as IPv4. The address is cleared
 * first. A string that inet_pton() does not accept is fatal (logged, then
 * myexit(-1)). Returns 0 on success.
 */
int address_t::from_str_ip_only(char *str) {
    clear();

    const bool looks_like_v6 = (strchr(str, ':') != NULL);
    u32_t type = looks_like_v6 ? AF_INET6 : AF_INET;

    ((sockaddr *)&inner)->sa_family = type;

    // Pick the destination field matching the address family.
    void *dst;
    if (type == AF_INET) {
        dst = &inner.ipv4.sin_addr;
    } else {
        dst = &inner.ipv6.sin6_addr;
    }
    int ret = inet_pton(type, str, dst);

    switch (ret) {
        case 1:  // inet_pton returns 1 on success
            break;
        case 0:  // 0 if address type doesnt match
            mylog(log_error, "confusion in parsing %s, %d\n", str, ret);
            myexit(-1);
            break;
        default:
            mylog(log_error, "ip_addr %s is invalid, %d\n", str, ret);
            myexit(-1);
            break;
    }
    return 0;
}

char *address_t::get_str() {
    static char res[max_addr_len];
    to_str(res);
    return res;
}
void address_t::to_str(char *s) {
    // static char res[max_addr_len];
    char ip_addr[max_addr_len];
    u32_t port;
    const char *ret = 0;
    if (get_type() == AF_INET6) {
        ret = inet_ntop(AF_INET6, &inner.ipv6.sin6_addr, ip_addr, max_addr_len);
        port = inner.ipv6.sin6_port;
    } else if (get_type() == AF_INET) {
        ret = inet_ntop(AF_INET, &inner.ipv4.sin_addr, ip_addr, max_addr_len);
        port = inner.ipv4.sin_port;
    } else {
        assert(0 == 1);
    }

    if (ret == 0)  // NULL on failure
    {
        mylog(log_error, "inet_ntop failed\n");
        myexit(-1);
    }

    port = ntohs(port);

    ip_addr[max_addr_len - 1] = 0;
    if (get_type() == AF_INET6) {
        sprintf(s, "[%s]:%u", ip_addr, (u32_t)port);
    } else {
        sprintf(s, "%s:%u", ip_addr, (u32_t)port);
    }

    // return res;
}

char *address_t::get_ip() {
    char ip_addr[max_addr_len];
    static char s[max_addr_len];
    const char *ret = 0;
    if (get_type() == AF_INET6) {
        ret = inet_ntop(AF_INET6, &inner.ipv6.sin6_addr, ip_addr, max_addr_len);
    } else if (get_type() == AF_INET) {
        ret = inet_ntop(AF_INET, &inner.ipv4.sin_addr, ip_addr, max_addr_len);
    } else {
        assert(0 == 1);
    }

    if (ret == 0)  // NULL on failure
    {
        mylog(log_error, "inet_ntop failed\n");
        myexit(-1);
    }

    ip_addr[max_addr_len - 1] = 0;
    if (get_type() == AF_INET6) {
        sprintf(s, "%s", ip_addr);
    } else {
        sprintf(s, "%s", ip_addr);
    }

    return s;
}

/*
 * Initialises this address from a raw sockaddr of slen bytes. The address is
 * cleared first; only AF_INET and AF_INET6 are accepted, and slen must be
 * exactly the size of the matching sockaddr structure (asserted). Any other
 * family trips an assertion and copies nothing. Always returns 0.
 */
int address_t::from_sockaddr(sockaddr *addr, socklen_t slen) {
    clear();

    switch (addr->sa_family) {
        case AF_INET6:
            assert(slen == sizeof(sockaddr_in6));
            break;
        case AF_INET:
            assert(slen == sizeof(sockaddr_in));
            break;
        default:
            assert(0 == 1);
            return 0;
    }

    memcpy(&inner, addr, slen);
    return 0;
}

/*
 * Creates a non-blocking UDP socket of this address' family, applies
 * socket_buf_size and connects it to this address.
 *
 * Returns the new fd, or -1 when the socket cannot be created or connected
 * (the fd is closed again in the latter case).
 */
int address_t::new_connected_udp_fd() {
    const int fd = socket(get_type(), SOCK_DGRAM, IPPROTO_UDP);
    if (fd < 0) {
        mylog(log_warn, "create udp_fd error\n");
        return -1;
    }

    setnonblocking(fd);
    set_buf_size(fd, socket_buf_size);

    mylog(log_debug, "created new udp_fd %d\n", fd);

    const int rc = connect(fd, (struct sockaddr *)&inner, get_len());
    if (rc == 0) {
        return fd;
    }

    // Connecting failed: report it and do not leak the descriptor.
    mylog(log_warn, "udp fd connect fail %d %s\n", rc, strerror(errno));
    close(fd);
    return -1;
}

// Compares two IPs using the representation selected by the global
// raw_ip_version: v4 for AF_INET, the bytes of v6 for AF_INET6. Any other
// raw_ip_version trips an assertion (and yields false when asserts are off).
bool my_ip_t::equal(const my_ip_t &b) const {
    switch (raw_ip_version) {
        case AF_INET:
            return v4 == b.v4;
        case AF_INET6:
            return memcmp(&v6, &b.v6, sizeof(v6)) == 0;
        default:
            break;
    }
    assert(0 == 1);
    return 0;
}
char *my_ip_t::get_str1() const {
    static char res[max_addr_len];
    if (raw_ip_version == AF_INET6) {
        assert(inet_ntop(AF_INET6, &v6, res, max_addr_len) != 0);
    } else {
        assert(raw_ip_version == AF_INET);
        assert(inet_ntop(AF_INET, &v4, res, max_addr_len) != 0);
    }
    return res;
}
char *my_ip_t::get_str2() const {
    static char res[max_addr_len];
    if (raw_ip_version == AF_INET6) {
        assert(inet_ntop(AF_INET6, &v6, res, max_addr_len) != 0);
    } else {
        assert(raw_ip_version == AF_INET);
        assert(inet_ntop(AF_INET, &v4, res, max_addr_len) != 0);
    }
    return res;
}

// Copies the IP part of tmp_addr into this object. The family of tmp_addr
// must equal the global raw_ip_version (AF_INET or AF_INET6); anything else
// trips an assertion and leaves this object unchanged. Always returns 0.
int my_ip_t::from_address_t(address_t tmp_addr) {
    const bool family_matches = (tmp_addr.get_type() == raw_ip_version);
    if (!family_matches) {
        assert(0 == 1);
        return 0;
    }

    switch (raw_ip_version) {
        case AF_INET:
            v4 = tmp_addr.inner.ipv4.sin_addr.s_addr;
            break;
        case AF_INET6:
            v6 = tmp_addr.inner.ipv6.sin6_addr;
            break;
        default:
            assert(0 == 1);
            break;
    }
    return 0;
}
/*
int my_ip_t::from_str(char * str)
{
        u32_t type;
        if(strchr(str,':')==NULL)
                type=AF_INET;
        else
                type=AF_INET6;
        int ret;
        ret=inet_pton(type, str,this);
        if(ret==0)  // 0 if address type doesnt match
        {
                mylog(log_error,"confusion in parsing %s, %d\n",str,ret);
                myexit(-1);
        }
        else if(ret==1) // inet_pton returns 1 on success
        {
                //okay
        }
        else
        {
                mylog(log_error,"ip_addr %s is invalid, %d\n",str,ret);
                myexit(-1);
        }
        return 0;
}*/
#ifdef UDP2RAW_MP

/*
 * Platform start-up hook. On MinGW it initialises Winsock 2.2 (exiting the
 * process when that fails) and then raises the C runtime's stdio limit with
 * _setmaxstdio(), trying increasing values until one is rejected. On other
 * platforms it does nothing. Always returns 0.
 */
int init_ws() {
#if defined(__MINGW32__)
    WORD wVersionRequested;
    WSADATA wsaData;
    int err;

    /* Use the MAKEWORD(lowbyte, highbyte) macro declared in Windef.h */
    wVersionRequested = MAKEWORD(2, 2);

    err = WSAStartup(wVersionRequested, &wsaData);
    if (err != 0) {
        /* Tell the user that we could not find a usable */
        /* Winsock DLL.                                  */
        printf("WSAStartup failed with error: %d\n", err);
        exit(-1);
    }

    /* Confirm that the WinSock DLL supports 2.2.*/
    /* Note that if the DLL supports versions greater    */
    /* than 2.2 in addition to 2.2, it will still return */
    /* 2.2 in wVersion since that is the version we      */
    /* requested.                                        */

    if (LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 2) {
        /* Tell the user that we could not find a usable */
        /* WinSock DLL.                                  */
        printf("Could not find a usable version of Winsock.dll\n");
        WSACleanup();
        exit(-1);
    } else {
        printf("The Winsock 2.2 dll was found okay");
    }

    /* Candidate limits in increasing order; index 0 stands for "nothing applied", -1 ends the list. */
    int tmp[] = {0, 100, 200, 300, 500, 800, 1000, 2000, 3000, 4000, -1};
    int succ = 0;
    for (int i = 1; tmp[i] != -1; i++) {
        /* Request the candidate itself, so that the value reported below is the one really in effect. */
        if (_setmaxstdio(tmp[i]) == -1)
            break;
        else
            succ = i;
    }
    printf(", _setmaxstdio() was set to %d\n", tmp[succ]);
#endif
    return 0;
}

#endif

#if defined(__MINGW32__)
int inet_pton(int af, const char *src, void *dst) {
    struct sockaddr_storage ss;
    int size = sizeof(ss);
    char src_copy[max_addr_len + 1];

    ZeroMemory(&ss, sizeof(ss));
    /* stupid non-const API */
    strncpy(src_copy, src, max_addr_len + 1);
    src_copy[max_addr_len] = 0;

    if (WSAStringToAddress(src_copy, af, NULL, (struct sockaddr *)&ss, &size) == 0) {
        switch (af) {
            case AF_INET:
                *(struct in_addr *)dst = ((struct sockaddr_in *)&ss)->sin_addr;
                return 1;
            case AF_INET6:
                *(struct in6_addr *)dst = ((struct sockaddr_in6 *)&ss)->sin6_addr;
                return 1;
        }
    }
    return 0;
}

const char *inet_ntop(int af, const void *src, char *dst, socklen_t size) {
    struct sockaddr_storage ss;
    unsigned long s = size;

    ZeroMemory(&ss, sizeof(ss));
    ss.ss_family = af;

    switch (af) {
        case AF_INET:
            ((struct sockaddr_in *)&ss)->sin_addr = *(struct in_addr *)src;
            break;
        case AF_INET6:
            ((struct sockaddr_in6 *)&ss)->sin6_addr = *(struct in6_addr *)src;
            break;
        default:
            return NULL;
    }
    /* cannot direclty use &size because of strict aliasing rules */
    return (WSAAddressToString((struct sockaddr *)&ss, sizeof(ss), NULL, dst, &s) == 0) ? dst : NULL;
}
char *get_sock_error() {
    static char buf[1000];
    int e = WSAGetLastError();
    wchar_t *s = NULL;
    FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                   NULL, e,
                   MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                   (LPWSTR)&s, 0, NULL);
    sprintf(buf, "%d:%S", e, s);
    int len = strlen(buf);
    while (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) {
        len--;
        buf[len] = 0;
    }
    LocalFree(s);
    return buf;
}
/* Numeric code of the last socket error (Winsock flavour). */
int get_sock_errno() {
    const int last_error = WSAGetLastError();
    return last_error;
}
#else
/*
 * Describe the current errno as "<code>:<strerror text>".
 * Returns a pointer to a static buffer that the next call overwrites.
 */
char *get_sock_error() {
    static char buf[1000];
    const int code = errno;
    sprintf(buf, "%d:%s", code, strerror(code));
    return buf;
}
/* Numeric code of the last socket error (plain errno on this platform). */
int get_sock_errno() {
    const int last_error = errno;
    return last_error;
}
#endif

/*
 * Current time in microseconds, derived from ev_time(), adjusted so that
 * the returned value never goes backwards between calls.
 *
 * Whenever the corrected reading falls below the largest value seen so
 * far, the shortfall is added to a persistent offset.
 */
u64_t get_current_time_us() {
    static u64_t value_fix = 0;      // accumulated offset added to every raw reading
    static u64_t largest_value = 0;  // highest corrected reading observed so far

    const u64_t raw_value = (u64_t)(ev_time() * 1000 * 1000);
    const u64_t corrected = raw_value + value_fix;

    if (corrected >= largest_value) {
        largest_value = corrected;
    } else {
        // Clock stepped back: grow the offset by exactly the missing amount.
        value_fix += largest_value - corrected;
    }

    return raw_value + value_fix;  // reading with the (possibly updated) offset
}

/* Current time in milliseconds: get_current_time_us() divided by 1000. */
u64_t get_current_time() {
    const u64_t micros = get_current_time_us();
    return micros / 1000;
}

/* Combine two 32-bit values into one 64-bit value: a is the high half, b the low half. */
u64_t pack_u64(u32_t a, u32_t b) {
    const u64_t high = ((u64_t)a) << 32u;
    return high + b;
}
/* High 32 bits of a. */
u32_t get_u64_h(u64_t a) {
    const u64_t shifted = a >> 32u;
    return shifted;
}
/* Low 32 bits of a. */
u32_t get_u64_l(u64_t a) {
    const u64_t low = a & 0xffffffffull;
    return low;
}

/*
 * Format an IPv4 address (stored as given into in_addr::s_addr) via
 * inet_ntoa(). The returned pointer is whatever inet_ntoa() returns.
 */
char *my_ntoa(u32_t ip) {
    in_addr addr;
    addr.s_addr = ip;
    char *text = inet_ntoa(addr);
    return text;
}
/*
void init_random_number_fd()
{

        random_number_fd=open("/dev/urandom",O_RDONLY);

        if(random_number_fd==-1)
        {
                mylog(log_fatal,"error open /dev/urandom\n");
                myexit(-1);
        }
        setnonblocking(random_number_fd);
}*/

#if !defined(__MINGW32__)
/*
 * Owner of a read-only, non-blocking descriptor for /dev/urandom.
 * The single global instance `random_fd` opens the device in its
 * constructor and terminates the program if that fails.
 */
struct random_fd_t {
    int random_number_fd;

    random_fd_t() : random_number_fd(open("/dev/urandom", O_RDONLY)) {
        if (random_number_fd == -1) {
            mylog(log_fatal, "error open /dev/urandom\n");
            myexit(-1);
        }
        setnonblocking(random_number_fd);
    }

    /* Descriptor opened by the constructor. */
    int get_fd() {
        return random_number_fd;
    }
} random_fd;
#else
struct my_random_t {
    std::random_device rd;
    std::mt19937 gen;
    std::uniform_int_distribution<u64_t> dis64;
    std::uniform_int_distribution<u32_t> dis32;

    std::uniform_int_distribution<unsigned char> dis8;

    my_random_t() {
        // std::mt19937 gen_tmp(rd());  //random device is broken on mingw
        timespec tmp_time;
        clock_gettime(CLOCK_MONOTONIC, &tmp_time);
        long long a = ((u64_t)tmp_time.tv_sec) * 1000000000llu + ((u64_t)tmp_time.tv_nsec);
        std::mt19937 gen_tmp(a);
        gen = gen_tmp;
        gen.discard(700000);  // magic
    }
    u64_t gen64() {
        return dis64(gen);
    }
    u32_t gen32() {
        return dis32(gen);
    }

    unsigned char gen8() {
        return dis8(gen);
    }
    /*int random_number_fd;
    random_fd_t()
    {
                    random_number_fd=open("/dev/urandom",O_RDONLY);
                    if(random_number_fd==-1)
                    {
                            mylog(log_fatal,"error open /dev/urandom\n");
                            myexit(-1);
                    }
                    setnonblocking(random_number_fd);
    }
    int get_fd()
    {
            return random_number_fd;
    }*/
} my_random;
#endif

/*
 * Return a random 64-bit value.
 * Non-MinGW: read straight from the /dev/urandom descriptor; the program
 * exits if the read does not deliver all bytes.
 * MinGW: drawn from the Mersenne Twister in my_random (pseudo-random).
 */
u64_t get_true_random_number_64() {
#if !defined(__MINGW32__)
    u64_t value;
    const int got = read(random_fd.get_fd(), &value, sizeof(value));
    if (got != sizeof(value)) {
        mylog(log_fatal, "get random number failed %d\n", got);
        myexit(-1);
    }
    return value;
#else
    return my_random.gen64();  // fake random number
#endif
}
/*
 * Return a random 32-bit value.
 * Non-MinGW: read straight from the /dev/urandom descriptor; the program
 * exits if the read does not deliver all bytes.
 * MinGW: drawn from the Mersenne Twister in my_random (pseudo-random).
 */
u32_t get_true_random_number() {
#if !defined(__MINGW32__)
    u32_t value;
    const int got = read(random_fd.get_fd(), &value, sizeof(value));
    if (got != sizeof(value)) {
        mylog(log_fatal, "get random number failed %d\n", got);
        myexit(-1);
    }
    return value;
#else
    return my_random.gen32();  // fake random number
#endif
}
/* Random 32-bit value that is never zero ("nz"): redraw until non-zero. */
u32_t get_true_random_number_nz() {
    u32_t ret;
    do {
        ret = get_true_random_number();
    } while (ret == 0);
    return ret;
}

/*
 * Runtime byte-order probe: returns 1 when the first byte in memory of the
 * integer 1 is zero (big endian), 0 otherwise.
 */
inline int is_big_endian() {
    int probe = 1;
    const char first_byte = *((char *)&probe);
    return first_byte == 0;
}
/*
 * Convert a 64-bit value from network to host byte order.
 * On little-endian builds the two 32-bit halves are swapped and each is
 * passed through ntohl(); on other builds the value is returned unchanged.
 */
u64_t ntoh64(u64_t a) {
#ifdef UDP2RAW_LITTLE_ENDIAN
    const u32_t new_high = ntohl(get_u64_l(a));
    const u32_t new_low = ntohl(get_u64_h(a));
    return pack_u64(new_high, new_low);
#else
    return a;
#endif
}
/* Host to network order for 64-bit values; implemented by calling ntoh64(). */
u64_t hton64(u64_t a) {
    const u64_t converted = ntoh64(a);
    return converted;
}

/* Store w at p as two bytes, most significant byte first. */
void write_u16(char *p, u16_t w) {
    unsigned char *out = (unsigned char *)p;
    out[0] = (w >> 8);
    out[1] = (w & 0xff);
}
/* Read two bytes at p as a 16-bit value, most significant byte first. */
u16_t read_u16(char *p) {
    const unsigned char *in = (const unsigned char *)p;
    u16_t res = 0;
    for (int i = 0; i < 2; i++) {
        res = in[i] + (res << 8);
    }
    return res;
}

/* Store l at p as four bytes, most significant byte first. */
void write_u32(char *p, u32_t l) {
    unsigned char *out = (unsigned char *)p;
    for (int i = 0; i < 4; i++) {
        const int shift = 8 * (3 - i);  // 24, 16, 8, 0
        out[i] = (unsigned char)((l >> shift) & 0xff);
    }
}
/* Read four bytes at p as a 32-bit value, most significant byte first. */
u32_t read_u32(char *p) {
    const unsigned char *in = (const unsigned char *)p;
    u32_t res = 0;
    for (int i = 0; i < 4; i++) {
        res = in[i] + (res << 8);
    }
    return res;
}

// Placeholder: not implemented. The body is an assertion that always
// fails, so a call aborts when assertions are enabled; neither argument
// is used.
void write_u64(char *s, u64_t a) {
    assert(0 == 1);
}
// Placeholder: not implemented. The body is an assertion that always
// fails, so a call aborts when assertions are enabled; if the assertion
// is compiled out it returns 0 without reading from s.
u64_t read_u64(char *s) {
    assert(0 == 1);
    return 0;
}

/*
 * Put a descriptor into non-blocking mode.
 * Non-MinGW: adds O_NONBLOCK to the file status flags via fcntl(); the
 * program exits if either fcntl() call fails.
 * MinGW: uses ioctlsocket(FIONBIO); a failure is only printed.
 */
void setnonblocking(int sock) {
#if !defined(__MINGW32__)
    const int current_flags = fcntl(sock, F_GETFL);
    if (current_flags < 0) {
        mylog(log_fatal, "fcntl(sock,GETFL)\n");
        myexit(1);
    }

    const int wanted_flags = current_flags | O_NONBLOCK;
    if (fcntl(sock, F_SETFL, wanted_flags) < 0) {
        mylog(log_fatal, "fcntl(sock,SETFL,opts)\n");
        myexit(1);
    }
#else
    u_long iMode = 1;  // non-zero argument for FIONBIO
    const int iResult = ioctlsocket(sock, FIONBIO, &iMode);
    if (iResult != NO_ERROR) {
        printf("ioctlsocket failed with error: %d\n", iResult);
    }
#endif
}

/*
    Generic checksum calculation function
*/
/*
 * Ones'-complement checksum over nbytes bytes starting at ptr.
 * The data is summed as 16-bit words in host order; a trailing odd byte is
 * placed in the first byte of a zeroed word. The result is stored as-is
 * into packet headers, which is why the same code works for both big and
 * little endian.
 */
unsigned short csum(const unsigned short *ptr, int nbytes) {
    long total = 0;
    const unsigned short *cursor = ptr;
    int remaining = nbytes;

    for (; remaining > 1; remaining -= 2) {
        total += *cursor++;
    }

    if (remaining == 1) {
        unsigned short tail = 0;
        *((u_char *)&tail) = *(u_char *)cursor;
        total += tail;
    }

    // Fold the carries back into the low 16 bits, then invert.
    total = (total >> 16) + (total & 0xffff);
    total += (total >> 16);
    const short folded = (short)~total;

    return folded;
}

/*
 * Ones'-complement checksum over a header followed by a payload, without
 * requiring the two to be contiguous in memory.
 *
 * header/hlen: first region; hlen must be even (asserted).
 * ptr/nbytes:  second region; may have odd length.
 * Works for both big and little endian, like csum().
 */
unsigned short csum_with_header(char *header, int hlen, const unsigned short *ptr, int nbytes) {
    assert(hlen % 2 == 0);

    long total = 0;

    // First region: hlen / 2 whole 16-bit words.
    const unsigned short *head_word = (unsigned short *)header;
    const int head_words = hlen / 2;
    for (int i = 0; i < head_words; i++) {
        total += head_word[i];
    }

    // Second region: whole words, then an optional trailing byte.
    const unsigned short *cursor = ptr;
    int remaining = nbytes;
    for (; remaining > 1; remaining -= 2) {
        total += *cursor++;
    }
    if (remaining == 1) {
        unsigned short tail = 0;
        *((u_char *)&tail) = *(u_char *)cursor;
        total += tail;
    }

    // Fold the carries back into the low 16 bits, then invert.
    total = (total >> 16) + (total & 0xffff);
    total += (total >> 16);
    const short folded = (short)~total;

    return folded;
}

/*
 * Set both the send and receive buffer size of a socket.
 *
 * When force_socket_buf is not set, SO_SNDBUF / SO_RCVBUF are used.
 * When it is set, the multi-platform build exits as unsupported and the
 * Linux build uses SO_SNDBUFFORCE / SO_RCVBUFFORCE.
 * Any setsockopt() failure terminates the program. Returns 0.
 */
int set_buf_size(int fd, int socket_buf_size) {
    if (!force_socket_buf) {
        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_SNDBUF fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, get_sock_error());
            myexit(1);
        }
        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_RCVBUF fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, get_sock_error());
            myexit(1);
        }
        return 0;
    }

    // Forced variant from here on.
    if (is_udp2raw_mp) {
        mylog(log_fatal, "force_socket_buf not supported in this verion\n");
        myexit(-1);
    }
#ifdef UDP2RAW_LINUX
    if (setsockopt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
        mylog(log_fatal, "SO_SNDBUFFORCE fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
        myexit(1);
    }
    if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
        mylog(log_fatal, "SO_RCVBUFFORCE fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
        myexit(1);
    }
#endif
    return 0;
}

/*
 * Serialise three ids back to back, each converted with htonl().
 * data is pointed at an internal static buffer (overwritten by the next
 * call) and len is set to the number of bytes written. Returns 0.
 */
int numbers_to_char(my_id_t id1, my_id_t id2, my_id_t id3, char *&data, int &len) {
    static char buf[buf_len];

    const my_id_t ids[3] = {id1, id2, id3};
    for (int i = 0; i < 3; i++) {
        const my_id_t wire = htonl(ids[i]);
        memcpy(buf + sizeof(wire) * i, &wire, sizeof(wire));
    }

    data = buf;
    len = sizeof(my_id_t) * 3;
    return 0;
}

int char_to_numbers(const char *data, int len, my_id_t &id1, my_id_t &id2, my_id_t &id3) {
    if (len < int(sizeof(my_id_t) * 3)) return -1;
    // id1=ntohl(  *((id_t*)(data+0)) );
    memcpy(&id1, data + 0, sizeof(id1));
    id1 = ntohl(id1);
    // id2=ntohl(  *((id_t*)(data+sizeof(id_t))) );
    memcpy(&id2, data + sizeof(my_id_t), sizeof(id2));
    id2 = ntohl(id2);
    // id3=ntohl(  *((id_t*)(data+sizeof(id_t)*2)) );
    memcpy(&id3, data + sizeof(my_id_t) * 2, sizeof(id3));
    id3 = ntohl(id3);
    return 0;
}
/*
 * Parse a as a hexadecimal number (sscanf "%x") into output.
 * Returns 0 on success; logs an error and returns -1 when nothing parses.
 */
int hex_to_u32(const string &a, u32_t &output) {
    const int matched = sscanf(a.c_str(), "%x", &output);
    if (matched != 1) {
        mylog(log_error, "<%s> doesnt contain a hex\n", a.c_str());
        return -1;
    }
    return 0;
}
/*
 * Like hex_to_u32(), but the parsed value is additionally passed through
 * htonl() before being stored in output.
 * Returns 0 on success; logs an error and returns -1 when nothing parses.
 */
int hex_to_u32_with_endian(const string &a, u32_t &output) {
    const int matched = sscanf(a.c_str(), "%x", &output);
    if (matched != 1) {
        mylog(log_error, "<%s> doesnt contain a hex\n", a.c_str());
        return -1;
    }
    output = htonl(output);
    return 0;
}
/*
 * Wrap-around aware "a is ahead of b" for 32-bit counters: true when the
 * difference a - b, reinterpreted as a signed 32-bit value, is positive.
 */
bool larger_than_u32(u32_t a, u32_t b) {
    const i32_t signed_gap = i32_t(a - b);
    return signed_gap > 0;
}

bool larger_than_u16(uint16_t a, uint16_t b) {
    return ((i16_t(a - b)) > 0);
    /*
            uint16_t smaller,bigger;
            smaller=min(a,b);//smaller in normal sense
            bigger=max(a,b);
            uint16_t distance=min(bigger-smaller,smaller+(0xffff-bigger+1));
            if(distance==bigger-smaller)
            {
                    if(bigger==a)
                    {
                            return 1;
                    }
                    else
                    {
                            return 0;
                    }
            }
            else
            {
                    if(smaller==b)
                    {
                            return 0;
                    }
                    else
                    {
                            return 1;
                    }
            }*/
}

/*
 * Program-wide exit path. Resets the terminal colour when coloured logging
 * is on; on the Linux build it also cancels the keep thread if it is
 * running and clears the iptables rule. Then calls exit(a).
 */
void myexit(int a) {
    if (enable_log_color) {
        printf("%s\n", RESET);
    }
#ifdef UDP2RAW_LINUX
    if (keep_thread_running) {
        const int cancel_rc = pthread_cancel(keep_thread);
        if (cancel_rc == 0) {
            mylog(log_info, "pthread_cancel success\n");
        } else {
            mylog(log_warn, "pthread_cancel failed\n");
        }
    }
    clear_iptables_rule();
#endif
    exit(a);
}

/*
 * Split s into tokens separated by any of the characters in sp.
 *
 * Runs of separators count as one and leading/trailing separators are
 * ignored, so no empty tokens are produced (same rules as strtok()).
 * Implemented with std::string searches instead of strtok(): strtok()
 * keeps hidden global state and needs a writable buffer, which previously
 * meant writing through the pointer returned by c_str().
 */
vector<string> string_to_vec(const char *s, const char *sp) {
    vector<string> res;
    const string str = s;

    size_t begin = str.find_first_not_of(sp);
    while (begin != string::npos) {
        const size_t end = str.find_first_of(sp, begin);
        if (end == string::npos) {
            // Last token runs to the end of the input.
            res.push_back(str.substr(begin));
            break;
        }
        res.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(sp, end);
    }
    return res;
}

/*
 * Split s into lines on '\n', then split every line into fields on tabs
 * and spaces. Returns one vector of fields per non-empty line.
 */
vector<vector<string> > string_to_vec2(const char *s) {
    vector<vector<string> > res;
    const vector<string> lines = string_to_vec(s, "\n");
    for (vector<string>::const_iterator line = lines.begin(); line != lines.end(); ++line) {
        res.push_back(string_to_vec(line->c_str(), "\t "));
    }
    return res;
}
/*
 * Read a whole (text) file into output.
 *
 * Returns  0 on success; output receives the content up to the first NUL
 *            byte, as it is assigned from a C string.
 *         -1 when the file cannot be opened.
 *         -2 when the file is max_len (3 MiB) bytes or longer.
 *         -3 when read() fails.
 * output is left untouched on every error path. The descriptor is closed
 * on all paths.
 */
int read_file(const char *file, string &output) {
    const int max_len = 3 * 1024 * 1024;

    int fd = open(file, O_RDONLY);
    if (fd == -1) {
        mylog(log_error, "read_file %s fail\n", file);
        return -1;
    }

    // Properly sized scratch buffer; +1 leaves room for the terminator.
    vector<char> buf(max_len + 1);

    // read() may return fewer bytes than asked for, so keep going until
    // end of file or until the size limit is hit.
    int total = 0;
    while (total < max_len) {
        int got = read(fd, &buf[total], max_len - total);
        if (got < 0) {
            close(fd);
            mylog(log_error, "%s read fail %d\n", file, got);
            return -3;
        }
        if (got == 0) break;
        total += got;
    }
    close(fd);

    if (total == max_len) {
        mylog(log_error, "%s too long,buf not large enough\n", file);
        return -2;
    }

    buf[total] = 0;
    output = &buf[0];
    return 0;
}
/*
 * Run a shell command through popen() and capture its standard output.
 *
 * command0: command line; " 2>&1 " is appended unless show_log is set.
 * output:   on success points at a thread-local static buffer holding the
 *           NUL-terminated output (overwritten by the next call on the
 *           same thread).
 * flag:     show_command logs the command at info level instead of debug;
 *           show_log reports failures at warn level instead of debug.
 *
 * Returns 0 on success, -1 if popen() fails, -2 if the output reaches the
 * 1 MiB capture limit, -3 on a stream read error, -4 if pclose() or the
 * command's exit status is non-zero. The multi-platform build exits as
 * unsupported; a non-Linux build otherwise does nothing and returns 0.
 */
int run_command(string command0, char *&output, int flag) {
    if (is_udp2raw_mp) {
        mylog(log_fatal, "run_command not supported in this version\n");
        myexit(-1);
    }
#ifdef UDP2RAW_LINUX
    const int max_output = 1024 * 1024;
    FILE *in;

    if ((flag & show_log) == 0) command0 += " 2>&1 ";

    const char *command = command0.c_str();

    int level = (flag & show_log) ? log_warn : log_debug;

    if (flag & show_command) {
        mylog(log_info, "run_command %s\n", command);
    } else {
        mylog(log_debug, "run_command %s\n", command);
    }
    static __thread char buf[max_output + 100];
    buf[sizeof(buf) - 1] = 0;
    if (!(in = popen(command, "r"))) {
        mylog(level, "command %s popen failed,errno %s\n", command, strerror(errno));
        return -1;
    }

    // Element size 1, count max_output: fread() then returns the number of
    // BYTES read. With the arguments the other way round it returns the
    // number of complete 1 MiB items (0 or 1), which truncated the output.
    int len = fread(buf, 1, max_output, in);
    if (len == max_output) {
        buf[0] = 0;
        mylog(level, "too long,buf not larger enough\n");
        pclose(in);  // do not leak the pipe and child on the error path
        return -2;
    }
    buf[len] = 0;

    int ret;
    if ((ret = ferror(in))) {
        mylog(level, "command %s fread failed,ferror return value %d \n", command, ret);
        pclose(in);  // do not leak the pipe and child on the error path
        return -3;
    }
    output = buf;
    ret = pclose(in);

    int ret2 = WEXITSTATUS(ret);

    if (ret != 0 || ret2 != 0) {
        mylog(level, "commnad %s ,pclose returned %d ,WEXITSTATUS %d,errnor :%s \n", command, ret, ret2, strerror(errno));
        return -4;
    }

#endif
    return 0;
}
/*
int run_command_no_log(string command0,char * &output) {
    FILE *in;
    command0+=" 2>&1 ";
    const char * command=command0.c_str();
    mylog(log_debug,"run_command_no_log %s\n",command);
    static char buf[1024*1024+100];
    buf[sizeof(buf)-1]=0;
    if(!(in = popen(command, "r"))){
        mylog(log_debug,"command %s popen failed,errno %s\n",command,strerror(errno));
        return -1;
    }

    int len =fread(buf, 1024*1024, 1, in);
    if(len==1024*1024)
    {
        buf[0]=0;
        mylog(log_debug,"too long,buf not larger enough\n");
        return -2;
    }
    else
    {
        buf[len]=0;
    }
    int ret;
    if(( ret=ferror(in) ))
    {
        mylog(log_debug,"command %s fread failed,ferror return value %d \n",command,ret);
        return -3;
    }
    //if(output!=0)
    output=buf;
    ret= pclose(in);

    int ret2=WEXITSTATUS(ret);

    if(ret!=0||ret2!=0)
    {
        mylog(log_debug,"commnad %s ,pclose returned %d ,WEXITSTATUS %d,errnor :%s \n",command,ret,ret2,strerror(errno));
        return -4;
    }

    return 0;

}*/

// Remove preceding and trailing characters
/*
 * Return str with every leading and trailing occurrence of c removed.
 * A string that is empty or consists only of c yields "".
 */
string trim(const string &str, char c) {
    const size_t head = str.find_first_not_of(c);
    if (head == string::npos) {
        return "";
    }
    const size_t tail = str.find_last_not_of(c);
    const size_t kept = tail - head + 1;
    return string(str, head, kept);
}

/*
 * Parse one configuration line into an option and its optional argument.
 *
 * Leading and trailing spaces/tabs are ignored. Returns
 *   - an empty vector for a blank line or a line starting with '#';
 *   - {option} when the line holds a single word;
 *   - {option, rest} otherwise, where rest is everything after the
 *     whitespace that follows the option (inner whitespace preserved).
 * A non-comment line that does not start with '-' is fatal.
 *
 * Works on copies made with std::string operations; the string's internal
 * buffer is never written through c_str().
 */
vector<string> parse_conf_line(const string &s0) {
    vector<string> res;
    const char *blanks = " \t";

    const size_t last = s0.find_last_not_of(blanks);
    if (last == string::npos) return res;  // empty or whitespace only
    const size_t first = s0.find_first_not_of(blanks);

    const string line = s0.substr(first, last - first + 1);
    if (line[0] == '#') return res;
    if (line[0] != '-') {
        // Report the line with only the trailing whitespace removed.
        mylog(log_fatal, "line :<%s> not begin with '-' ", s0.substr(0, last + 1).c_str());
        myexit(-1);
    }

    const size_t key_end = line.find_first_of(blanks);
    if (key_end == string::npos) {
        res.push_back(line);
        return res;
    }

    // line has no trailing blanks, so a non-blank always follows key_end.
    const size_t value_begin = line.find_first_not_of(blanks, key_end);
    res.push_back(line.substr(0, key_end));
    res.push_back(line.substr(value_begin));
    return res;
}

/*
 * Create (or reuse) a named pipe at `file`, open it read/write, verify
 * that it really is a FIFO and switch it to non-blocking mode.
 * Returns the open descriptor. Every failure is fatal. An already
 * existing file only produces a warning at creation time. The MinGW
 * build does not support this and exits.
 */
int create_fifo(char *file) {
#if !defined(__MINGW32__)
    const int made = mkfifo(file, 0666);
    if (made != 0 && errno == EEXIST) {
        mylog(log_warn, "warning fifo file %s exist\n", file);
    } else if (made != 0) {
        mylog(log_fatal, "create fifo file %s failed\n", file);
        myexit(-1);
    }

    const int fifo_fd = open(file, O_RDWR);
    if (fifo_fd < 0) {
        mylog(log_fatal, "create fifo file %s failed\n", file);
        myexit(-1);
    }

    struct stat info;
    if (fstat(fifo_fd, &info) != 0) {
        mylog(log_fatal, "fstat failed for fifo file %s\n", file);
        myexit(-1);
    }

    // The path may have existed before as something other than a FIFO.
    if (!S_ISFIFO(info.st_mode)) {
        mylog(log_fatal, "%s is not a fifo\n", file);
        myexit(-1);
    }

    setnonblocking(fifo_fd);
    return fifo_fd;
#else
    mylog(log_fatal, "--fifo not supported in this version\n");
    myexit(-1);
    return 0;
#endif
}

/*
void ip_port_t::from_u64(u64_t u64)
{
        ip=get_u64_h(u64);
        port=get_u64_l(u64);
}
u64_t ip_port_t::to_u64()
{
        return pack_u64(ip,port);
}
char * ip_port_t::to_s()
{
        static char res[40];
        sprintf(res,"%s:%d",my_ntoa(ip),port);
        return res;
}*/

/* Dump len bytes of a at debug level as "<xx>" hex pairs, then a newline. */
void print_binary_chars(const char *a, int len) {
    const unsigned char *bytes = (const unsigned char *)a;
    int i = 0;
    while (i < len) {
        log_bare(log_debug, "<%02x>", (int)bytes[i]);
        i++;
    }
    log_bare(log_debug, "\n");
}

/*
 * djb2 hash (xor variant) over len bytes: hash = (hash * 33) ^ byte,
 * starting from 5381. The result is passed through htonl().
 */
u32_t djb2(unsigned char *str, int len) {
    u32_t hash = 5381;
    for (int i = 0; i < len; i++) {
        const int c = str[i];
        hash = (hash * 33) ^ c;
    }
    return htonl(hash);
}

/*
 * sdbm hash over len bytes: hash = byte + hash * 65599 (written in the
 * source as shifts: (hash << 6) + (hash << 16) - hash), starting from 0.
 * The result is returned without any byte-order conversion.
 */
u32_t sdbm(unsigned char *str, int len) {
    u32_t hash = 0;
    const unsigned char *cursor = str;
    for (int left = len; left > 0; left--) {
        const int c = *cursor++;
        hash = c + hash * 65599u;
    }
    return hash;
}


================================================
FILE: common.h
================================================
/*
 * common.h
 *
 *  Created on: Jul 29, 2017
 *      Author: wangyu
 */

#ifndef UDP2RAW_COMMON_H_
#define UDP2RAW_COMMON_H_
#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>

#include <unistd.h>
#include <errno.h>
#include <sys/stat.h>
#include <stdlib.h>  //for exit(0);
#include <errno.h>   //For errno - the error number
#include <fcntl.h>
#include <sys/time.h>
#include <time.h>
#include <stdarg.h>
#include <assert.h>
#include <pthread.h>

#ifndef USE_LIBNET
#define NO_LIBNET
#endif

#if defined(UDP2RAW_MP)
const int is_udp2raw_mp = 1;
#if !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <pcap.h>
#else
#include <pcap_wrapper.h>
#define NO_LIBNET
#endif

#ifndef NO_LIBNET
#include <libnet.h>
#endif

#else
#define UDP2RAW_LINUX
const int is_udp2raw_mp = 0;
//#include <linux/if_ether.h>
#include <linux/filter.h>
#include <linux/if_packet.h>
#include <sys/epoll.h>
//#include <sys/wait.h> //signal
#include <netinet/if_ether.h>
#include <net/if.h>
#include <sys/timerfd.h>

#endif

#if !defined(NO_LIBEV_EMBED)
#include <my_ev.h>
#else
#include "ev.h"
#endif

#if defined(__MINGW32__)
#include <winsock2.h>
#include <ws2ipdef.h>
typedef unsigned char u_int8_t;
typedef unsigned short u_int16_t;
typedef unsigned int u_int32_t;
typedef int socklen_t;
#else
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#endif

#include <unordered_map>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <list>
using namespace std;

// Compile-time byte-order detection.
// Exactly one of UDP2RAW_BIG_ENDIAN / UDP2RAW_LITTLE_ENDIAN must end up
// defined; the build is stopped otherwise. Each test ORs together the
// generic byte-order macros and the architecture-specific ones.
#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN ||             \
    defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ || \
    defined(__BIG_ENDIAN__) ||                                           \
    defined(__ARMEB__) ||                                                \
    defined(__THUMBEB__) ||                                              \
    defined(__AARCH64EB__) ||                                            \
    defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__)
#define UDP2RAW_BIG_ENDIAN 1
#endif

#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN ||             \
    defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || \
    defined(__LITTLE_ENDIAN__) ||                                           \
    defined(__ARMEL__) ||                                                   \
    defined(__THUMBEL__) ||                                                 \
    defined(__AARCH64EL__) ||                                               \
    defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__)
#define UDP2RAW_LITTLE_ENDIAN 1
#endif

// Both defined: the compiler's macros contradict each other.
#if defined(UDP2RAW_BIG_ENDIAN) && defined(UDP2RAW_LITTLE_ENDIAN)
#error "endian detection conflicts"
#endif

// Neither defined: unknown toolchain, refuse to guess.
#if !defined(UDP2RAW_BIG_ENDIAN) && !defined(UDP2RAW_LITTLE_ENDIAN)
#error "endian detection failed"
#endif

#if defined(__MINGW32__)
int inet_pton(int af, const char *src, void *dst);
const char *inet_ntop(int af, const void *src, char *dst, socklen_t size);
#define setsockopt(a, b, c, d, e) setsockopt(a, b, c, (const char *)(d), e)
#endif

char *get_sock_error();
int get_sock_errno();

// Platform-neutral socket handle type and close function:
// my_fd_t is SOCKET on MinGW and int elsewhere; sock_close() forwards to
// closesocket() or close() respectively and returns that call's result.
#if defined(__MINGW32__)
typedef SOCKET my_fd_t;
inline int sock_close(my_fd_t fd) {
    return closesocket(fd);
}
#else
typedef int my_fd_t;
inline int sock_close(my_fd_t fd) {
    return close(fd);
}

#endif

// Fixed-width integer aliases used throughout the project.
typedef unsigned long long u64_t;  // this works on most platform,avoid using the PRId64
typedef long long i64_t;

typedef unsigned int u32_t;
typedef int i32_t;

typedef unsigned short u16_t;
typedef short i16_t;

// Id type: 32 bits, converted with htonl()/ntohl() by numbers_to_char() / char_to_numbers().
typedef u32_t my_id_t;

typedef u64_t iv_t;

typedef u64_t padding_t;

typedef u64_t anti_replay_seq_t;

// Timestamp type; lru_collector_t stores get_current_time() results in it.
typedef u64_t my_time_t;

// Used as the size of the scratch copy in the MinGW inet_pton() replacement.
const int max_addr_len = 100;

// Read by set_buf_size() to choose between the plain and the *FORCE socket options.
extern int force_socket_buf;

extern int g_fix_gro;

/*
struct ip_port_t
{
        u32_t ip;
        int port;
        void from_u64(u64_t u64);
        u64_t to_u64();
        char * to_s();
};*/

typedef u64_t fd64_t;

u32_t djb2(unsigned char *str, int len);
u32_t sdbm(unsigned char *str, int len);

// An IPv4 or IPv6 socket address (address + port) stored by value.
// Equality and hashing operate on the raw bytes of the whole union, which
// is why every setter zeroes the storage first.
struct address_t  // TODO scope id
{
    // Hash functor over the raw storage bytes, for use as a container hasher.
    struct hash_function {
        u32_t operator()(const address_t &key) const {
            return sdbm((unsigned char *)&key.inner, sizeof(key.inner));
        }
    };

    union storage_t  // sockaddr_storage is too huge, we dont use it.
    {
        sockaddr_in ipv4;
        sockaddr_in6 ipv6;
    };
    storage_t inner;

    // A fresh object is all zero bytes.
    address_t() {
        clear();
    }

    // Zero the whole storage.
    void clear() {
        memset(&inner, 0, sizeof(inner));
    }

    // Set to an IPv4 address. ip is stored into s_addr as given; port is
    // converted with htons(). Returns 0.
    int from_ip_port(u32_t ip, int port) {
        clear();
        sockaddr_in &v4 = inner.ipv4;
        v4.sin_family = AF_INET;
        v4.sin_port = htons(port);
        v4.sin_addr.s_addr = ip;
        return 0;
    }

    // Set from a family (AF_INET / AF_INET6) and a pointer to the matching
    // raw address (u32_t or in6_addr). port is converted with htons().
    // Any other family leaves the object cleared. Returns 0.
    int from_ip_port_new(int type, void *ip, int port) {
        clear();
        switch (type) {
            case AF_INET:
                inner.ipv4.sin_family = AF_INET;
                inner.ipv4.sin_port = htons(port);
                inner.ipv4.sin_addr.s_addr = *((u32_t *)ip);
                break;
            case AF_INET6:
                inner.ipv6.sin6_family = AF_INET6;
                inner.ipv6.sin6_port = htons(port);
                inner.ipv6.sin6_addr = *((in6_addr *)ip);
                break;
            default:
                break;
        }
        return 0;
    }

    int from_str(char *str);

    int from_str_ip_only(char *str);

    int from_sockaddr(sockaddr *, socklen_t);

    char *get_str();
    void to_str(char *);

    // Address family of the stored address; asserts it is AF_INET or AF_INET6.
    inline u32_t get_type() {
        u32_t ret = ((sockaddr *)&inner)->sa_family;
        assert(ret == AF_INET || ret == AF_INET6);
        return ret;
    }

    // Size in bytes of the sockaddr structure matching the stored family.
    inline u32_t get_len() {
        const u32_t type = get_type();
        if (type == AF_INET) {
            return sizeof(sockaddr_in);
        }
        if (type == AF_INET6) {
            return sizeof(sockaddr_in6);
        }
        assert(0 == 1);
        return -1;
    }

    // Port in host byte order.
    inline u32_t get_port() {
        const u32_t type = get_type();
        if (type == AF_INET) {
            return ntohs(inner.ipv4.sin_port);
        }
        if (type == AF_INET6) {
            return ntohs(inner.ipv6.sin6_port);
        }
        assert(0 == 1);
        return -1;
    }

    // Replace the port (given in host byte order) of the stored address.
    inline void set_port(int port) {
        const u32_t type = get_type();
        if (type == AF_INET) {
            inner.ipv4.sin_port = htons(port);
        } else if (type == AF_INET6) {
            inner.ipv6.sin6_port = htons(port);
        } else {
            assert(0 == 1);
        }
    }

    // Byte-wise comparison of the whole storage union.
    bool operator==(const address_t &b) const {
        const int diff = memcmp(&this->inner, &b.inner, sizeof(this->inner));
        return diff == 0;
    }

    int new_connected_udp_fd();

    char *get_ip();
};

namespace std {
// std::hash specialisation so address_t can be used directly as a key in
// unordered containers; hashes the raw storage bytes with sdbm().
template <>
struct hash<address_t> {
    std::size_t operator()(const address_t &key) const {
        const u32_t digest = sdbm((unsigned char *)&key.inner, sizeof(key.inner));
        return digest;
    }
};
}  // namespace std

union my_ip_t  // just a simple version of address_t,stores ip only
{
    // The two members share storage. The union itself does not record
    // which one is valid.
    u32_t v4;     // IPv4 address
    in6_addr v6;  // IPv6 address

    // Member functions are defined outside this header.
    bool equal(const my_ip_t &b) const;

    // int from_str(char * str);
    char *get_str1() const;
    char *get_str2() const;

    int from_address_t(address_t a);
};

// Base class that makes copying a runtime error: both the copy
// constructor and the copy assignment operator contain an assertion that
// always fails. Default construction is allowed.
struct not_copy_able_t {
    not_copy_able_t() {
    }
    // Copy construction: asserts unconditionally.
    not_copy_able_t(const not_copy_able_t &other) {
        assert(0 == 1);
    }
    // Copy assignment: asserts unconditionally; returns the right-hand side
    // if the assertion is compiled out.
    const not_copy_able_t &operator=(const not_copy_able_t &other) {
        assert(0 == 1);
        return other;
    }
};

// Buffer sizing constants. Each *_buf_len is the matching *_data_len plus
// some extra room.
const int huge_data_len = 65535 + 100;  // a packet with link level header might be larger than 65535
const int huge_buf_len = huge_data_len + 100;

const int max_data_len = 1800;
const int buf_len = max_data_len + 400;  // also the size of numbers_to_char()'s static buffer

// const int max_address_len=512;

#ifdef UDP2RAW_MP
const int queue_len = 200;

// Fixed-capacity ring buffer of packets.
// Holds up to queue_len - 1 entries of at most huge_buf_len bytes each;
// one slot is always kept free to tell "full" apart from "empty".
// The object is large (queue_len * huge_buf_len bytes of payload storage).
struct queue_t {
    char data[queue_len][huge_buf_len];  // payload slots
    int data_len[queue_len];             // number of valid bytes in each slot

    int head = 0;  // index of the oldest entry
    int tail = 0;  // index of the next free slot

    // Drop all entries.
    void clear() {
        head = tail = 0;
    }
    // 1 when there is nothing to pop, 0 otherwise.
    int empty() {
        if (head == tail)
            return 1;
        else
            return 0;
    }
    // 1 when no further entry can be pushed, 0 otherwise.
    int full() {
        if ((tail + 1) % queue_len == head)
            return 1;
        else
            return 0;
    }
    // Expose the oldest entry without removing it. p points into the
    // queue's own storage. The queue must not be empty.
    void peek_front(char *&p, int &len) {
        assert(!empty());
        p = data[head];
        len = data_len[head];
    }
    // Remove the oldest entry. The queue must not be empty.
    void pop_front() {
        assert(!empty());
        head++;
        head %= queue_len;
    }
    // Append a copy of len bytes from p. The queue must not be full and
    // len must fit into one slot.
    void push_back(char *p, int len) {
        assert(!full());
        // Guard the fixed-size slot: an oversized or negative length would
        // otherwise make memcpy write outside data[tail].
        assert(len >= 0 && len <= huge_buf_len);
        memcpy(data[tail], p, len);
        data_len[tail] = len;
        tail++;
        tail %= queue_len;
    }
};

int init_ws();
#endif
// Time in milliseconds (get_current_time_us() / 1000 in common.cpp).
u64_t get_current_time();
// a becomes the high 32 bits, b the low 32 bits.
u64_t pack_u64(u32_t a, u32_t b);

// High 32 bits of a.
u32_t get_u64_h(u64_t a);

// Low 32 bits of a.
u32_t get_u64_l(u64_t a);

// Formats via inet_ntoa().
char *my_ntoa(u32_t ip);

// NOTE(review): the only definition visible in common.cpp is commented out - confirm whether this declaration is still needed.
void init_random_number_fd();
u64_t get_true_random_number_64();
u32_t get_true_random_number();
u32_t get_true_random_number_nz();  // never returns 0
u64_t ntoh64(u64_t a);
u64_t hton64(u64_t a);

void write_u16(char *, u16_t a);  // network order
u16_t read_u16(char *);
void write_u32(char *, u32_t a);  // network order
u32_t read_u32(char *);
// write_u64 / read_u64 are unimplemented stubs in common.cpp (assert(0 == 1)).
void write_u64(char *, u64_t a);
u64_t read_u64(char *);

// Wrap-around aware comparisons: true when (a - b), taken as a signed value of the same width, is positive.
bool larger_than_u16(uint16_t a, uint16_t b);
bool larger_than_u32(u32_t a, u32_t b);
void setnonblocking(int sock);
int set_buf_size(int fd, int socket_buf_size);

// Cleanup followed by exit(a).
void myexit(int a);

unsigned short csum(const unsigned short *ptr, int nbytes);
// hlen must be even (asserted in the implementation).
unsigned short csum_with_header(char *header, int hlen, const unsigned short *ptr, int nbytes);

int numbers_to_char(my_id_t id1, my_id_t id2, my_id_t id3, char *&data, int &len);
int char_to_numbers(const char *data, int len, my_id_t &id1, my_id_t &id2, my_id_t &id3);

// Bit flags for run_command()'s flag parameter.
const int show_none = 0;
const int show_command = 0x1;  // log the command line at info level instead of debug
const int show_log = 0x2;      // report failures at warn level instead of debug; stderr is not merged into the output
const int show_all = show_command | show_log;

int run_command(string command, char *&output, int flag = show_all);
// int run_command_no_log(string command,char * &output);
int read_file(const char *file, string &output);

// Splits s on any character of sp; empty tokens are skipped.
vector<string> string_to_vec(const char *s, const char *sp);
// Splits s into lines, then each line into fields on tabs and spaces.
vector<vector<string> > string_to_vec2(const char *s);

// Removes leading and trailing occurrences of c.
string trim(const string &str, char c);

// NOTE(review): no definition of trim_conf_line is visible in this part of common.cpp - confirm it exists elsewhere.
string trim_conf_line(const string &str);

vector<string> parse_conf_line(const string &s);

int hex_to_u32_with_endian(const string &a, u32_t &output);
int hex_to_u32(const string &a, u32_t &output);
// extern string iptables_pattern;

int create_fifo(char *file);

void print_binary_chars(const char *a, int len);

// Recency tracker: remembers when each key was last touched and can report
// the least recently touched one.
// q is ordered from most recently touched (front) to least recently
// touched (back); mp maps every key to its node in q.
template <class key_t>
struct lru_collector_t : not_copy_able_t {
    struct lru_pair_t {
        key_t key;
        my_time_t ts;  // get_current_time() at the moment of the last touch
    };

    unordered_map<key_t, typename list<lru_pair_t>::iterator> mp;

    list<lru_pair_t> q;

    // Mark an already tracked key as touched now. Returns 0.
    int update(key_t key) {
        assert(mp.find(key) != mp.end());
        q.erase(mp[key]);

        my_time_t value = get_current_time();
        if (!q.empty()) {
            assert(value >= q.front().ts);
        }
        q.push_front(lru_pair_t());
        q.front().key = key;
        q.front().ts = value;
        mp[key] = q.begin();

        return 0;
    }

    // Start tracking a key that is not tracked yet, touched now. Returns 0.
    int new_key(key_t key) {
        assert(mp.find(key) == mp.end());

        my_time_t value = get_current_time();
        if (!q.empty()) {
            assert(value >= q.front().ts);
        }
        q.push_front(lru_pair_t());
        q.front().key = key;
        q.front().ts = value;
        mp[key] = q.begin();

        return 0;
    }

    // Number of tracked keys.
    int size() {
        const int count = q.size();
        return count;
    }

    // Non-zero when no key is tracked.
    int empty() {
        const int none = q.empty();
        return none;
    }

    // Forget every key.
    void clear() {
        mp.clear();
        q.clear();
    }

    // Timestamp of the last touch of a tracked key.
    my_time_t ts_of(key_t key) {
        assert(mp.find(key) != mp.end());
        typename list<lru_pair_t>::iterator node = mp[key];
        return node->ts;
    }

    // Report the least recently touched key (through `key`) and return its
    // timestamp. Must not be called on an empty collector.
    my_time_t peek_back(key_t &key) {
        assert(!q.empty());
        const lru_pair_t &oldest = q.back();
        key = oldest.key;
        return oldest.ts;
    }

    // Stop tracking a tracked key.
    void erase(key_t key) {
        assert(mp.find(key) != mp.end());
        typename list<lru_pair_t>::iterator node = mp[key];
        q.erase(node);
        mp.erase(key);
    }
};

#endif /* COMMON_H_ */


================================================
FILE: connection.cpp
================================================
/*
 * connection.cpp
 *
 *  Created on: Sep 23, 2017
 *      Author: root
 */

#include "connection.h"
#include "encrypt.h"
#include "fd_manager.h"

int disable_anti_replay = 0;  // non-zero disables the anti-replay window: anti_replay_t::is_vaild() then accepts every sequence number

const int disable_conn_clear = 0;  // a raw connection is called "conn"; non-zero turns conn_manager_t::clear_inactive0() into a no-op

conn_manager_t conn_manager;  // process-wide registry of raw connections

// Returns the sequence number to stamp on the next outgoing packet and
// advances the counter by one.
anti_replay_seq_t anti_replay_t::get_new_seq_for_send() {
    const anti_replay_seq_t issued = anti_replay_seq;
    ++anti_replay_seq;
    return issued;
}
// Starts with "nothing received yet" (max_packet_received == 0) and a random
// first outgoing sequence number.
// `window` is deliberately left uninitialised, as in the original ("not necessary").
// NOTE(review): that relies on is_vaild() writing a slot before reading it;
// confirm this also holds for an incoming seq of 0.
anti_replay_t::anti_replay_t()
    : max_packet_received(0),
      anti_replay_seq(get_true_random_number_64() / 10) {
}
// Resets the receive side only: forgets the highest sequence number seen.
// The outgoing counter (anti_replay_seq) is left untouched, and the window
// is not cleared here.
void anti_replay_t::re_init() {
    max_packet_received = 0;
    // memset(window,0,sizeof(window));
}

// Sliding-window replay check for an incoming sequence number.
//
// Returns 1 when `seq` is accepted (and records it as seen), 0 when it is a
// duplicate or older than the window. Always returns 1 when
// disable_anti_replay is set.
int anti_replay_t::is_vaild(u64_t seq) {
    if (disable_anti_replay) return 1;

    // Same as the newest packet seen so far: duplicate.
    if (seq == max_packet_received) return 0;

    if (seq > max_packet_received) {
        // Newer than anything seen: move the window forward.
        const u64_t gap = seq - max_packet_received;
        if (gap >= anti_replay_window_size) {
            // Jumped past the whole window; every slot is stale.
            memset(window, 0, sizeof(window));
        } else {
            // Clear only the slots for the numbers that were skipped over.
            for (u64_t skipped = max_packet_received + 1; skipped < seq; skipped++)
                window[skipped % anti_replay_window_size] = 0;
        }
        window[seq % anti_replay_window_size] = 1;
        max_packet_received = seq;
        return 1;
    }

    // From here on seq < max_packet_received.
    if (max_packet_received - seq >= anti_replay_window_size) return 0;  // too old to track

    char &slot = window[seq % anti_replay_window_size];
    if (slot == 1) return 0;  // already seen
    slot = 1;
    return 1;
}

// Takes over the addressing and timing state of `conn_info` while keeping
// this object's own blob. The receive side of the anti-replay state is reset,
// and the roller bookkeeping is zeroed.
void conn_info_t::recover(const conn_info_t &conn_info) {
    // identity and timers come from the other connection
    my_id = conn_info.my_id;
    oppsite_id = conn_info.oppsite_id;
    last_state_time = conn_info.last_state_time;
    last_hb_sent_time = conn_info.last_hb_sent_time;
    last_hb_recv_time = conn_info.last_hb_recv_time;

    // raw packet info is copied, then its rst/disabled flags are cleared
    raw_info = conn_info.raw_info;
    raw_info.rst_received = 0;
    raw_info.disabled = 0;

    blob->anti_replay.re_init();

    // rollers need not be reset, but zeroing them makes debugging easier
    my_roller = 0;
    oppsite_roller = 0;
    last_oppsite_roller_time = 0;
}

// Puts the connection back into its idle state for the current program_mode
// and zeroes the timer handle, the peer's const id and the roller bookkeeping.
// The blob pointer is not touched.
void conn_info_t::re_init() {
    last_state_time = 0;
    oppsite_const_id = 0;
    timer_fd64 = 0;

    my_roller = 0;
    oppsite_roller = 0;
    last_oppsite_roller_time = 0;

    if (program_mode != server_mode) {
        state.client_current_state = client_idle;
    } else {
        state.server_current_state = server_idle;
    }
}
// A fresh connection owns no blob until prepare() is called; everything else
// is set up by re_init().
conn_info_t::conn_info_t() : blob(0) {
    re_init();
}
// Allocates the blob. Must be called at most once per object (blob must
// still be null). In server mode the conversation manager is also given
// server_clear_function as its per-conversation cleanup hook.
void conn_info_t::prepare() {
    assert(blob == 0);
    blob = new blob_t;

    if (program_mode != server_mode) {
        assert(program_mode == client_mode);
        return;
    }
    blob->conv_manager.s.additional_clear_function = server_clear_function;
}

// Copying a conn_info_t is not supported: reaching this constructor trips the
// assertion. No member is initialised here.
conn_info_t::conn_info_t(const conn_info_t &b) {
    assert(0 == 1);
    // mylog(log_error,"called!!!!!!!!!!!!!\n");
}

// Assignment is not supported either: logs a fatal message and calls
// myexit(-1). The return statement only satisfies the signature.
conn_info_t &conn_info_t::operator=(const conn_info_t &b) {
    mylog(log_fatal, "not allowed\n");
    myexit(-1);
    return *this;
}
// Releases the blob, if any.
//
// In server mode the asserts check that a ready connection has a blob and a
// peer const id, and that any other state has neither. The timer handle must
// already be zero. Removing the peer const id from conn_manager.const_id_mp
// is left to conn_manager_t::erase(), not done here.
conn_info_t::~conn_info_t() {
    if (program_mode == server_mode) {
        if (state.server_current_state != server_ready) {
            assert(blob == 0);
            assert(oppsite_const_id == 0);
        } else {
            assert(blob != 0);
            assert(oppsite_const_id != 0);
        }
    }
    assert(timer_fd64 == 0);

    delete blob;  // deleting a null pointer is a no-op
}

// Starts with no ready connections and pre-sizes both hash tables.
//
// clear_it is the scan cursor that clear_inactive0() copies and compares with
// mp.end(). It is set explicitly to mp.end() here so the first scan never
// reads a default-constructed iterator. This is done after reserve(), which
// may rehash and invalidate iterators.
conn_manager_t::conn_manager_t() {
    ready_num = 0;
    last_clear_time = 0;

    mp.reserve(10007);
    const_id_mp.reserve(10007);

    clear_it = mp.end();
}
// Returns 1 when a connection is registered under `addr`, 0 otherwise.
int conn_manager_t::exist(address_t addr) {
    return mp.find(addr) == mp.end() ? 0 : 1;
}
/*
int insert(uint32_t ip,uint16_t port)
{
        uint64_t u64=0;
        u64=ip;
        u64<<=32u;
        u64|=port;
        mp[u64];
        return 0;
}*/
// Returns a reference to the pointer slot stored for `addr`, creating a new
// default-constructed conn_info_t first when the address is unknown.
// The reference points into the map node: a rehash invalidates iterators but
// not references to elements, so it stays usable until that entry is erased.
conn_info_t *&conn_manager_t::find_insert_p(address_t addr) {
    unordered_map<address_t, conn_info_t *>::iterator slot = mp.find(addr);
    if (slot == mp.end()) {
        slot = mp.emplace(addr, new conn_info_t).first;
    }
    return slot->second;
}
// Same lookup-or-create as find_insert_p(), but returns the connection
// object itself rather than the pointer slot that owns it.
conn_info_t &conn_manager_t::find_insert(address_t addr) {
    return *find_insert_p(addr);
}
// Destroys the connection that `erase_it` refers to and removes its entry
// from `mp`. Always returns 0.
//
// A server_ready connection also gives up its slot in const_id_mp, has its
// timer fd closed and decrements ready_num. Any other state must own
// nothing (no blob, no timer, no const id).
//
// Changes from the previous version: the null check runs before the entry is
// dereferenced, and the map entry is erased through the iterator already in
// hand instead of by key.
// NOTE(review): if any caller other than clear_inactive0() erases the element
// that clear_it currently refers to, clear_it is left dangling -- confirm callers.
int conn_manager_t::erase(unordered_map<address_t, conn_info_t *>::iterator erase_it) {
    conn_info_t *conn = erase_it->second;
    assert(conn != 0);

    if (conn->state.server_current_state == server_ready) {
        ready_num--;
        assert(i32_t(ready_num) != -1);  // catches an underflow of the unsigned counter

        assert(conn->timer_fd64 != 0);
        assert(fd_manager.exist(conn->timer_fd64));

        assert(conn->oppsite_const_id != 0);
        assert(const_id_mp.find(conn->oppsite_const_id) != const_id_mp.end());

        const_id_mp.erase(conn->oppsite_const_id);

        fd_manager.fd64_close(conn->timer_fd64);
        conn->timer_fd64 = 0;  // ~conn_info_t asserts that the timer handle is zero
    } else {
        assert(conn->blob == 0);
        assert(conn->timer_fd64 == 0);
        assert(conn->oppsite_const_id == 0);
    }

    delete conn;
    mp.erase(erase_it);
    return 0;
}
// Rate limiter around clear_inactive0(): the scan runs only when more than
// conn_clear_interval has passed since the previous one.
int conn_manager_t::clear_inactive() {
    if (!(get_current_time() - last_clear_time > conn_clear_interval)) {
        return 0;
    }
    last_clear_time = get_current_time();
    return clear_inactive0();
}
int conn_manager_t::clear_inactive0() {
    unordered_map<address_t, conn_info_t *>::iterator it;
    unordered_map<address_t, conn_info_t *>::iterator old_it;

    if (disable_conn_clear) return 0;

    // map<uint32_t,uint64_t>::iterator it;
    int cnt = 0;
    it = clear_it;
    int size = mp.size();
    int num_to_clean = size / conn_clear_ratio + conn_clear_min;  // clear 1/10 each time,to avoid latency glitch

    mylog(log_trace, "mp.size() %d\n", size);

    num_to_clean = min(num_to_clean, (int)mp.size());
    u64_t current_time = get_current_time();

    for (;;) {
        if (cnt >= num_to_clean) break;
        if (mp.begin() == mp.end()) break;

        if (it == mp.end()) {
            it = mp.begin();
        }

        if (it->second->state.server_current_state == server_ready && current_time - it->second->last_hb_recv_time <= server_conn_timeout) {
            it++;
        } else if (it->second->state.server_current_state != server_ready && current_time - it->second->last_state_time <= server_handshake_timeout) {
            it++;
        } else if (it->second->blob != 0 && it->second->blob->conv_manager.s.get_size() > 0) {
            assert(it->second->state.server_current_state == server_ready);
            it++;
        } else {
            mylog(log_info, "[%s:%d]inactive conn cleared \n", it->second->raw_info.recv_info.new_src_ip.get_str1(), it->second->raw_info.recv_info.src_port);
            old_it = it;
            it++;
            erase(old_it);
        }
        cnt++;
    }
    clear_it = it;

    return 0;
}

// Encrypted send without anti-replay protection, used while client and
// server are still verifying each other. The handshake protocol built on top
// must therefore be designed so that replayed packets do no harm.
//
// Plaintext layout: iv | padding | 'b' | data[len].
//
// Returns 0 once the packet has been handed to send_raw0(), or -1 when `len`
// is negative, when the payload would not fit the local buffer, or when
// encryption fails.
// NOTE(review): the result of send_raw0() is ignored here, as before;
// send_safer() does check it. Confirm whether that is intentional.
int send_bare(raw_info_t &raw_info, const char *data, int len) {
    if (len < 0) {
        mylog(log_debug, "input_len <0\n");
        return -1;
    }

    const int header_len = sizeof(iv_t) + sizeof(padding_t) + 1;
    if (len > buf_len - header_len) {
        // would overflow send_data_buf below
        mylog(log_debug, "input_len %d too large for send buffer\n", len);
        return -1;
    }

    char send_data_buf[buf_len];  // plaintext
    char send_data_buf2[buf_len];  // ciphertext

    iv_t iv = get_true_random_number_64();
    padding_t padding = get_true_random_number_64();

    memcpy(send_data_buf, &iv, sizeof(iv));
    memcpy(send_data_buf + sizeof(iv), &padding, sizeof(padding));

    send_data_buf[sizeof(iv) + sizeof(padding)] = 'b';  // marks a "bare" packet
    memcpy(send_data_buf + header_len, data, len);
    int new_len = len + header_len;

    // new_len is passed as an lvalue; presumably my_encrypt updates it to the ciphertext length -- confirm
    if (my_encrypt(send_data_buf, send_data_buf2, new_len) != 0) {
        return -1;
    }
    send_raw0(raw_info, send_data_buf2, new_len);
    return 0;
}
// Helper for recv_bare(): decrypts `input` and strips the bare-packet header
// (iv | padding | 'b').
//
// On success returns 0 and points `data`/`len` at the payload, which lives in
// a function-local static buffer and is only valid until the next call.
// Returns -1 on a negative length, a decryption failure, a packet too short
// to hold the header, or a wrong type byte.
//
// The length is now validated before the type byte is inspected, so a short
// packet can no longer be judged by stale bytes left in the static buffer.
int reserved_parse_bare(const char *input, int input_len, char *&data, int &len) {
    static char recv_data_buf[buf_len];
    const int header_len = sizeof(iv_t) + sizeof(padding_t) + 1;

    if (input_len < 0) {
        mylog(log_debug, "input_len <0\n");
        return -1;
    }
    // input_len is passed as an lvalue; presumably my_decrypt updates it to the plaintext length -- confirm
    if (my_decrypt(input, recv_data_buf, input_len) != 0) {
        mylog(log_debug, "decrypt_fail in recv bare\n");
        return -1;
    }
    if (input_len < header_len) {
        mylog(log_debug, "len <0\n");
        return -1;
    }
    if (recv_data_buf[header_len - 1] != 'b') {
        mylog(log_debug, "not a bare packet\n");
        return -1;
    }

    data = recv_data_buf + header_len;
    len = input_len - header_len;
    return 0;
}
// Encrypted receive without anti-replay protection, used while client and
// server are still verifying each other. The handshake protocol built on top
// must therefore be designed so that replayed packets do no harm.
//
// Reads one raw packet, rejects oversized ones and, in faketcp mode, packets
// that carry SYN or lack ACK, then hands the rest to reserved_parse_bare().
// Returns 0 with `data`/`len` describing the payload, or -1.
//
// Fix: the "ignored" debug message now ends with a newline like every other
// log line in this file.
int recv_bare(raw_info_t &raw_info, char *&data, int &len) {
    packet_info_t &recv_info = raw_info.recv_info;

    if (recv_raw0(raw_info, data, len) < 0) {
        return -1;
    }

    if (len >= max_data_len + 1) {
        mylog(log_debug, "data_len=%d >= max_data_len+1,ignored\n", len);
        return -1;
    }

    mylog(log_trace, "data len=%d\n", len);
    if ((raw_mode == mode_faketcp && (recv_info.syn == 1 || recv_info.ack != 1))) {
        mylog(log_debug, "unexpect packet type recv_info.syn=%d recv_info.ack=%d \n", recv_info.syn, recv_info.ack);
        return -1;
    }
    // input and output deliberately alias: the inputs are taken by value, the outputs by reference
    return reserved_parse_bare(data, len, data, len);
}

// Convenience wrapper around send_bare() for the application-level handshake
// (not the TCP handshake): packs the three ids with numbers_to_char() and
// sends them. Returns 0 on success, -1 when packing or sending fails.
int send_handshake(raw_info_t &raw_info, my_id_t id1, my_id_t id2, my_id_t id3) {
    char *payload;
    int payload_len;

    if (numbers_to_char(id1, id2, id3, payload, payload_len) != 0) return -1;

    if (send_bare(raw_info, payload, payload_len) == 0) return 0;

    mylog(log_warn, "send bare fail\n");
    return -1;
}
/*
int recv_handshake(packet_info_t &info,id_t &id1,id_t &id2,id_t &id3)
{
        char * data;int len;
        if(recv_bare(info,data,len)!=0) return -1;

        if(char_to_numbers(data,len,id1,id2,id3)!=0) return -1;

        return 0;
}*/

// Authenticated-phase send with anti-replay, used once mutual verification
// is done.
//
// Plaintext layout (integers in network byte order):
//   my_id | oppsite_id | seq | type | my_roller | data[len]
// `type` must be 'h' or 'd'.
// When g_fix_gro is set, the ciphertext is prefixed with a 2-byte length,
// which is then obscured: XOR with gro_xor for the xor cipher, or
// aes_ecb_encrypt1() for the AES modes.
//
// Returns 0 on success. Returns -1 on a bad type, on a negative `len` or one
// too large for the local buffer (previously an unchecked memcpy), and on an
// encryption or send failure.
int send_safer(conn_info_t &conn_info, char type, const char *data, int len) {
    if (type != 'h' && type != 'd') {
        mylog(log_warn, "first byte is not h or d  ,%x\n", type);
        return -1;
    }

    const int header_len = sizeof(my_id_t) * 2 + sizeof(anti_replay_seq_t) + 2;
    if (len < 0 || len > buf_len - header_len) {
        // would overflow send_data_buf below
        mylog(log_warn, "invalid data len %d in send_safer\n", len);
        return -1;
    }

    char send_data_buf[buf_len];  // plaintext
    char send_data_buf2[buf_len];  // ciphertext (plus the optional gro length prefix)

    char *cursor = send_data_buf;

    my_id_t n_tmp_id = htonl(conn_info.my_id);
    memcpy(cursor, &n_tmp_id, sizeof(n_tmp_id));
    cursor += sizeof(n_tmp_id);

    n_tmp_id = htonl(conn_info.oppsite_id);
    memcpy(cursor, &n_tmp_id, sizeof(n_tmp_id));
    cursor += sizeof(n_tmp_id);

    anti_replay_seq_t n_seq = hton64(conn_info.blob->anti_replay.get_new_seq_for_send());
    memcpy(cursor, &n_seq, sizeof(n_seq));
    cursor += sizeof(n_seq);

    *cursor++ = type;
    *cursor++ = conn_info.my_roller;

    memcpy(cursor, data, len);  // payload

    int new_len = len + header_len;

    // new_len is passed as an lvalue; presumably my_encrypt updates it to the ciphertext length -- confirm
    if (g_fix_gro == 0) {
        if (my_encrypt(send_data_buf, send_data_buf2, new_len) != 0) {
            return -1;
        }
    } else {
        // leave two bytes at the front for the length prefix
        if (my_encrypt(send_data_buf, send_data_buf2 + 2, new_len) != 0) {
            return -1;
        }
        write_u16(send_data_buf2, new_len);
        new_len += 2;
        if (cipher_mode == cipher_xor) {
            send_data_buf2[0] ^= gro_xor[0];
            send_data_buf2[1] ^= gro_xor[1];
        } else if (cipher_mode == cipher_aes128cbc || cipher_mode == cipher_aes128cfb) {
            aes_ecb_encrypt1(send_data_buf2);
        }
    }

    if (send_raw0(conn_info.raw_info, send_data_buf2, new_len) != 0) return -1;

    if (after_send_raw0(conn_info.raw_info) != 0) return -1;

    return 0;
}
// Wrapper around send_safer() for application data: prefixes the payload
// with the conversation number (network byte order) and sends it as type 'd'.
//
// Returns -1 when `len` is negative or too large for the local buffer
// (previously an unchecked memcpy); otherwise returns 0.
// NOTE(review): the result of send_safer() is still ignored, as before, so a
// failed send also yields 0 -- confirm that callers rely on this.
int send_data_safer(conn_info_t &conn_info, const char *data, int len, u32_t conv_num) {
    u32_t n_conv_num = htonl(conv_num);

    if (len < 0 || len > buf_len - (int)sizeof(n_conv_num)) {
        // would overflow send_data_buf below
        mylog(log_warn, "invalid data len %d in send_data_safer\n", len);
        return -1;
    }

    char send_data_buf[buf_len];
    memcpy(send_data_buf, &n_conv_num, sizeof(n_conv_num));
    memcpy(send_data_buf + sizeof(n_conv_num), data, len);

    int new_len = len + sizeof(n_conv_num);
    send_safer(conn_info, 'd', send_data_buf, new_len);
    return 0;
}
// Helper for the recv_safer_* functions: decrypts one packet and validates it.
//
// Expected plaintext layout (integers in network byte order):
//   sender id | receiver id | seq | type | roller | payload
//
// On success returns 0, sets `type` to 'h' or 'd' and points `data`/`len` at
// the payload, which lives in a function-local static buffer and is only
// valid until the next call. Returns -1 when decryption fails, the packet is
// too short, the ids do not match this connection, the sequence number is
// rejected by the anti-replay window, the type byte is wrong, or
// after_recv_raw0() fails.
//
// The length check now happens right after decryption. Previously the header
// fields were read, and the anti-replay window updated, before a too-short
// packet was rejected, so stale bytes in the static buffer were parsed.
int reserved_parse_safer(conn_info_t &conn_info, const char *input, int input_len, char &type, char *&data, int &len) {
    static char recv_data_buf[buf_len];
    const int id_and_seq_len = sizeof(anti_replay_seq_t) + sizeof(my_id_t) * 2;

    // input_len is passed as an lvalue; presumably my_decrypt updates it to the plaintext length -- confirm
    if (my_decrypt(input, recv_data_buf, input_len) != 0) {
        return -1;
    }

    // ids + seq + type byte + roller byte must all be present
    const int payload_len = input_len - id_and_seq_len - 2;
    if (payload_len < 0) {
        mylog(log_debug, "len <0 ,%d\n", payload_len);
        return -1;
    }

    // memcpy instead of pointer casts keeps this free of alignment and aliasing problems
    my_id_t h_oppsite_id;
    memcpy(&h_oppsite_id, recv_data_buf, sizeof(h_oppsite_id));
    h_oppsite_id = ntohl(h_oppsite_id);

    my_id_t h_my_id;
    memcpy(&h_my_id, recv_data_buf + sizeof(my_id_t), sizeof(h_my_id));
    h_my_id = ntohl(h_my_id);

    anti_replay_seq_t h_seq;
    memcpy(&h_seq, recv_data_buf + sizeof(my_id_t) * 2, sizeof(h_seq));
    h_seq = ntoh64(h_seq);

    if (h_oppsite_id != conn_info.oppsite_id || h_my_id != conn_info.my_id) {
        mylog(log_debug, "id and oppsite_id verification failed %x %x %x %x \n", h_oppsite_id, conn_info.oppsite_id, h_my_id, conn_info.my_id);
        return -1;
    }

    if (conn_info.blob->anti_replay.is_vaild(h_seq) != 1) {
        mylog(log_debug, "dropped replay packet\n");
        return -1;
    }

    char *body = recv_data_buf + id_and_seq_len;

    if (body[0] != 'h' && body[0] != 'd') {
        mylog(log_debug, "first byte is not h or d  ,%x\n", body[0]);
        return -1;
    }

    uint8_t roller = body[1];

    type = body[0];
    data = body + 2;
    len = payload_len;

    // remember when the peer's roller last changed
    if (roller != conn_info.oppsite_roller) {
        conn_info.oppsite_roller = roller;
        conn_info.last_oppsite_roller_time = get_current_time();
    }
    if (hb_mode == 0)
        conn_info.my_roller++;  // increase on a successful recv
    else if (hb_mode == 1) {
        if (type == 'h')
            conn_info.my_roller++;
    } else {
        mylog(log_fatal, "unknow hb_mode\n");
        myexit(-1);
    }

    if (after_recv_raw0(conn_info.raw_info) != 0) return -1;  // TODO might need to move this function to somewhere else after --fix-gro is introduced

    return 0;
}
// Single-packet variant of the safer receive path (anti-replay enabled, for
// use after mutual verification). Reads one raw packet and parses it with
// reserved_parse_safer(). Returns 0 on success, -1 otherwise.
int recv_safer_notused(conn_info_t &conn_info, char &type, char *&data, int &len) {
    char *raw_data;
    int raw_len;

    if (recv_raw0(conn_info.raw_info, raw_data, raw_len) != 0) {
        return -1;
    }
    return reserved_parse_safer(conn_info, raw_data, raw_len, type, data, len);
}

// Safer receive (anti-replay enabled, for use after mutual verification) that
// can return several logical packets from one raw read.
//
// type_arr and data_arr must be empty on entry. For every packet that parses
// successfully, its type byte is appended to type_arr and a copy of its
// payload to data_arr, so the two vectors stay index-aligned.
//
// Returns -1 only when the raw read fails. Otherwise returns 0, even when no
// packet could be parsed, so callers have to look at the vectors' size.
int recv_safer_multi(conn_info_t &conn_info, vector<char> &type_arr, vector<string> &data_arr)  // new api for handling gro
{
    packet_info_t &send_info = conn_info.raw_info.send_info;
    packet_info_t &recv_info = conn_info.raw_info.recv_info;

    char *recv_data;
    int recv_len;
    assert(type_arr.empty());
    assert(data_arr.empty());

    if (recv_raw0(conn_info.raw_info, recv_data, recv_len) != 0) return -1;

    char type;
    char *data;
    int len;

    if (g_fix_gro == 0) {
        // Plain mode: the raw read is exactly one encrypted packet.
        int ret = reserved_parse_safer(conn_info, recv_data, recv_len, type, data, len);
        if (ret == 0) {
            type_arr.push_back(type);
            // copy out: data points into reserved_parse_safer's static buffer
            data_arr.emplace_back(data, data + len);
            // std::copy(data,data+len,data_arr[0]);
        }
        return 0;
    } else {
        // fix-gro mode: the read may hold several packets back to back, each
        // prefixed by the obscured 2-byte length that send_safer() writes.
        char *ori_recv_data = recv_data;
        int ori_recv_len = recv_len;
        // mylog(log_debug,"recv_len:%d\n",recv_len);
        int cnt = 0;
        // At least 16 bytes must remain before a length prefix is decoded
        // (presumably because the AES branch works on a 16-byte block -- confirm).
        while (recv_len >= 16) {
            cnt++;
            // the still-obscured length, kept only for the log messages below
            int single_len_no_xor;
            single_len_no_xor = read_u16(recv_data);
            int single_len;
            // Undo the obscuring applied by send_safer(); this rewrites the receive buffer in place.
            if (cipher_mode == cipher_xor) {
                recv_data[0] ^= gro_xor[0];
                recv_data[1] ^= gro_xor[1];
            } else if (cipher_mode == cipher_aes128cbc || cipher_mode == cipher_aes128cfb) {
                aes_ecb_decrypt1(recv_data);
            }
            single_len = read_u16(recv_data);
            // step over the 2-byte length prefix
            recv_len -= 2;
            recv_data += 2;
            if (single_len > recv_len) {
                mylog(log_debug, "illegal single_len %d(%d), recv_len %d left,dropped\n", single_len, single_len_no_xor, recv_len);
                break;
            }
            if (single_len > max_data_len) {
                mylog(log_warn, "single_len %d(%d) > %d, maybe you need to turn down mtu at upper level\n", single_len, single_len_no_xor, max_data_len);
                break;
            }

            int ret = reserved_parse_safer(conn_info, recv_data, single_len, type, data, len);

            if (ret != 0) {
                // a bad sub-packet is skipped; the packets after it are still tried
                mylog(log_debug, "parse failed, offset= %d,single_len=%d(%d)\n", (int)(recv_data - ori_recv_data), single_len, single_len_no_xor);
            } else {
                type_arr.push_back(type);
                // copy out: the static parse buffer is reused on the next iteration
                data_arr.emplace_back(data, data + len);
                // std::copy(data,data+len,data_arr[data_arr.size()-1]);
            }
            // advance to the next length prefix
            recv_data += single_len;
            recv_len -= single_len;
        }
        if (cnt > 1) {
            mylog(log_debug, "got a suspected gro packet, %d packets recovered, recv_len=%d, loop_cnt=%d\n", (int)data_arr.size(), ori_recv_len, cnt);
        }
        return 0;
    }
}

// Cleanup hook installed on the server-side conversation manager (see
// conn_info_t::prepare()). The server uses one UDP fd per conversation, so
// the fd has to be closed when the conversation expires. `u64` is that
// conversation's fd64 handle; it must still be registered with fd_manager.
// No explicit epoll deregistration is done here; per the original notes, a
// closed fd is removed from epoll automatically.
void server_clear_function(u64_t u64) {
    const fd64_t expired_fd64 = u64;
    assert(fd_manager.exist(expired_fd64));
    fd_manager.fd64_close(expired_fd64);
}


================================================
FILE: connection.h
================================================
/*
 * connection.h
 *
 *  Created on: Sep 23, 2017
 *      Author: root
 */

#ifndef CONNECTION_H_
#define CONNECTION_H_

extern int disable_anti_replay;

#include "connection.h"
#include "common.h"
#include "log.h"
#include "network.h"
#include "misc.h"

const int disable_conv_clear = 0;  // a udp connection in the multiplexer is called conversation in this program,conv for short.

// Protection against replay attacks, similar to the anti-replay window in
// OpenVPN/IPsec. Tracks the highest sequence number received plus a window of
// "already seen" flags behind it, and also owns the counter for outgoing
// sequence numbers.
struct anti_replay_t
{
    u64_t max_packet_received;             // highest sequence number accepted so far
    char window[anti_replay_window_size];  // seen-flags, indexed by seq % anti_replay_window_size
    anti_replay_seq_t anti_replay_seq;     // next outgoing sequence number
    anti_replay_seq_t get_new_seq_for_send();  // returns anti_replay_seq, then increments it
    anti_replay_t();
    void re_init();  // resets max_packet_received only

    int is_vaild(u64_t seq);  // 1 = accept (and record), 0 = duplicate or too old
};  // anti_replay;

void server_clear_function(u64_t u64);

#include <type_traits>

template <class T>
struct conv_manager_t  // manage the udp connections
{
    // typedef hash_map map;
    unordered_map<T, u32_t> data_to_conv;  // conv and u64 are both supposed to be uniq
    unordered_map<u32_t, T> conv_to_data;

    lru_collector_t<u32_t> lru;
    // unordered_map<u32_t,u64_t> conv_last_active_time;

    // unordered_map<u32_t,u64_t>::iterator clear_it;

    void (*additional_clear_function)(T data) = 0;

    long long last_clear_time;

    conv_manager_t() {
        // clear_it=conv_last_active_time.begin();
        long long last_clear_time = 0;
        additional_clear_function = 0;
    }
    ~conv_manager_t() {
        clear();
    }
    int get_size() {
        return conv_to_data.size();
    }
    void reserve() {
        data_to_conv.reserve(10007);
        conv_to_data.reserve(10007);
        // conv_last_active_time.reserve(10007);

        lru.mp.reserve(10007);
    }
    void clear() {
        if (disable_conv_clear) return;

        if (additional_clear_function != 0) {
            for (auto it = conv_to_data.begin(); it != conv_to_data.end(); it++) {
                // int fd=int((it->second<<32u)>>32u);
                additional_clear_function(it->second);
            }
        }
        data_to_conv.clear();
        conv_to_data.clear();

        lru.clear();
        // conv_last_active_time.clear();

        // clear_it=conv_last_active_time.begin();
    }
    u32_t get_new_conv() {
        u32_t conv = get_true_random_number_nz();
        while (conv_to_data.find(conv) != conv_to_data.end()) {
            conv = get_true_random_number_nz();
        }
        return conv;
    }
    int is_conv_used(u32_t conv) {
        return conv_to_data.find(conv) != conv_to_data.end();
    }
    int is_data_used(T data) {
        return data_to_conv.find(data) != data_to_conv.end();
    }
    u32_t find_conv_by_data(T data) {
        return data_to_conv[data];
    }
    T find_data_by_conv(u32_t conv) {
        return conv_to_data[conv];
    }
    int update_active_time(u32_t conv) {
        // return conv_last_active_time[conv]=get_current_time();
        lru.update(conv);
        return 0;
    }
    int insert_conv(u32_t conv, T data) {
        data_to_conv[data] = conv;
        conv_to_data[conv] = data;
        // conv_last_active_time[conv]=get_current_time();
        lru.new_key(conv);
        return 0;
    }
    int erase_conv(u32_t conv) {
        if (disable_conv_clear) return 0;
        T data = conv_to_data[conv];
        if (additional_clear_function != 0) {
            additional_clear_function(data);
        }
        conv_to_data.erase(conv);
        data_to_conv.erase(data);
        // conv_last_active_time.erase(conv);
        lru.erase(conv);
        return 0;
    }
    int clear_inactive(char *info = 0) {
        if (get_current_time() - last_clear_time > conv_clear_interval) {
            last_clear_time = get_current_time();
            return clear_inactive0(info);
        }
        return 0;
    }
    int clear_inactive0(char *info) {
        if (disable_conv_clear) return 0;

        unordered_map<u32_t, u64_t>::iterator it;
        unordered_map<u32_t, u64_t>::iterator old_it;

        // map<uint32_t,uint64_t>::iterator it;
        int cnt = 0;
        // it=clear_it;
        int size = lru.size();
        int num_to_clean = size / conv_clear_ratio + conv_clear_min;  // clear 1/10 each time,to avoid latency glitch

        num_to_clean = min(num_to_clean, size);

        my_time_t current_time = get_current_time();
        for (;;) {
            if (cnt >= num_to_clean) break;
            if (lru.empty()) break;

            u32_t conv;
            my_time_t ts = lru.peek_back(conv);

            if (current_time - ts < conv_timeout) break;

            erase_conv(conv);
            if (info == 0) {
                mylog(log_info, "conv %x cleared\n", conv);
            } else {
                mylog(log_info, "[%s]conv %x cleared\n", info, conv);
            }
            cnt++;
        }
        return 0;
    }

    /*
conv_manager_t();
~conv_manager_t();
int get_size();
void reserve();
void clear();
u32_t get_new_conv();
int is_conv_used(u32_t conv);
int is_u64_used(T u64);
u32_t find_conv_by_u64(T u64);
T find_u64_by_conv(u32_t conv);
int update_active_time(u32_t conv);
int insert_conv(u32_t conv,T u64);
int erase_conv(u32_t conv);
int clear_inactive(char * ip_port=0);
int clear_inactive0(char * ip_port);*/
};  // g_conv_manager;

// Heavy per-connection state, held by conn_info_t through a pointer so that
// it is only allocated once needed (see conn_info_t::prepare()).
struct blob_t : not_copy_able_t
{
    // Holds the conversation manager for the current program_mode: `c` in
    // client mode, `s` in server mode. Keeping it inside the blob avoids
    // copying it when a connection is recovered, and the union avoids
    // templates, pointers and casts at the call sites. The active member is
    // constructed and destroyed by hand, selected by program_mode.
    union tmp_union_t
    {
        conv_manager_t<address_t> c;
        conv_manager_t<u64_t> s;

        tmp_union_t() {
            if (program_mode != client_mode) {
                assert(program_mode == server_mode);
                new (&s) conv_manager_t<u64_t>();
                return;
            }
            new (&c) conv_manager_t<address_t>();
        }
        ~tmp_union_t() {
            if (program_mode != client_mode) {
                assert(program_mode == server_mode);
                s.~conv_manager_t<u64_t>();
                return;
            }
            c.~conv_manager_t<address_t>();
        }
    } conv_manager;

    anti_replay_t anti_replay;  // lives here because it is large, so its allocation is delayed with the blob
};
// Stores the state of one raw connection. A client has exactly one; a server
// can have thousands, since it handles many clients at once.
// Copy construction and assignment are declared but abort when used.
struct conn_info_t
{
    current_state_t state;  // client or server state machine position, depending on program_mode

    raw_info_t raw_info;
    u64_t last_state_time;
    u64_t last_hb_sent_time;  // client re-use this for retry
    u64_t last_hb_recv_time;  // used by conn_manager_t::clear_inactive0() to time out ready connections
    // long long last_resent_time;

    my_id_t my_id;       // sent as the first id in every send_safer() packet
    my_id_t oppsite_id;  // the peer's id; checked on every received safer packet

    fd64_t timer_fd64;  // closed through fd_manager in conn_manager_t::erase(); must be 0 on destruction
    fd64_t udp_fd64;

    my_id_t oppsite_const_id;  // key of this connection in conn_manager_t::const_id_mp

    blob_t *blob;  // null until prepare(); owned and deleted by this object

    uint8_t my_roller;       // sent in every send_safer() packet; incremented on successful receives
    uint8_t oppsite_roller;  // last roller value seen from the peer
    u64_t last_oppsite_roller_time;  // when oppsite_roller last changed

    //	ip_port_t ip_port;

    /*
            const uint32_t &ip=raw_info.recv_info.src_ip;
            const uint16_t &port=raw_info.recv_info.src_port;

    */
    void recover(const conn_info_t &conn_info);  // adopt another connection's addressing/timing state, keep own blob
    void re_init();                              // back to idle; does not touch blob
    conn_info_t();
    void prepare();  // allocates blob; call at most once
    conn_info_t(const conn_info_t &b);             // not supported: asserts
    conn_info_t &operator=(const conn_info_t &b);  // not supported: logs fatal and exits
    ~conn_info_t();
};  // g_conn_info;

// Registry of raw connections. The client has only one connection and does
// not need it; the server uses a single conn_manager_t for all connections.
struct conn_manager_t
{
    u32_t ready_num;  // number of connections currently in server_ready state

    // unordered_map<int,conn_info_t *> udp_fd_mp;  //a bit dirty to used pointer,but can void unordered_map search
    // unordered_map<int,conn_info_t *> timer_fd_mp;//we can use pointer here since unordered_map.rehash() uses shallow copy

    unordered_map<my_id_t, conn_info_t *> const_id_mp;  // peer const id -> connection (ready connections only)

    unordered_map<address_t, conn_info_t *> mp;  // declared after const_id_mp so that it is destructed before it

    // lru_collector_t<address_t> lru;

    unordered_map<address_t, conn_info_t *>::iterator clear_it;  // where the next clear_inactive0() sweep resumes

    long long last_clear_time;  // time of the last sweep started by clear_inactive()

    conn_manager_t();
    int exist(address_t addr);  // 1 if addr is registered, else 0
    /*
    int insert(uint32_t ip,uint16_t port)
    {
            uint64_t u64=0;
            u64=ip;
            u64<<=32u;
            u64|=port;
            mp[u64];
            return 0;
    }*/
    // Both look up addr and create a fresh conn_info_t when it is missing.
    // The original notes warn that the returned address may change after a
    // rehash. unordered_map rehashing invalidates iterators but not references
    // to elements, so the returned reference stays valid until the entry is erased.
    conn_info_t *&find_insert_p(address_t addr);
    conn_info_t &find_insert(address_t addr);

    int erase(unordered_map<address_t, conn_info_t *>::iterator erase_it);  // deletes the connection and its map entry
    int clear_inactive();   // rate-limited wrapper around clear_inactive0()
    int clear_inactive0();  // one incremental sweep that removes timed-out connections
};

extern conn_manager_t conn_manager;

void server_clear_function(u64_t u64);

int send_bare(raw_info_t &raw_info, const char *data, int len);  // send function with encryption but no anti-replay; used while client and server verify each other
// the protocol on top has to be designed carefully so that it is not affected by replay attacks
// int reserved_parse_bare(const char *input,int input_len,char* & data,int & len); // a sub function used in recv_bare
int recv_bare(raw_info_t &raw_info, char *&data, int &len);  // recv function with encryption but no anti-replay; used while client and server verify each other
// the protocol on top has to be designed carefully so that it is not affected by replay attacks
int send_handshake(raw_info_t &raw_info, my_id_t id1, my_id_t id2, my_id_t id3);         // a wrapper around send_bare for sending the handshake easily (this is not the tcp handshake)
int send_safer(conn_info_t &conn_info, char type, const char *data, int len);            // safer transfer function with anti-replay, used once mutual verification is done
int send_data_safer(conn_info_t &conn_info, const char *data, int len, u32_t conv_num);  // a wrapper around send_safer for transferring data
// int reserved_parse_safer(conn_info_t &conn_info,const char * input,int input_len,char &type,char* &data,int &len);//subfunction for recv_safer,allow overlap

// int recv_safer(conn_info_t &conn_info,char &type,char* &data,int &len);///safer transfer function with anti-replay,when mutually verification is done.

int recv_safer_multi(conn_info_t &conn_info, vector<char> &type_arr, vector<string> &data_arr);  // new api for handling gro: may return several packets per raw read
#endif                                                                                           /* CONNECTION_H_ */


================================================
FILE: doc/README.zh-cn.md
================================================
# Udp2raw-tunnel 
![image2](/images/image0.PNG)
udp2raw tunnel，通过raw socket给UDP包加上TCP或ICMP header，进而绕过UDP屏蔽或QoS，或在UDP不稳定的环境下提升稳定性。可以有效防止在使用kcptun或者finalspeed的情况下udp端口被运营商限速。

支持心跳保活、自动重连，重连后会恢复上次连接，在底层掉线的情况下可以保持上层不掉线。同时有加密、防重放攻击、信道复用的功能。

[English](/README.md)

[udp2raw+kcptun step_by_step教程](kcptun_step_by_step.md)

[udp2raw+finalspeed step_by_step教程](finalspeed_step_by_step.md)

[udp2raw wiki](https://github.com/wangyu-/udp2raw-tunnel/wiki)

**提示：**

udp2raw不是加速器，只是一个帮助你绕过UDP限制的工具。如果你需要UDP“加速器” (改善UDP丢包)，请看UDPspeeder。

UDPspeeder的repo:

https://github.com/wangyu-/UDPspeeder
# 支持的平台
Linux主机，有root权限或cap_net_raw capability。可以是PC、android手机/平板、openwrt路由器、树莓派。主机上最好安装了iptables命令(apt/yum很容易安装)。

Release中提供了`amd64`、`x86`、`arm`、`mips_be`、`mips_le`的预编译binary.

##### 对于windows和mac用户：

可以用[这个repo](https://github.com/wangyu-/udp2raw-multiplatform)里的udp2raw。

##### 对于ios和游戏主机用户：

可以把udp2raw运行在局域网的其他机器/虚拟机上。最好的办法是买个能刷OpenWrt/LEDE/梅林的路由器，把udp2raw运行在路由器上。

# 功能特性
### 把udp流量伪装成tcp /icmp
用raw socket给udp包加上tcp/icmp包头，可以突破udp流量限制或Udp QOS。或者在udp nat有问题的环境下，提升稳定性。  另外也支持用raw 发udp包，这样流量不会被伪装，只会被加密。

### 模拟TCP3次握手
模拟TCP3次握手，模拟seq ack过程。另外还模拟了一些tcp option：MSS,sackOk,TS,TS_ack,wscale，用来使流量看起来更像是由普通的linux tcp协议栈发送的。

### 心跳保活、自动重连，连接恢复
心跳保活、自动重连，udp2raw重连可以恢复上次的连接，重连后上层连接继续有效，底层掉线上层不掉线。有效解决上层连接断开的问题。 （功能借鉴自[kcptun-raw](https://github.com/Chion82/kcptun-raw)）（**就算你拔掉网线重插，或者重新拨号获得新ip，上层应用也不会断线**）

### 加密、防重放攻击
用aes128cbc加密(或更弱的xor)，hmac-sha1(或更弱的md5/crc32/simple)做数据完整校验。用类似ipsec/openvpn的replay window机制来防止重放攻击。

[Notes on encryption](https://github.com/wangyu-/udp2raw-tunnel/wiki/Notes-on-encryption)

### 其他特性
信道复用，client的udp端支持多个连接。

server支持多个client，也能正确处理多个连接的重连和连接恢复。

NAT 穿透 ，tcp icmp udp模式都支持nat穿透。

支持Openvz，配合finalspeed使用，可以在openvz上用tcp模式的finalspeed.

支持Openwrt，没有编译依赖，容易编译到任何平台上。

### 关键词
突破udp qos,突破udp屏蔽，openvpn tcp over tcp problem,openvpn over icmp,udp to icmp tunnel,udp to tcp tunnel,udp via icmp,udp via tcp

# 简明操作说明

### 安装
下载编译好的二进制文件，解压到任意目录。

https://github.com/wangyu-/udp2raw-tunnel/releases

### 运行
假设你有一个server，ip为44.55.66.77，有一个服务监听在udp 7777端口。 假设你本地的主机到44.55.66.77的UDP流量被屏蔽了，或者被qos了

```
在server端运行:
./udp2raw_amd64 -s -l0.0.0.0:4096  -r127.0.0.1:7777   -k "passwd" --raw-mode faketcp   --cipher-mode xor  -a

在client端运行:
./udp2raw_amd64 -c -l0.0.0.0:3333  -r44.55.66.77:4096 -k "passwd" --raw-mode faketcp   --cipher-mode xor  -a
```
(以上例子需要用root账号运行。 用非root运行udp2raw需要一些额外的步骤，具体方法请看 [这个](https://github.com/wangyu-/udp2raw-tunnel/wiki/run-udp2raw-as-non-root) 链接。用非root运行更安全)

###### Server端输出:
![](/images/output_server.PNG)
###### Client端输出:
![](/images/output_client.PNG)

现在client和server之间建立起了tunnel。想要在本地连接44.55.66.77:7777，只需要连接 127.0.0.1:3333。来回的所有udp流量都会经过tunnel发送。在外界看起来是tcp流量，不会有udp流量暴露到公网。

### MTU设置(重要)

不论你用udp2raw来加速kcptun还是vpn,为了稳定使用,都需要设置合理的MTU（在kcptun/vpn里设置，而不是在udp2raw里），建议把MTU设置成1200。client和server端都要设置。

### 提醒
`--cipher-mode xor`表示仅使用简单的XOR加密，这样可以节省CPU占用，以免CPU成为速度瓶颈。如果你需要更强的加密，可以去掉此选项，使用默认的AES加密。加密相关的选项见后文的`--cipher-mode`和`--auth-mode`。

如果要在android上运行，请看[Android简明教程](/doc/android_guide.md)

`-a`选项会自动添加一条/几条iptables规则，udp2raw必须和相应的iptables规则配合才能稳定工作，一定要注意不要忘了`-a`(这是个常见错误)。 如果你不想让udp2raw自动添加iptables规则，可以自己手动添加相应的iptables规则(看一下`-g`选项)，然后以不带`-a`的方式运行udp2raw。

# 进阶操作说明

### 命令选项
```
udp2raw-tunnel
git version:4623f878e0    build date:Nov  3 2024 23:15:46
repository: https://github.com/wangyu-/udp2raw-tunnel

usage:
    run as client : ./this_program -c -l local_listen_ip:local_port -r server_address:server_port  [options]
    run as server : ./this_program -s -l server_listen_ip:server_port -r remote_address:remote_port  [options]

common options,these options must be same on both side:
    --raw-mode            <string>        available values:faketcp(default),udp,icmp and easy-faketcp
    -k,--key              <string>        password to gen symetric key,default:"secret key"
    --cipher-mode         <string>        available values:aes128cfb,aes128cbc(default),xor,none
    --auth-mode           <string>        available values:hmac_sha1,md5(default),crc32,simple,none
    -a,--auto-rule                        auto add (and delete) iptables rule
    -g,--gen-rule                         generate iptables rule then exit,so that you can copy and
                                          add it manually.overrides -a
    --disable-anti-replay                 disable anti-replay,not suggested
    --fix-gro                             try to fix huge packet caused by GRO. this option is at an early stage.
                                          make sure client and server are at same version.
client options:
    --source-ip           <ip>            force source-ip for raw socket
    --source-port         <port>          force source-port for raw socket,tcp/udp only
                                          this option disables port changing while re-connecting
other options:
    --conf-file           <string>        read options from a configuration file instead of command line.
                                          check example.conf in repo for format
    --fifo                <string>        use a fifo(named pipe) for sending commands to the running program,
                                          check readme.md in repository for supported commands.
    --log-level           <number>        0:never    1:fatal   2:error   3:warn
                                          4:info (default)     5:debug   6:trace
    --log-position                        enable file name,function name,line number in log
    --disable-color                       disable log color
    --disable-bpf                         disable the kernel space filter,most time its not necessary
                                          unless you suspect there is a bug
    --dev                 <string>        bind raw socket to a device, not necessary but improves performance
    --sock-buf            <number>        buf size for socket,>=10 and <=10240,unit:kbyte,default:1024
    --force-sock-buf                      bypass system limitation while setting sock-buf
    --seq-mode            <number>        seq increase mode for faketcp:
                                          0:static header,do not increase seq and ack_seq
                                          1:increase seq for every packet,simply ack last seq
                                          2:increase seq randomly, about every 3 packets,simply ack last seq
                                          3:simulate an almost real seq/ack procedure(default)
                                          4:similiar to 3,but do not consider TCP Option Window_Scale,
                                          maybe useful when firewall doesnt support TCP Option
    --lower-level         <string>        send packets at OSI level 2, format:'if_name#dest_mac_adress'
                                          ie:'eth0#00:23:45:67:89:b9'.or try '--lower-level auto' to obtain
                                          the parameter automatically,specify it manually if 'auto' failed
    --wait-lock                           wait for xtables lock while invoking iptables, need iptables v1.4.20+
    --gen-add                             generate iptables rule and add it permanently,then exit.overrides -g
    --keep-rule                           monitor iptables and auto re-add if necessary.implys -a
    --hb-len              <number>        length of heart-beat packet, >=0 and <=1500
    --mtu-warn            <number>        mtu warning threshold, unit:byte, default:1375
    --clear                               clear any iptables rules added by this program.overrides everything
    --retry-on-error                      retry on error, allow to start udp2raw before network is initialized
    -h,--help                             print this help message
```

### iptables 规则,`-a`和`-g`
用raw收发tcp包本质上绕过了linux内核的tcp协议栈。linux碰到raw socket发来的包会不认识，如果一直收到不认识的包，会回复大量RST，造成不稳定或性能问题。所以强烈建议添加iptables规则屏蔽Linux内核的对指定端口的处理。用-a选项，udp2raw会在启动的时候自动帮你加上Iptables规则，退出的时候再自动删掉。如果长期使用，可以用-g选项来生成相应的Iptables规则再自己手动添加，这样规则不会在udp2raw退出时被删掉，可以避免停掉udp2raw后内核向对端回复RST。

用raw收发udp包也类似，只是内核回复的是icmp unreachable。而用raw 收发icmp，内核会自动回复icmp echo。都需要相应的iptables规则。
### `--cipher-mode` 和 `--auth-mode` 
如果要最大的安全性建议用aes128cbc+hmac_sha1。如果要运行在路由器上，建议用xor+simple，可以节省CPU。但是注意xor+simple只能骗过防火墙的包检测，不能防止真正的攻击者。

### `--seq-mode`
faketcp模式并没有模拟tcp的全部。所以理论上有办法把faketcp和真正的tcp流量区分开来（虽然大部分ISP不太可能做这种程度的包检测）。seq-mode可以改变一些seq ack的行为。如果遇到了连接问题，可以尝试更改。在我这边的移动线路用3种模式都没问题。

### `--keep-rule`
定期主动检查iptables，如果udp2raw添加的iptables规则丢了，就重新添加。在一些iptables可能会被其他程序清空的情况下(比如梅林固件和openwrt的路由器)格外有用。

### `--fifo`
指定一个fifo(named pipe)来向运行中的程序发送命令，例如`--fifo fifo.file`：

在client端,可以用`echo reconnect >fifo.file`来强制client换端口重连（上层不断线）.对Server，目前没有效果。

### `--lower-level`
大部分udp2raw不能连通的情况都是设置了不兼容的iptables造成的。--lower-level选项允许绕过本地iptables。在一些iptables不好改动的情况下尤其有效（比如你用的是梅林固件，iptables全是固件自己生成的）。

##### 格式
`if_name#dest_mac_adress`,例如 `eth0#00:23:45:67:89:b9` 。`eth0`换成你的出口网卡名。`00:23:45:67:89:b9`换成网关的mac地址（如果client和server在同一个局域网内，可能不需要网关，这时候直接用对方主机的mac地址，这个属于罕见的应用场景，可以忽略）。

可以用`--lower-level auto`自动获取参数，如果获取参数失败，再手动填写。

##### client端获得--lower-level参数的办法
在client 端，运行`traceroute <server_ip>`，记下第一跳的地址，这个就是`网关ip`。再运行`arp -s <网关ip>`，可以同时查到出口网卡名和mac。

![](/images/lower_level.PNG)

如果traceroute第一跳结果是`* * *`，说明网关屏蔽了对traceroute的应答。需要用`ip route`或`route`查询网关：

![](/images/route.PNG)
##### server端获得--lower-level参数的办法
如果client有公网ip，就`traceroute <client_ip>`。下一步和client端的方法一样。

如果client没有公网ip，就`traceroute google.com` 或`traceroute baidu.com`。下一步和client端的方法一样。

server端也可以用`--lower-level auto` 来尝试自动获得参数，如果无法连接再手动填写。

##### 注意
如果用了`--lower-level`选项。server虽然还可以bind在0.0.0.0，但是因为你显式指定了网络接口，就只能工作在这一个网络接口了。

如果`arp -s`命令查询不到，首先再试几次。如果还是查询不到，那么可能是因为你用的是pppoe方式的拨号宽带，查询不到是正常的。这种情况下`if_name`填pppoe产生的虚拟interface，通常名字叫`pppXXXX`，从`ifconfig`命令的输出里找一下；`dest_mac_adress`填`00:00:00:00:00:00`，例如`ppp0#00:00:00:00:00:00`

### `--conf-file`

为了避免将密码等私密信息暴露给`ps`命令，你也可以使用 `配置文件` 来存储参数。

比如，将以上服务端参数改写成配置文件

`server.conf`:

```
-s
# 你可以像这样添加注释
# 注意，只有整行注释才能在配置文件里使用
# 注释必须独占一行
-l 0.0.0.0:4096
-r 127.0.0.1:7777
-a
-k passwd
--raw-mode faketcp
```

注意，当写入配置文件的时候，密码等参数两边的引号必须去除。

然后就可以使用下面的方式启动服务端

```bash
./udp2raw_amd64 --conf-file server.conf
```

# 性能测试
iperf3 的UDP模式有BUG，所以，这里用iperf3的tcp模式，配合Openvpn，测试udp2raw的性能。（iperf3 udp issue ,https://github.com/esnet/iperf/issues/296 ）

openvpn关掉了自带的加密。
#### iperf3 命令: 
```
iperf3 -c 10.222.2.1 -P40 
iperf3 -c 10.222.2.1 -P40 -R
```
#### client主机
vultr 2.5美元每月套餐(single core 2.4ghz cpu,512m ram,日本东京机房),
#### server主机
bandwagonhost 3.99美元每年套餐(single core 2.0ghz cpu,128m ram,美国洛杉矶机房)
### 测试1
raw_mode: faketcp  cipher_mode: xor  auth_mode: simple

![image4](/images/image4.PNG)

（反向的速度几乎一样，所以只发正向测试的图)

测试中cpu被打满。其中有30%的cpu是被openvpn占的。 如果不用Openvpn中转，实际达到100+Mb/S 应该没问题。

### 测试2
raw_mode: faketcp  cipher_mode: aes128cbc  auth_mode: md5

![image5](/images/image5.PNG)

（反向的速度几乎一样，所以只发正向测试的图)

测试中cpu被打满。绝大多数cpu都是被udp2raw占用的（主要消耗在aes加密）。即使不用Openvpn，速度也不会快很多了。
# 应用
### 中转 kcptun
[udp2raw+kcptun step_by_step教程](kcptun_step_by_step.md)
### 中转 finalspeed
[udp2raw+finalspeed step_by_step教程](finalspeed_step_by_step.md)

# wiki

更多内容请看 wiki:

https://github.com/wangyu-/udp2raw-tunnel/wiki


================================================
FILE: doc/android_guide.md
================================================
# How to run udp2raw on a rooted android device(arm cpu)

There is currently no GUI for udp2raw on android. Make sure you have a terminal app installed to run it. Your device has to be rooted, otherwise you can't use raw sockets.

Download udp2raw_arm from https://github.com/wangyu-/udp2raw-tunnel/releases.

Copy udp2raw_arm to any dir of your **internal storage** .Copying it to **SD card wont work**.

# Steps
1.  run udp2raw_arm  as usual, except you must change the -a option to -g
```
./udp2raw_arm -c -r 44.55.66.77:9966 -l 0.0.0.0:4000 -k1234 --cipher xor -g
```

2. find the generated iptables rule from udp2raw's output,add it manually by running:
```
iptables -I INPUT -s 44.55.66.77/32 -p tcp -m tcp --sport 9966 -j DROP
```

3. run udp2raw_arm without the -g option

```
./udp2raw_arm -c -r 44.55.66.77:9966 -l 0.0.0.0:4000 -k1234 --cipher xor 
```

# ScreenShot 
zoom-in if not large enough

![](/images/android.png)


================================================
FILE: doc/build_guide.md
================================================
# udp2raw build guide

the guide on how to build udp2raw

## Build udp2raw for a specific platform

### linux platform which supports local compile
such as PC,raspberry pi

##### install git
run on debian/ubuntu：
```
sudo apt-get install git
```
run on redhat/centos:
```
sudo yum install git
```
##### clone git code

run in any dir：

```
git clone https://github.com/wangyu-/udp2raw-tunnel.git
cd udp2raw-tunnel
```

##### install compile tool
run on debian/ubuntu：
```
sudo apt-get install build-essential
```

run on redhat/centos:
```
sudo yum groupinstall 'Development Tools'
```

run `make` to compile. The generated `udp2raw` file is the compiled binary.

### platform which needs cross-compile
such as openwrt router,run following instructions on your PC

##### install git
run on debian/ubuntu：
```
sudo apt-get install git
```
run on redhat/centos:
```
sudo yum install git
```

##### download cross compile tool chain

find it on downloads.openwrt.org according to your openwrt version and cpu model.

for example, my tplink wdr4310 runs chaos_calmer 15.05,its with ar71xx cpu，download the following package.

```
http://downloads.openwrt.org/chaos_calmer/15.05/ar71xx/generic/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64.tar.bz2
```
unzip it to any dir,such as ：/home/wangyu/OpenWrt-SDK-ar71xx-for-linux-x86_64-gcc-4.8-linaro_uClibc-0.9.33.2

cd into staging_dir ，toolchain-xxxxx ，bin .find the soft link with g++ suffix. in my case ,its mips-openwrt-linux-g++ ,check for its full path:

```
/home/wangyu/Desktop/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/toolchain-mips_34kc_gcc-4.8-linaro_uClibc-0.9.33.2/bin/mips-openwrt-linux-g++
```
##### compile
modify first line of makefile to:
```
cc_cross=/home/wangyu/Desktop/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/toolchain-mips_34kc_gcc-4.8-linaro_uClibc-0.9.33.2/bin/mips-openwrt-linux-g++
```

run `make cross`，the just generated `udp2raw_cross` is the binary,compile done. copy it to your router to run.

`make cross` generates non-static binary. If you have any problem on running it,try to compile a static binary by using `make cross2` or `make cross3`.If your toolchain supports static compiling, usually one of them will succeed. The generated file is still named `udp2raw_cross`.


## Build a full release (include all binaries supported in the makefile)

1. make sure your linux is amd64 version

2. clone the repo

3. make sure you have g++ and that your g++ supports the `-m32` option; make sure you have installed the libraries needed by the `-m32` option

4. download https://github.com/wangyu-/files/releases/download/files/toolchains.tar.gz , and extract it to the right position (according to the makefile)

5. run `make release` inside udp2raw's directory


================================================
FILE: doc/build_guide.zh-cn.md
================================================
# udp2raw编译方法
本文演示怎么把udp2raw编译到自己所需的平台。

## 可以本地编译的linux平台
比如电脑、树莓派

##### 首先安装git
debian/ubuntu执行：
```
sudo apt-get install git
```
redhat/centos执行:
```
sudo yum install git
```
##### 用git把源码clone至本地

在任意目录执行：

```
git clone https://github.com/wangyu-/udp2raw-tunnel.git
cd udp2raw-tunnel
```

##### 安装g++ make 等工具
debian/ubuntu执行：
```
sudo apt-get install build-essential
```

redhat/centos执行:
```
sudo yum groupinstall 'Development Tools'
```

然后运行make，编译完成。 生成的udp2raw就是编译好的binary。

## 需要交叉编译的平台
比如各种openwrt路由器

##### 首先安装git
debian/ubuntu执行：
```
sudo apt-get install git
```
redhat/centos执行:
```
sudo yum install git
```

##### 下载安装交叉编译工具包
去downloads.openwrt.org上找到自己的openwrt版本和cpu型号对应的SDK。通常openwrt版本号不一样也问题不大，最主要是cpu型号。

比如我的tplink wdr4310运行的是chaos_calmer 15.05,ar71xx cpu，应该下载这个包：

```
http://downloads.openwrt.org/chaos_calmer/15.05/ar71xx/generic/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64.tar.bz2
```
解压到本地任意目录，比如：/home/wangyu/OpenWrt-SDK-ar71xx-for-linux-x86_64-gcc-4.8-linaro_uClibc-0.9.33.2

然后依次进入，staging_dir ，toolchain-xxxxx ，bin 目录，找到后缀是g++的软链,比如我的是mips-openwrt-linux-g++ ，记下这个文件的完整路径：

```
/home/wangyu/Desktop/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/toolchain-mips_34kc_gcc-4.8-linaro_uClibc-0.9.33.2/bin/mips-openwrt-linux-g++
```
##### 编译
把makefile的第一行 cc_cross=后面的内容改成你刚才记下的完整路径：
```
cc_cross=/home/wangyu/Desktop/OpenWrt-SDK-15.05-ar71xx-generic_gcc-4.8-linaro_uClibc-0.9.33.2.Linux-x86_64/staging_dir/toolchain-mips_34kc_gcc-4.8-linaro_uClibc-0.9.33.2/bin/mips-openwrt-linux-g++
```

执行`make cross`，目录下生成udp2raw_cross文件。编译完成。

`make cross`编译出的binary是非静态的。如果运行有问题，可以尝试用`make cross2`或`make cross3`编译静态的binary,你的工具链必须带静态库才能成功编译,生成的文件仍然叫udp2raw_cross.


================================================
FILE: doc/finalspeed_step_by_step/11
================================================
11


================================================
FILE: doc/finalspeed_step_by_step.md
================================================
# udp2raw+finalspeed 加速tcp流量 Step by Step 教程
![image](finalspeed_step_by_step/Capture0.PNG)

##### 背景
国内有些ISP会对UDP做QOS或屏蔽，这时候加速协议对TCP发包模式的支持就很重要。finalspeed虽然本身支持在底层用TCP发包，但是其依赖的libpcap不支持openvz架构，即使不是openvz架构的主机，也存在不稳定的问题。


##### 摘要
udp2raw是一个把udp流量通过raw socket包装成tcp流量的工具。通过用udp2raw配合udp模式的 finalspeed一样可以达到在底层发tcp包，绕过QOS的效果。支持openvz,稳定性也好很多。原理上相当于在finalspeed外面再包了一层tunnel。

本教程会一步一步演示用udp2raw+finalspeed加速http流量的过程。加速任何其他tcp流量也一样，包括$\*\*\*。本文避免讨论科学上网，所以只演示加速http流量。

udp2raw也支持把udp流量包装成Icmp发送，本教程不做演示。

### 环境要求
服务器主机是linux，有root权限。  可以是openvz架构的vps。 也可以是openwrt路由器。

本地主机是windows,本地有openwrt路由器或树莓派或安装了linux虚拟机（网卡设置为桥接模式）。

(如果嫌给虚拟机安装linux麻烦，可以用release里发布的预装了udp2raw的openwrt_x86虚拟机镜像，容量4.4mb)

下面的教程按虚拟机演示，如果你有openwrt路由器或树莓派，可以直接运行在路由器或树莓派上，就不需要虚拟机了。

### 安装
下载好udp2raw的压缩包，分别解压到服务器和本地的虚拟机。

https://github.com/wangyu-/udp2raw-tunnel/releases

在服务器端安装好finalspeed服务端，在本地windows安装好finalspeed的客户端。服务端我以前是用91yun的一键安装脚本安装的，没装过的可以去网上搜一键安装脚本。

### 运行
1.先在服务器主机运行如下命令，确定finalspeed服务端已经正常启动了。

```
netstat -nlp|grep java
```
![image](finalspeed_step_by_step/Capture5.PNG)

如果显示了150端口，就表示服务端启动好了。

2.在服务器启动udp2raw server
```
 ./udp2raw_amd64 -s -l0.0.0.0:8855 -r 127.0.0.1:150  -a -k "passwd" --raw-mode faketcp
```
![image](finalspeed_step_by_step/Capture2.PNG)

3.在本地的虚拟机上启动udp2raw client  ,假设服务器ip是45.66.77.88
```
./udp2raw_amd64 -c -r45.66.77.88:8855 -l0.0.0.0:150 --raw-mode faketcp -a -k"passwd"
```
如果一切正常，client端会显示client_ready:

![image](finalspeed_step_by_step/Capture3.PNG)

记下红框中的ip,这是虚拟机的网卡ip

在server端也会显示server_ready
![image](finalspeed_step_by_step/Capture4.PNG)

4.在本地windows,按图配置好finalspeed的客户端。注意，192.168.205.8改成你刚才记下来的IP，带宽也要按实际的填。传输协议要选UDP.
![image](finalspeed_step_by_step/Capture.PNG)

5.所有准备工作已经完成了，在本地访问本地的8012端口，相当于访问服务器的80端口。

来试一下通过http://127.0.0.1:8012/ 下载文件 ，1.5M/s：
![image](finalspeed_step_by_step/Capture6.PNG)

再试一下直接通过服务器的ip访问，http://45.66.77.88:80/ ，速度只有600K/s
![image](finalspeed_step_by_step/Capture7.PNG)

教程就到这里了，用来加速其他的tcp服务也是一样的，只要在第三步那里设置其他的端口。


================================================
FILE: doc/kcptun_step_by_step.md
================================================
# udp2raw+kcptun 加速tcp流量 Step by Step 教程
![image](kcptun_step_by_step/Capture00.PNG)

本教程会一步一步演示用udp2raw+kcptun加速SSH流量的过程。加速任何其他tcp流量也一样，包括$\*\*\*；本文避免涉及科学上网，所以演示ssh。

### 环境要求
两边的主机都是linux，有root权限。 可以是openwrt路由器或树莓派，也可以是root了的android。

在windows/mac上运行udp2raw可以参考这个教程：

https://github.com/wangyu-/udp2raw-tunnel/wiki/在windows-mac上运行udp2raw客户端，带图形界面


### 安装
下载好kcptun和udp2raw的压缩包，分别解压到client端和server端。

https://github.com/xtaci/kcptun/releases
https://github.com/wangyu-/udp2raw-tunnel/releases

解压好后，如图：
![image](kcptun_step_by_step/Capture0.PNG)

### 运行
1.在远程服务器运行 udp2raw_amd64 server模式：
```
./udp2raw_amd64 -s -l0.0.0.0:8855 -r 127.0.0.1:4000 -k "passwd" --raw-mode faketcp -a
```
![image](kcptun_step_by_step/Capture.PNG)

2.在本地运行udp2raw_amd64 client模式，假设server ip是45.66.77.88：
```
./udp2raw_amd64 -c -r45.66.77.88:8855 -l0.0.0.0:4000 --raw-mode faketcp -a -k"passwd"
```
如果一切正常client端输出如下，显示client_ready：
![image](kcptun_step_by_step/Capture2.PNG)

server端也会有类似输出,显示server_ready：
![image](kcptun_step_by_step/Capture3.PNG)

3.在远程服务器运行 kcp server


```
./server_linux_amd64 -t "127.0.0.1:22" -l ":4000" -mode fast2 -mtu 1300
```
-mtu 1300很重要，或者设置成更小。
![image](kcptun_step_by_step/Capture6.PNG)

4.在本地运行 


```
 ./client_linux_amd64 -r "127.0.0.1:4000" -l ":3322" -mode fast2 -mtu 1300
```
-mtu 1300很重要，或者设置成更小。
![image](kcptun_step_by_step/Capture7.PNG)

5.所有准备工作已经做好，在本地运行
```
ssh -p 3322 root@127.0.0.1
```
已经连进去了，而且是经过kcptun加速的：
![image](kcptun_step_by_step/Capture8.PNG)


================================================
FILE: doc/openvpn_guide.md
================================================
# udp2raw+openvpn config guide
![image_vpn](/images/openvpn.PNG)

![image4](/images/image4.PNG)

# udp2raw command
#### run at server side
```
./udp2raw_amd64 -s -l0.0.0.0:8855 -r 127.0.0.1:7777 -k "passwd" --raw-mode faketcp -a
```
#### run at client side
assume server ip is 45.66.77.88
```
./udp2raw_amd64 -c -l0.0.0.0:3333 -r 45.66.77.88:8855 -k "passwd" --raw-mode faketcp -a
```

#### hint
You can add `--cipher-mode xor` `--auth-mode simple` to **both** sides to obtain maximum performance(but poor security).

# openvpn config

#### client side config
```
client
dev tun100
proto udp

remote 127.0.0.1 3333
resolv-retry infinite 
nobind 
persist-key 
persist-tun  

ca /root/add-on/openvpn/ca.crt
cert /root/add-on/openvpn/client.crt
key /root/add-on/openvpn/client.key

keepalive 3 20
verb 3
mute 20

comp-lzo no
cipher none      ##### disable openvpn's cipher and auth for maximized performance.
auth none        ##### you can enable openvpn's cipher and auth if you don't care about performance, or you don't trust udp2raw's encryption

fragment 1200       ##### very important    you can turn it up a bit. but,the lower the safer
mssfix 1200         ##### very important

sndbuf 2000000      ##### important
rcvbuf 2000000      ##### important
txqueuelen 4000     ##### suggested
```


#### server side config
```
local 0.0.0.0
port 7777 
proto udp
dev tun 

ca /etc/openvpn/easy-rsa/2.0/keys/ca.crt
cert /etc/openvpn/easy-rsa/2.0/keys/server.crt
key /etc/openvpn/easy-rsa/2.0/keys/server.key
dh /etc/openvpn/easy-rsa/2.0/keys/dh1024.pem

server 10.222.2.0 255.255.255.0 
ifconfig 10.222.2.1 10.222.2.6

client-to-client
duplicate-cn 
keepalive 10 60 

max-clients 50

persist-key
persist-tun

status /etc/openvpn/openvpn-status.log

verb 3
mute 20  

comp-lzo no
cipher none      ##### disable openvpn's cipher and auth for maximized performance.
auth none        ##### you can enable openvpn's cipher and auth if you don't care about performance, or you don't trust udp2raw's encryption

fragment 1200       ##### very important    you can turn it up a bit. but,the lower the safer
mssfix 1200         ##### very important

sndbuf 2000000      ##### important
rcvbuf 2000000      ##### important
txqueuelen 4000     ##### suggested
```


================================================
FILE: encrypt.cpp
================================================
#include "lib/aes-common.h"
#include "lib/md5.h"
#include "lib/pbkdf2-sha1.h"
#include "lib/pbkdf2-sha256.h"
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include "encrypt.h"
#include "common.h"
#include "log.h"

// static uint64_t seq=1;

// This program uses an all-zero IV; you should make sure the first block of data contains random/nonce data.
static int8_t zero_iv[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/****
 * security of zero_iv + nonce first data block
 * https://crypto.stackexchange.com/questions/5421/using-cbc-with-a-fixed-iv-and-a-random-first-plaintext-block
 ****/

// Key material. All of it is filled in by my_init_keys().
char normal_key[16 + 100];    // md5 of the key string with "key1" appended; reserved for compatibility
const int hmac_key_len = 64;  // generate 512-bit long keys, use the first n chars when needed
const int cipher_key_len = 64;
unsigned char hmac_key_encrypt[hmac_key_len + 100];      // key for hmac, sending direction
unsigned char hmac_key_decrypt[hmac_key_len + 100];      // key for hmac, receiving direction
unsigned char cipher_key_encrypt[cipher_key_len + 100];  // key for aes etc., sending direction
unsigned char cipher_key_decrypt[cipher_key_len + 100];  // key for aes etc., receiving direction

// Dirty fix for gro: 256 bytes derived in my_init_keys() with the HKDF label "gro".
char gro_xor[256 + 100];

// Human-readable names of the auth modes.
unordered_map<int, const char *> auth_mode_tostring = {
    {auth_none, "none"},
    {auth_md5, "md5"},
    {auth_crc32, "crc32"},
    {auth_simple, "simple"},
    {auth_hmac_sha1, "hmac_sha1"},
};

// Human-readable names of the cipher modes.
unordered_map<int, const char *> cipher_mode_tostring = {
    {cipher_none, "none"},
    {cipher_aes128cfb, "aes128cfb"},
    {cipher_aes128cbc, "aes128cbc"},
    {cipher_xor, "xor"},
};
// TODO aes-gcm

// Defaults: md5 authentication with aes128cbc encryption.
auth_mode_t auth_mode = auth_md5;
cipher_mode_t cipher_mode = cipher_aes128cbc;
int is_hmac_used = 0;  // set to 1 by my_init_keys() when auth_mode is auth_hmac_sha1

int aes128cfb_old = 0;  // NOTE(review): presumably selects a legacy aes128cfb behaviour -- confirm where it is read

// TODO key negotiation and forward secrecy

/*
 * Expand the 32-byte master secret into one sub-key with HKDF-SHA256, using
 * `info` as the label.
 *
 * The call is made unconditionally and only its result is asserted. The
 * previous code placed the call itself inside assert(), so a build with NDEBUG
 * defined skipped the derivation and left the key buffers zeroed.
 *
 * Returns the result of hkdf_sha256_expand() (0 on success).
 */
static int derive_labeled_key(unsigned char *master, const char *info, unsigned char *out, int out_len) {
    int ret = hkdf_sha256_expand(master, 32, (unsigned char *)info, strlen(info), out, out_len);
    assert(ret == 0);
    return ret;
}

/*
 * Derive every key used by this module from the user password.
 *
 *   user_passwd  NUL-terminated password / key string.
 *   is_client    non-zero on the client side; selects which direction label
 *                ("client-->server" or "server-->client") is used for the
 *                encrypt keys and which for the decrypt keys, so that one
 *                side's encrypt key equals the other side's decrypt key.
 *
 * Fills normal_key, hmac_key_{en,de}crypt, cipher_key_{en,de}crypt and gro_xor,
 * and sets is_hmac_used when auth_mode is auth_hmac_sha1.
 *
 * Returns 0 on success, -1 if a key expansion fails (reachable only when
 * assert() is compiled out; otherwise the assertion aborts first, as before).
 */
int my_init_keys(const char *user_passwd, int is_client) {
    int len = strlen(user_passwd);

    // Legacy key: md5(password + "key1"). A std::string is used so that a long
    // password cannot overflow a fixed-size stack buffer.
    std::string legacy_seed(user_passwd);
    legacy_seed += "key1";
    md5((uint8_t *)&legacy_seed[0], legacy_seed.size(), (uint8_t *)normal_key);

    if (auth_mode == auth_hmac_sha1)
        is_hmac_used = 1;

    // The derived keys are always generated (the original guard was always true).
    unsigned char salt[400] = "";
    char salt_text[400] = "udp2raw_salt1";
    md5((uint8_t *)(salt_text), strlen(salt_text), salt);  // TODO different salt per session

    unsigned char pbkdf2_output1[400] = "";
    PKCS5_PBKDF2_HMAC_SHA256((uint8_t *)user_passwd, len, salt, 16, 10000, 32, pbkdf2_output1);  // TODO argon2 ?

    const char *hmac_s2c = "hmac_key server-->client";
    const char *hmac_c2s = "hmac_key client-->server";
    const char *cipher_s2c = "cipher_key server-->client";
    const char *cipher_c2s = "cipher_key client-->server";

    // The server encrypts with the server-->client keys; the client does the opposite.
    const char *info_hmac_encrypt = is_client ? hmac_c2s : hmac_s2c;
    const char *info_hmac_decrypt = is_client ? hmac_s2c : hmac_c2s;
    const char *info_cipher_encrypt = is_client ? cipher_c2s : cipher_s2c;
    const char *info_cipher_decrypt = is_client ? cipher_s2c : cipher_c2s;

    if (derive_labeled_key(pbkdf2_output1, info_cipher_encrypt, cipher_key_encrypt, cipher_key_len) != 0) return -1;
    if (derive_labeled_key(pbkdf2_output1, info_cipher_decrypt, cipher_key_decrypt, cipher_key_len) != 0) return -1;
    if (derive_labeled_key(pbkdf2_output1, info_hmac_encrypt, hmac_key_encrypt, hmac_key_len) != 0) return -1;
    if (derive_labeled_key(pbkdf2_output1, info_hmac_decrypt, hmac_key_decrypt, hmac_key_len) != 0) return -1;

    const char *gro_info = "gro";
    if (derive_labeled_key(pbkdf2_output1, gro_info, (unsigned char *)gro_xor, 256) != 0) return -1;

    print_binary_chars(normal_key, 16);
    print_binary_chars((char *)hmac_key_encrypt, hmac_key_len);
    print_binary_chars((char *)hmac_key_decrypt, hmac_key_len);
    print_binary_chars((char *)cipher_key_encrypt, cipher_key_len);
    print_binary_chars((char *)cipher_key_decrypt, cipher_key_len);

    return 0;
}
/*
 *  CRC-32 (reflected polynomial 0xEDB88320, initial value 0xFFFFFFFF, final
 *  complement) of message[0..len), processing one byte per iteration.
 *
 *  The algorithm comes from  http://www.hackersdelight.org/hdcodetxt/crc.c.txt
 *
 *  The running value is kept in an unsigned int and each of the low eight bits
 *  is turned into an all-ones/all-zeros mask with unsigned arithmetic. The
 *  original kept it in a signed int and built the masks with
 *  `crc << k >> 31`, which depends on signed-overflow and arithmetic-shift
 *  behaviour that the language does not guarantee.
 *
 *  A len <= 0 yields the CRC of the empty message (0).
 */
unsigned int crc32h(unsigned char *message, int len) {
    const unsigned int g0 = 0xEDB88320, g1 = g0 >> 1,
                       g2 = g0 >> 2, g3 = g0 >> 3, g4 = g0 >> 4, g5 = g0 >> 5,
                       g6 = (g0 >> 6) ^ g0, g7 = ((g0 >> 6) ^ g0) >> 1;
    // poly_for_bit[k] is the contribution of bit k of the low byte.
    const unsigned int poly_for_bit[8] = {g7, g6, g5, g4, g3, g2, g1, g0};

    unsigned int crc = 0xFFFFFFFF;
    for (int i = 0; i < len; i++) {
        crc ^= message[i];
        unsigned int c = 0;
        for (int bit = 0; bit < 8; bit++) {
            // 0u - 1u == 0xFFFFFFFF when the bit is set, 0 otherwise.
            const unsigned int mask = 0u - ((crc >> bit) & 1u);
            c ^= mask & poly_for_bit[bit];
        }
        crc = (crc >> 8) ^ c;
    }
    return ~crc;
}

/*
 void sum(const unsigned  char *data,int len,unsigned char*  res) {
   memset(res,0,sizeof(int));
   for(int i=0,j=0;i<len;i++,j++)
   {
           if(j==4) j=0;
           res[j]+=data[i];
   }

   return ;
}*/

/*
 * Cheap 8-byte checksum: a djb2 variant (hash * 33 ^ c) followed by sdbm,
 * each written to `res` as a 32-bit value in network byte order.
 *
 *   str  input bytes
 *   len  number of bytes to hash; len <= 0 hashes nothing
 *   res  receives 8 bytes: djb2 in res[0..3], sdbm in res[4..7]
 *
 * Only str[0..len) is read. The original loop fetched *str before testing the
 * bound, and so always read one byte past the end of the input.
 */
void simple_hash(unsigned char *str, int len, unsigned char res[8])  // djb2+ sdbm
{
    u32_t hash = 5381;
    u32_t hash2 = 0;
    for (int i = 0; i < len; i++) {
        const int c = str[i];
        // hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
        hash = ((hash << 5) + hash) ^ c; /* (hash * 33) ^ c */
        hash2 = c + (hash2 << 6) + (hash2 << 16) - hash2;
    }

    hash = htonl(hash);
    hash2 = htonl(hash2);
    memcpy(res, &hash, sizeof(hash));
    memcpy(res + sizeof(hash), &hash2, sizeof(hash2));
}

// Copies the `len` payload bytes from `data` into `output` and appends their
// 16-byte md5 digest. `len` grows by 16. Always returns 0.
// `output` must have room for len + 16 bytes.
int auth_md5_cal(const char *data, char *output, int &len) {
    unsigned char *out = (unsigned char *)output;
    memcpy(out, data, len);  // TODO inefficient code
    unsigned char *digest = out + len;
    md5(out, len, digest);
    len = len + 16;
    return 0;
}

// Copies the payload into output and appends a 20-byte sha1_hmac() tag
// computed over data with hmac_key_encrypt; len grows by 20.
int auth_hmac_sha1_cal(const char *data, char *output, int &len) {
    mylog(log_trace, "auth_hmac_sha1_cal() is called\n");

    const int body_len = len;
    memcpy(output, data, body_len);  // TODO inefficient code

    // use key len of 20 instead of hmac_key_len, "extra length would not significantly increase the function strength" (rfc2104)
    unsigned char *const tag = (unsigned char *)(output + body_len);
    sha1_hmac(hmac_key_encrypt, 20, (const unsigned char *)data, body_len, tag);

    len = body_len + 20;
    return 0;
}

// Verifies the 20-byte sha1_hmac() tag stored at the end of data.
//
// Returns 0 and shrinks len by 20 when the tag matches, -1 when the input is
// shorter than a tag, and -2 on mismatch (len is left untouched on failure).
//
// The tag is compared without an early exit, so the running time does not
// depend on how many leading bytes of a forged tag happen to be correct.
int auth_hmac_sha1_verify(const char *data, int &len) {
    mylog(log_trace, "auth_hmac_sha1_verify() is called\n");
    if (len < 20) {
        mylog(log_trace, "auth_hmac_sha1_verify len<20\n");
        return -1;
    }
    unsigned char res[20];

    sha1_hmac(hmac_key_decrypt, 20, (const unsigned char *)data, len - 20, res);

    const unsigned char *received = (const unsigned char *)data + (len - 20);
    unsigned char diff = 0;
    for (int i = 0; i < 20; i++) {
        diff |= (unsigned char)(res[i] ^ received[i]);
    }

    if (diff != 0) {
        mylog(log_trace, "auth_hmac_sha1 check failed\n");
        return -2;
    }
    len -= 20;
    return 0;
}

// Copies the payload into output and appends crc32h() of it in network byte
// order; len grows by sizeof(unsigned int).
int auth_crc32_cal(const char *data, char *output, int &len) {
    const int payload_len = len;
    memcpy(output, data, payload_len);  // TODO inefficient code

    const unsigned int checksum_be = htonl(crc32h((unsigned char *)output, payload_len));
    memcpy(output + payload_len, &checksum_be, sizeof(unsigned int));

    len = payload_len + (int)sizeof(unsigned int);
    return 0;
}

// Copies the payload into output and appends the 8-byte simple_hash() of it;
// len grows by 8.
int auth_simple_cal(const char *data, char *output, int &len) {
    const int payload_len = len;
    unsigned char *const out = (unsigned char *)output;

    memcpy(output, data, payload_len);  // TODO inefficient code
    simple_hash(out, payload_len, out + payload_len);

    len = payload_len + 8;
    return 0;
}
// Checks the trailing 8-byte simple_hash() of data.
// Returns 0 on match and -1 otherwise. Note that len is reduced by 8 before
// the comparison, so it stays reduced even when the hash does not match.
int auth_simple_verify(const char *data, int &len) {
    if (len < 8) {
        return -1;
    }

    len -= 8;
    unsigned char expected[8];
    simple_hash((unsigned char *)data, len, expected);

    return memcmp(expected, data + len, 8) == 0 ? 0 : -1;
}

// "No authentication" mode: the payload is copied to output unchanged and
// len is not modified.
int auth_none_cal(const char *data, char *output, int &len) {
    const int payload_len = len;
    memcpy(output, data, payload_len);
    return 0;
}
// Verifies the 16-byte md5() digest stored at the end of data.
// Returns 0 and shrinks len by 16 on match, -1 when the input is shorter than
// a digest, -2 on mismatch (len is untouched on failure).
int auth_md5_verify(const char *data, int &len) {
    if (len < 16) {
        mylog(log_trace, "auth_md5_verify len<16\n");
        return -1;
    }

    const int payload_len = len - 16;
    unsigned char digest[16];
    md5((unsigned char *)data, payload_len, digest);

    const bool matches = memcmp(digest, data + payload_len, 16) == 0;
    if (!matches) {
        mylog(log_trace, "auth_md5_verify md5 check failed\n");
        return -2;
    }

    len = payload_len;
    return 0;
}
// "No authentication" mode: nothing is checked, every packet is accepted and
// len is left as is.
int auth_none_verify(const char *data, int &len) {
    (void)data;
    (void)len;
    return 0;
}

// XORs every input byte with the key, cycling through the first 16 key bytes.
// len is not modified; always returns 0.
int cipher_xor_encrypt(const char *data, char *output, int &len, char *key) {
    for (int pos = 0; pos < len; ++pos) {
        output[pos] = data[pos] ^ key[pos % 16];
    }
    return 0;
}
// Inverse of the XOR cipher, which is the same operation: each byte is XORed
// with the key, cycling through the first 16 key bytes. len is not modified.
int cipher_xor_decrypt(const char *data, char *output, int &len, char *key) {
    int key_pos = 0;
    for (int pos = 0; pos < len; ++pos) {
        output[pos] = data[pos] ^ key[key_pos];
        key_pos = (key_pos + 1) & 15;  // wrap after 16 key bytes
    }
    return 0;
}

// Pads data in place so that data_len becomes a multiple of padding_num.
// At least one byte is always added, and the number of added bytes is stored
// in the last byte so de_padding() can strip it again.
//
// Every added byte is set to the pad count. Previously only the last one was
// written, so the bytes in between carried whatever the caller's buffer held.
// The caller must provide room for up to padding_num extra bytes.
// Always returns 0.
int padding(char *data, int &data_len, int padding_num) {
    const int old_len = data_len;

    int new_len = old_len + 1;
    if (new_len % padding_num != 0) {
        new_len = (new_len / padding_num) * padding_num + padding_num;
    }

    const int pad = new_len - old_len;
    memset(data + old_len, pad, pad);

    data_len = new_len;
    return 0;
}

// Strips the padding added by padding(): the last byte holds the number of
// bytes to remove.
//
// Returns 0 on success. Returns -1 when there is no data (zero or negative
// data_len, which would otherwise index before the buffer), when the pad
// count exceeds padding_num, or when removing it would make the length
// negative.
int de_padding(const char *data, int &data_len, int padding_num) {
    if (data_len <= 0) return -1;

    const int pad = (uint8_t)data[data_len - 1];
    if (pad > padding_num) return -1;

    data_len -= pad;
    if (data_len < 0) {
        return -1;
    }
    return 0;
}
// Encrypts one AES block from data into output with cipher_key_encrypt.
// With aes_key_optimize set, the key pointer is only passed on the first call;
// later calls pass a null key to AES_ECB_encrypt_buffer.
void aes_ecb_encrypt(const char *data, char *output) {
    static int first_time = 1;

    uint8_t *key = (uint8_t *)cipher_key_encrypt;
    if (aes_key_optimize) {
        if (first_time != 0) {
            first_time = 0;
        } else {
            key = 0;
        }
    }

    AES_ECB_encrypt_buffer((uint8_t *)data, key, (uint8_t *)output);
}
// In-place variant of aes_ecb_encrypt(): the 16-byte block at data is copied
// to a temporary and the result is written back to data.
void aes_ecb_encrypt1(char *data) {
    char block[16];
    memcpy(block, data, sizeof(block));
    aes_ecb_encrypt(block, data);
}
// Decrypts one AES block from data into output with cipher_key_decrypt.
// With aes_key_optimize set, the key pointer is only passed on the first call;
// later calls pass a null key to AES_ECB_decrypt_buffer.
void aes_ecb_decrypt(const char *data, char *output) {
    static int first_time = 1;

    uint8_t *key = (uint8_t *)cipher_key_decrypt;
    if (aes_key_optimize) {
        if (first_time != 0) {
            first_time = 0;
        } else {
            key = 0;
        }
    }

    AES_ECB_decrypt_buffer((uint8_t *)data, key, (uint8_t *)output);
}
// In-place variant of aes_ecb_decrypt(): the 16-byte block at data is copied
// to a temporary and the result is written back to data.
void aes_ecb_decrypt1(char *data) {
    char block[16];
    memcpy(block, data, sizeof(block));
    aes_ecb_decrypt(block, data);
}
int cipher_aes128cbc_encrypt(const char *data, char *output, int &len, char *key) {
    static int first_time = 1;

    char buf[buf_len];
    memcpy(buf, data, len);  // TODO inefficient code

    if (padding(buf, len, 16) < 0) return -1;

    if (aes_key_optimize) {
        if (first_time == 0)
            key = 0;
        else
            first_time = 0;
    }

    AES_CBC_encrypt_buffer((unsigned char *)output, (unsigned char *)buf, len, (unsigned char *)key, (unsigned char *)zero_iv);
    return 0;
}
int cipher_aes128cfb_encrypt(const char *data, char *output, int &len, char *key) {
    static int first_time = 1;
    assert(len >= 16);

    char buf[buf_len];
    memcpy(buf, data, len);  // TODO inefficient code
    if (aes_key_optimize) {
        if (first_time == 0)
            key = 0;
        else
            first_time = 0;
    }
    if (!aes128cfb_old) {
        aes_ecb_encrypt(data, buf);  // encrypt the first block
    }

    AES_CFB_encrypt_buffer((unsigned char *)output, (unsigned char *)buf, len, (unsigned char *)key, (unsigned char *)zero_iv);
    return 0;
}
// Verifies the trailing crc32h() checksum (stored in network byte order).
// Returns 0 and shrinks len by sizeof(unsigned int) on match, -1 otherwise.
int auth_crc32_verify(const char *data, int &len) {
    const int tag_len = int(sizeof(unsigned int));
    if (len < tag_len) {
        mylog(log_debug, "auth_crc32_verify len<%d\n", int(sizeof(unsigned int)));
        return -1;
    }

    const int payload_len = len - tag_len;
    const unsigned int expected_be = htonl(crc32h((unsigned char *)data, payload_len));

    if (memcmp(data + payload_len, &expected_be, sizeof(unsigned int)) != 0) {
        mylog(log_debug, "auth_crc32_verify memcmp fail\n");
        return -1;
    }

    len = payload_len;
    return 0;
}
// "No cipher" mode: the payload is copied to output unchanged; key is unused
// and len is not modified.
int cipher_none_encrypt(const char *data, char *output, int &len, char *key) {
    (void)key;
    const int n = len;
    memcpy(output, data, n);
    return 0;
}
// AES-CBC decryption with zero_iv followed by de_padding().
// Returns -1 when len is not a multiple of 16 or the padding is invalid;
// on success len is the unpadded payload length.
// With aes_key_optimize set, key is only passed on the first call.
int cipher_aes128cbc_decrypt(const char *data, char *output, int &len, char *key) {
    static int first_time = 1;

    const bool whole_blocks = (len % 16 == 0);
    if (!whole_blocks) {
        mylog(log_debug, "len%%16!=0\n");
        return -1;
    }

    if (aes_key_optimize) {
        if (first_time != 0) {
            first_time = 0;
        } else {
            key = 0;
        }
    }

    AES_CBC_decrypt_buffer((unsigned char *)output, (unsigned char *)data, len, (unsigned char *)key, (unsigned char *)zero_iv);

    return de_padding(output, len, 16) < 0 ? -1 : 0;
}
// AES-CFB decryption with zero_iv; len is unchanged. Inputs shorter than one
// block are rejected with -1. Unless aes128cfb_old is set, the first 16-byte
// block of the result is additionally passed through aes_ecb_decrypt1().
// With aes_key_optimize set, key is only passed on the first call.
int cipher_aes128cfb_decrypt(const char *data, char *output, int &len, char *key) {
    static int first_time = 1;
    if (len < 16) {
        return -1;
    }

    if (aes_key_optimize) {
        if (first_time != 0) {
            first_time = 0;
        } else {
            key = 0;
        }
    }

    AES_CFB_decrypt_buffer((unsigned char *)output, (unsigned char *)data, len, (unsigned char *)key, (unsigned char *)zero_iv);

    if (aes128cfb_old == 0) {
        aes_ecb_decrypt1(output);  // decrypt the first block
    }
    // if(de_padding(output,len,16)<0) return -1;
    return 0;
}

// "No cipher" mode: the payload is copied to output unchanged; key is unused
// and len is not modified.
int cipher_none_decrypt(const char *data, char *output, int &len, char *key) {
    (void)key;
    const int n = len;
    memcpy(output, data, n);
    return 0;
}

// Dispatches to the auth_*_cal() routine selected by the global auth_mode.
// An unknown mode trips the assertion; -1 is returned if assertions are
// compiled out.
int auth_cal(const char *data, char *output, int &len) {
    mylog(log_trace, "auth:%d\n", auth_mode);

    if (auth_mode == auth_crc32) return auth_crc32_cal(data, output, len);
    if (auth_mode == auth_md5) return auth_md5_cal(data, output, len);
    if (auth_mode == auth_simple) return auth_simple_cal(data, output, len);
    if (auth_mode == auth_none) return auth_none_cal(data, output, len);
    if (auth_mode == auth_hmac_sha1) return auth_hmac_sha1_cal(data, output, len);

    assert(0 == 1);
    return -1;
}
// Dispatches to the auth_*_verify() routine selected by the global auth_mode.
// An unknown mode trips the assertion; -1 is returned if assertions are
// compiled out.
int auth_verify(const char *data, int &len) {
    mylog(log_trace, "auth:%d\n", auth_mode);

    if (auth_mode == auth_crc32) return auth_crc32_verify(data, len);
    if (auth_mode == auth_md5) return auth_md5_verify(data, len);
    if (auth_mode == auth_simple) return auth_simple_verify(data, len);
    if (auth_mode == auth_none) return auth_none_verify(data, len);
    if (auth_mode == auth_hmac_sha1) return auth_hmac_sha1_verify(data, len);

    assert(0 == 1);
    return -1;
}
// Dispatches to the cipher_*_encrypt() routine selected by the global
// cipher_mode. An unknown mode trips the assertion; -1 is returned if
// assertions are compiled out.
int cipher_encrypt(const char *data, char *output, int &len, char *key) {
    mylog(log_trace, "cipher:%d\n", cipher_mode);

    int rc = -1;
    switch (cipher_mode) {
        case cipher_aes128cbc:
            rc = cipher_aes128cbc_encrypt(data, output, len, key);
            break;
        case cipher_aes128cfb:
            rc = cipher_aes128cfb_encrypt(data, output, len, key);
            break;
        case cipher_xor:
            rc = cipher_xor_encrypt(data, output, len, key);
            break;
        case cipher_none:
            rc = cipher_none_encrypt(data, output, len, key);
            break;
        default:
            assert(0 == 1);
            break;
    }
    return rc;
}
// Dispatches to the cipher_*_decrypt() routine selected by the global
// cipher_mode. An unknown mode trips the assertion; -1 is returned if
// assertions are compiled out.
int cipher_decrypt(const char *data, char *output, int &len, char *key) {
    mylog(log_trace, "cipher:%d\n", cipher_mode);

    int rc = -1;
    switch (cipher_mode) {
        case cipher_aes128cbc:
            rc = cipher_aes128cbc_decrypt(data, output, len, key);
            break;
        case cipher_aes128cfb:
            rc = cipher_aes128cfb_decrypt(data, output, len, key);
            break;
        case cipher_xor:
            rc = cipher_xor_decrypt(data, output, len, key);
            break;
        case cipher_none:
            rc = cipher_none_decrypt(data, output, len, key);
            break;
        default:
            assert(0 == 1);
            break;
    }
    return rc;
}

int encrypt_AE(const char *data, char *output, int &len /*,char * key*/) {
    mylog(log_trace, "encrypt_AE is called\n");
    char buf[buf_len];
    char buf2[buf_len];
    memcpy(buf, data, len);
    if (cipher_encrypt(buf, buf2, len, (char *)cipher_key_encrypt) != 0) {
        mylog(log_debug, "cipher_encrypt failed ");
        return -1;
    }
    if (auth_cal(buf2, output, len) != 0) {
        mylog(log_debug, "auth_cal failed ");
        return -1;
    }

    // printf("%d %x %x\n",len,(int)(output[0]),(int)(output[1]));
    // print_binary_chars(output,len);

    // use encrypt-then-MAC scheme
    return 0;
}

// Counterpart of encrypt_AE(): the authentication tag is verified first
// (which also removes it from len), and only then is the remaining
// ciphertext decrypted with cipher_key_decrypt into output.
// Returns 0 on success, -1 if either step fails.
int decrypt_AE(const char *data, char *output, int &len /*,char * key*/) {
    mylog(log_trace, "decrypt_AE is called\n");
    // printf("%d %x %x\n",len,(int)(data[0]),(int)(data[1]));
    // print_binary_chars(data,len);

    const int auth_rc = auth_verify(data, len);
    if (auth_rc != 0) {
        mylog(log_debug, "auth_verify failed\n");
        return -1;
    }

    const int cipher_rc = cipher_decrypt(data, output, len, (char *)cipher_key_decrypt);
    if (cipher_rc != 0) {
        mylog(log_debug, "cipher_decrypt failed \n");
        return -1;
    }

    return 0;
}

int my_encrypt(const char *data, char *output, int &len /*,char * key*/) {
    if (len < 0) {
        mylog(log_trace, "len<0");
        return -1;
    }
    if (len > max_data_len) {
        mylog(log_warn, "len>max_data_len");
        return -1;
    }

    if (is_hmac_used)
        return encrypt_AE(data, output, len);

    char buf[buf_len];
    char buf2[buf_len];
    memcpy(buf, data, len);
    if (auth_cal(buf, buf2, len) != 0) {
        mylog(log_debug, "auth_cal failed ");
        return -1;
    }
    if (cipher_encrypt(buf2, output, len, normal_key) != 0) {
        mylog(log_debug, "cipher_encrypt failed ");
        return -1;
    }
    return 0;
}

// Top-level packet decryption. len is the packet length on entry and the
// payload length on return.
//
// Lengths outside [0, max_data_len] are rejected with -1. When is_hmac_used
// is set the work is delegated to decrypt_AE(); otherwise the packet is
// decrypted with normal_key and the auth tag of the plaintext is verified.
int my_decrypt(const char *data, char *output, int &len /*,char * key*/) {
    if (len < 0) return -1;
    if (len > max_data_len) {
        mylog(log_warn, "len>max_data_len\n");
        return -1;
    }

    if (is_hmac_used)
        return decrypt_AE(data, output, len);

    if (cipher_decrypt(data, output, len, normal_key) != 0) {
        mylog(log_debug, "cipher_decrypt failed \n");
        return -1;
    }
    if (auth_verify(output, len) != 0) {
        mylog(log_debug, "auth_verify failed\n");
        return -1;
    }

    return 0;
}

// Placeholder for an AEAD encryption mode: not implemented, always fails
// with -1 and touches none of its arguments.
int encrypt_AEAD(uint8_t *data, uint8_t *output, int &len, uint8_t *key, uint8_t *header, int hlen) {
    // TODO
    (void)data;
    (void)output;
    (void)len;
    (void)key;
    (void)header;
    (void)hlen;
    return -1;
}

// Placeholder for an AEAD decryption mode: not implemented, always fails
// with -1 and touches none of its arguments.
int decrypt_AEAD(uint8_t *data, uint8_t *output, int &len, uint8_t *key, uint8_t *header, int hlen) {
    // TODO
    (void)data;
    (void)output;
    (void)len;
    (void)key;
    (void)header;
    (void)hlen;
    return -1;
}


================================================
FILE: encrypt.h
================================================
// Public interface of the packet authentication / encryption layer.
#ifndef UDP2RAW_ENCRYPTION_H_
#define UDP2RAW_ENCRYPTION_H_

//#include "aes.h"
//#include "md5.h"
#include "common.h"

// using namespace std;
// extern char key[16];

// When non-zero, the AES wrappers hand the key to the AES library only on
// their first call and pass a null key afterwards, so a key cannot be changed
// once it has been used.
const int aes_key_optimize = 1;  // if enabled,once you used a key for aes,you cant change it anymore
// When non-zero, the aes128cfb cipher skips the extra ECB pass over the first
// block (see cipher_aes128cfb_encrypt / cipher_aes128cfb_decrypt).
extern int aes128cfb_old;

// NOTE(review): defined outside this header; presumably derives the working
// keys from a password -- confirm against the definition.
int my_init_keys(const char *, int);

// Encrypt / decrypt one packet. len is the input length on entry and the
// output length on return; both return 0 on success and -1 on failure.
int my_encrypt(const char *data, char *output, int &len);
int my_decrypt(const char *data, char *output, int &len);

unsigned short csum(const unsigned short *ptr, int nbytes);

// Authentication modes dispatched on by auth_cal() / auth_verify().
// auth_end is not handled by the dispatchers; it looks like an end marker.
enum auth_mode_t { auth_none = 0,
                   auth_md5,
                   auth_crc32,
                   auth_simple,
                   auth_hmac_sha1,
                   auth_end };

// Cipher modes dispatched on by cipher_encrypt() / cipher_decrypt().
// cipher_end is not handled by the dispatchers; it looks like an end marker.
enum cipher_mode_t { cipher_none = 0,
                     cipher_aes128cbc,
                     cipher_xor,
                     cipher_aes128cfb,
                     cipher_end };

// Currently selected modes (global).
extern auth_mode_t auth_mode;
extern cipher_mode_t cipher_mode;

// Lookup tables keyed by the enum values above.
extern unordered_map<int, const char *> auth_mode_tostring;
extern unordered_map<int, const char *> cipher_mode_tostring;

extern char gro_xor[256 + 100];

int cipher_decrypt(const char *data, char *output, int &len, char *key);  // internal interface, exposed for tests only
int cipher_encrypt(const char *data, char *output, int &len, char *key);  // internal interface, exposed for tests only

// Single-block AES-ECB helpers: data -> output.
void aes_ecb_encrypt(const char *data, char *output);
void aes_ecb_decrypt(const char *data, char *output);

// In-place variants of the helpers above (operate on the 16 bytes at data).
void aes_ecb_encrypt1(char *data);
void aes_ecb_decrypt1(char *data);

#endif


================================================
FILE: example.conf
================================================
# Basically this file is the equivalent to splitting the command line options into multiple lines
# Each line should contain an option

# This is client
-c
# Or use -s if you use it on server side
# Define local address
-l 127.0.0.1:56789
# Define remote address
-r 45.66.77.88:45678
# Password
-k my_awesome_password
# Mode
--raw-mode faketcp
# Log Level
--log-level 4


================================================
FILE: fd_manager.cpp
================================================
/*
 * fd_manager.cpp
 *
 *  Created on: Sep 25, 2017
 *      Author: root
 */

#include "fd_manager.h"
// Returns 1 if the raw fd is currently registered, 0 otherwise.
int fd_manager_t::fd_exist(int fd) {
    return fd_to_fd64_mp.count(fd) != 0;
}
// Returns 1 if the fd64 handle is currently registered, 0 otherwise.
int fd_manager_t::exist(fd64_t fd64) {
    return fd64_to_fd_mp.count(fd64) != 0;
}
// Translates an fd64 handle back to the raw fd it was created from.
// The handle must be registered (asserted).
int fd_manager_t::to_fd(fd64_t fd64) {
    assert(exist(fd64));
    const int raw_fd = fd64_to_fd_mp[fd64];
    return raw_fd;
}
// Unregisters an fd64 handle: removes both direction mappings and any
// attached fd_info_t, then closes the underlying fd with sock_close().
// The handle must be registered (asserted).
void fd_manager_t::fd64_close(fd64_t fd64) {
    assert(exist(fd64));

    const int raw_fd = fd64_to_fd_mp[fd64];
    fd64_to_fd_mp.erase(fd64);
    fd_to_fd64_mp.erase(raw_fd);

    // Erasing by key does nothing when no info was ever attached.
    fd_info_mp.erase(fd64);

    // assert(close(fd)==0);
    sock_close(raw_fd);
}
// Pre-sizes all three internal hash maps for n entries.
void fd_manager_t::reserve(int n) {
    fd_info_mp.reserve(n);
    fd64_to_fd_mp.reserve(n);
    fd_to_fd64_mp.reserve(n);
}
// Registers a raw fd and returns the fresh fd64 handle assigned to it.
// Handles come from a monotonically increasing counter. The fd must not
// already be registered (asserted).
u64_t fd_manager_t::create(int fd) {
    assert(!fd_exist(fd));

    const fd64_t handle = counter;
    ++counter;

    fd64_to_fd_mp[handle] = fd;
    fd_to_fd64_mp[fd] = handle;
    return handle;
}
// Starts the handle counter at u32_t(-1) + 100 and pre-sizes the maps.
fd_manager_t::fd_manager_t() {
    const u64_t base = u32_t(-1);
    counter = base + 100;
    reserve(10007);
}
// Returns the fd_info_t attached to a registered handle (asserted).
// The map's operator[] is used, so an entry is default-created on first access.
fd_info_t& fd_manager_t::get_info(fd64_t fd64) {
    assert(exist(fd64));
    fd_info_t& info = fd_info_mp[fd64];
    return info;
}
// Returns 1 if an fd_info_t entry exists for the handle, 0 otherwise.
int fd_manager_t::exist_info(fd64_t fd64) {
    return fd_info_mp.count(fd64) != 0;
}


================================================
FILE: fd_manager.h
================================================
/*
 * fd_manager.h
 *
 *  Created on: Sep 25, 2017
 *      Author: root
 */

#ifndef FD_MANAGER_H_
#define FD_MANAGER_H_

#include "common.h"
//#include "packet.h"
#include "connection.h"

// Per-handle data stored by fd_manager_t (see fd_manager_t::get_info).
struct fd_info_t {
    // ip_port_t ip_port;
    // NOTE(review): presumably the connection that owns this fd -- confirm
    // against the code that fills it in.
    conn_info_t *p_conn_info;
};

struct fd_manager_t  // converts an fd to a unique 64-bit number, avoiding fd value conflicts caused by close and re-create
// this class is not strictly necessary, it just makes epoll fd handling easier
{
    // Info record attached to a registered handle (created on first access).
    fd_info_t &get_info(fd64_t fd64);
    // Non-zero if an info record exists for the handle.
    int exist_info(fd64_t);
    // Non-zero if the handle is registered.
    int exist(fd64_t fd64);
    // Raw fd behind a registered handle.
    int to_fd(fd64_t);
    // Unregisters the handle and closes the underlying fd.
    void fd64_close(fd64_t fd64);
    // Pre-sizes the internal maps for n entries.
    void reserve(int n);
    // Registers a raw fd and returns its new handle.
    u64_t create(int fd);
    fd_manager_t();

   private:
    u64_t counter;  // next handle value to hand out
    unordered_map<int, fd64_t> fd_to_fd64_mp;     // raw fd -> handle
    unordered_map<fd64_t, int> fd64_to_fd_mp;     // handle -> raw fd
    unordered_map<fd64_t, fd_info_t> fd_info_mp;  // handle -> info record
    // Non-zero if the raw fd is registered.
    int fd_exist(int fd);
    // void remove_fd(int fd);
    // fd64_t fd_to_fd64(int fd);
};

extern fd_manager_t fd_manager;
#endif /* FD_MANAGER_H_ */


================================================
FILE: images/speedtest/111
================================================


================================================
FILE: images/wiki/111
================================================


================================================
FILE: lib/aes-common.h
================================================
/*
 *  this file comes from https://github.com/kokke/tiny-AES128-C
 */

#pragma once

#include <stdint.h>


/* Single-block ECB: transforms the block at input into output.
 * NOTE(review): the aes_acc implementation treats key == NULL as "reuse the
 * previously expanded key"; confirm the other backends behave the same. */
void AES_ECB_encrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t *output);
void AES_ECB_decrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t *output);

/* CBC over length bytes of input, starting from iv. */
void AES_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv);
void AES_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv);


/* CFB over length bytes of input, starting from iv. */
void AES_CFB_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv);
void AES_CFB_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv);


================================================
FILE: lib/aes_acc/aesacc.c
================================================
/*
 * This file is adapted from PolarSSL 1.3.19 (GPL)
 */

#include "aesni.h"
#include "aesarm.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>

/*
 * Compile-time key size selection: AES256 or AES192 may be defined to 1,
 * otherwise 128-bit keys are used. On AMD64 the matching AES-NI key
 * expansion routine is bound to aeshw_setkey_enc.
 */
#if defined(AES256) && (AES256 == 1)
#define AES_KEYSIZE 256
#ifdef HAVE_AMD64
  #define aeshw_setkey_enc aesni_setkey_enc_256
#endif
#elif defined(AES192) && (AES192 == 1)
#define AES_KEYSIZE 192
#ifdef HAVE_AMD64
  #define aeshw_setkey_enc aesni_setkey_enc_192
#endif
#else
#define AES_KEYSIZE 128
#ifdef HAVE_AMD64
  #define aeshw_setkey_enc aesni_setkey_enc_128
#endif
#endif

/* Number of rounds: 10, 12 or 14 for 128-, 192- or 256-bit keys. */
#define AES_NR ((AES_KEYSIZE >> 5) + 6)
/* Size in bytes of the buffers that hold an expanded key schedule. */
#define AES_RKSIZE      272

/* AMD64: route the generic aeshw_* names to the AES-NI implementation. */
#ifdef HAVE_AMD64
#define HAVE_HARDAES 1
#define aeshw_supported aesni_supported
#define aeshw_crypt_ecb aesni_crypt_ecb
#define aeshw_inverse_key(a,b) aesni_inverse_key(a,b,AES_NR)
#endif /* HAVE_AMD64 */

#ifdef HAVE_ARM64
#define HAVE_HARDAES 1
#define aeshw_supported aesarm_supported
#define aeshw_crypt_ecb aesarm_crypt_ecb

#include "aesarm_table.h"

/* Loads the four bytes b[i..i+3] into n as a little-endian 32-bit value. */
#ifndef GET_UINT32_LE
#define GET_UINT32_LE(n,b,i)                            \
{                                                       \
    (n) = ( (uint32_t) (b)[(i)    ]       )             \
        | ( (uint32_t) (b)[(i) + 1] <<  8 )             \
        | ( (uint32_t) (b)[(i) + 2] << 16 )             \
        | ( (uint32_t) (b)[(i) + 3] << 24 );            \
}
#endif

/*
 * Software AES key expansion used on ARM64 (the crypto extension has no key
 * schedule instruction). Writes the encryption round keys into rk as 32-bit
 * words.
 *
 * rk  - output buffer for the round keys, accessed as uint32_t words
 * key - AES_KEYSIZE-bit user key
 *
 * The branch taken depends on the compile-time round count AES_NR
 * (10 / 12 / 14). FSb and RCON come from aesarm_table.h.
 */
static void aeshw_setkey_enc(uint8_t *rk, const uint8_t *key)
{
    unsigned int i;
    uint32_t *RK;

    RK = (uint32_t *) rk;

    /* The first AES_KEYSIZE/32 words are the key itself, read little-endian. */
    for( i = 0; i < ( AES_KEYSIZE >> 5 ); i++ )
    {
        GET_UINT32_LE( RK[i], key, i << 2 );
    }

    switch( AES_NR )
    {
        case 10:

            /* 128-bit key: each iteration derives the next 4 words. */
            for( i = 0; i < 10; i++, RK += 4 )
            {
                /* S-box applied to the byte-rotated previous word, plus round constant. */
                RK[4]  = RK[0] ^ RCON[i] ^
                ( (uint32_t) FSb[ ( RK[3] >>  8 ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( RK[3] >> 16 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( RK[3]       ) & 0xFF ] << 24 );

                RK[5]  = RK[1] ^ RK[4];
                RK[6]  = RK[2] ^ RK[5];
                RK[7]  = RK[3] ^ RK[6];
            }
            break;

        case 12:

            /* 192-bit key: each iteration derives the next 6 words. */
            for( i = 0; i < 8; i++, RK += 6 )
            {
                RK[6]  = RK[0] ^ RCON[i] ^
                ( (uint32_t) FSb[ ( RK[5] >>  8 ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( RK[5] >> 16 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( RK[5] >> 24 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( RK[5]       ) & 0xFF ] << 24 );

                RK[7]  = RK[1] ^ RK[6];
                RK[8]  = RK[2] ^ RK[7];
                RK[9]  = RK[3] ^ RK[8];
                RK[10] = RK[4] ^ RK[9];
                RK[11] = RK[5] ^ RK[10];
            }
            break;

        case 14:

            /* 256-bit key: each iteration derives the next 8 words. */
            for( i = 0; i < 7; i++, RK += 8 )
            {
                RK[8]  = RK[0] ^ RCON[i] ^
                ( (uint32_t) FSb[ ( RK[7] >>  8 ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( RK[7] >> 16 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( RK[7] >> 24 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( RK[7]       ) & 0xFF ] << 24 );

                RK[9]  = RK[1] ^ RK[8];
                RK[10] = RK[2] ^ RK[9];
                RK[11] = RK[3] ^ RK[10];

                /* Middle word: S-box without byte rotation or round constant. */
                RK[12] = RK[4] ^
                ( (uint32_t) FSb[ ( RK[11]       ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( RK[11] >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( RK[11] >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( RK[11] >> 24 ) & 0xFF ] << 24 );

                RK[13] = RK[5] ^ RK[12];
                RK[14] = RK[6] ^ RK[13];
                RK[15] = RK[7] ^ RK[14];
            }
            break;
    }
}

/*
 * Builds the decryption key schedule from an encryption key schedule
 * (ARM64 software path).
 *
 * invkey - output: decryption round keys, written as uint32_t words
 * fwdkey - input: encryption round keys as produced by aeshw_setkey_enc
 *
 * The round keys are emitted in reverse order: the last forward round key is
 * copied first, the first forward round key is copied last, and every round
 * key in between is transformed word by word through the RT0..RT3 / FSb
 * tables from aesarm_table.h.
 */
static void aeshw_inverse_key(uint8_t *invkey, const uint8_t *fwdkey)
{
  int i, j;
  uint32_t *RK;
  uint32_t *SK;

  RK = (uint32_t *) invkey;
  /* SK starts at the last forward round key (4 words per round). */
  SK = ((uint32_t *) fwdkey) + AES_NR * 4;

  *RK++ = *SK++;
  *RK++ = *SK++;
  *RK++ = *SK++;
  *RK++ = *SK++;

  /* SK -= 8 steps back over the 4 words just read plus one more round key. */
  for( i = AES_NR - 1, SK -= 8; i > 0; i--, SK -= 8 )
  {
      for( j = 0; j < 4; j++, SK++ )
      {
          *RK++ = RT0[ FSb[ ( *SK       ) & 0xFF ] ] ^
                  RT1[ FSb[ ( *SK >>  8 ) & 0xFF ] ] ^
                  RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^
                  RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ];
      }
  }

  /* First forward round key, copied unchanged. */
  *RK++ = *SK++;
  *RK++ = *SK++;
  *RK++ = *SK++;
  *RK++ = *SK++;
}
#endif /* HAVE_ARM64 */

#ifdef HAVE_HARDAES
/*
 * Hardware-path decryption key setup: expand the encryption schedule into a
 * temporary buffer, then derive the decryption schedule from it into rk.
 */
static void aeshw_setkey_dec(uint8_t *rk, const uint8_t *key)
{
  uint8_t forward_schedule[AES_RKSIZE];

  aeshw_setkey_enc(forward_schedule, key);
  aeshw_inverse_key(rk, forward_schedule);
}

/* Hardware-path single block encryption: aeshw_crypt_ecb in AES_ENCRYPT mode. */
static void aeshw_encrypt_ecb( int nr,
                               unsigned char *rk,
                               const unsigned char input[16],
                               unsigned char output[16] )
{
  const int mode = AES_ENCRYPT;
  aeshw_crypt_ecb(nr, rk, mode, input, output);
}

/* Hardware-path single block decryption: aeshw_crypt_ecb in AES_DECRYPT mode. */
static void aeshw_decrypt_ecb( int nr,
                               unsigned char *rk,
                               const unsigned char input[16],
                               unsigned char output[16] )
{
  const int mode = AES_DECRYPT;
  aeshw_crypt_ecb(nr, rk, mode, input, output);
}
#endif /* HAVE_HARDAES */

/* OpenSSL assembly functions */
/* Key schedule layout shared with the assembly routines declared below:
 * room for AES_MAXNR + 1 round keys of four 32-bit words, then the round count. */
#define AES_MAXNR 14
typedef struct {
  uint32_t rd_key[4 * (AES_MAXNR + 1)];
  uint32_t rounds;
} AES_KEY;

/* On 64-bit x86 and ARM the AES_* names are mapped to the vpaes_* symbols. */
#if defined(__amd64__) || defined(__x86_64__) || \
    defined(__aarch64__)
#define AES_set_encrypt_key vpaes_set_encrypt_key
#define AES_set_decrypt_key vpaes_set_decrypt_key
#define AES_encrypt vpaes_encrypt
#define AES_decrypt vpaes_decrypt
#endif /* VPAES for 64-bit Intel and ARM */

#ifdef __cplusplus
extern "C" {
#endif

/* Key setup: expands userKey (bits wide) into *key. */
int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                        AES_KEY *key);
int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
                        AES_KEY *key);

/* Single-block transform of in to out using an expanded key. */
void AES_encrypt(const unsigned char *in, unsigned char *out,
                 const AES_KEY *key);
void AES_decrypt(const unsigned char *in, unsigned char *out,
                 const AES_KEY *key);

#ifdef __cplusplus
}
#endif

static void aes_encrypt_ecb( int nr,
                             unsigned char *rk,
                             const unsigned char input[16],
                             unsigned char output[16] )
{
  AES_encrypt(input, output, (AES_KEY *) rk);
}

static void aes_decrypt_ecb( int nr,
                             unsigned char *rk,
                             const unsigned char input[16],
                             unsigned char output[16] )
{
  AES_decrypt(input, output, (AES_KEY *) rk);
}

static void aes_setkey_enc(uint8_t *rk, const uint8_t *key)
{
  AES_set_encrypt_key(key, AES_KEYSIZE, (AES_KEY *) rk);
}

static void aes_setkey_dec(uint8_t *rk, const uint8_t *key)
{
  AES_set_decrypt_key(key, AES_KEYSIZE, (AES_KEY *) rk);
}

/*
 * Dispatch pointers used by all the mode functions below. They default to
 * the software implementations and are switched to the aeshw_* versions by
 * aeshw_init() when hardware AES is available.
 */
static void (*encrypt_ecb) ( int nr,
                             unsigned char *rk,
                             const unsigned char input[16],
                             unsigned char output[16] )
  = aes_encrypt_ecb;

static void (*decrypt_ecb) ( int nr,
                             unsigned char *rk,
                             const unsigned char input[16],
                             unsigned char output[16] )
  = aes_decrypt_ecb;

static void (*setkey_enc) (uint8_t *rk, const uint8_t *key)
  = aes_setkey_enc;

static void (*setkey_dec) (uint8_t *rk, const uint8_t *key)
  = aes_setkey_dec;

/*
 * AESNI-CBC buffer encryption/decryption
 */
/*
 * CBC encryption of whole 16-byte blocks.
 *
 * rk     - expanded encryption key
 * length - number of input bytes; only complete 16-byte blocks are processed
 * iv     - chaining value, updated in place to the last ciphertext block
 * input  - plaintext
 * output - ciphertext (may be the same buffer as input)
 *
 * The loop runs while a full block remains. The previous `length > 0` test
 * let the unsigned counter wrap around when length was not a multiple of 16,
 * running far past the end of both buffers.
 */
static void encrypt_cbc( uint8_t* rk,
                         uint32_t length,
                         uint8_t iv[16],
                         const uint8_t *input,
                         uint8_t *output )
{
    int i;

    while( length >= 16 )
    {
        for( i = 0; i < 16; i++ )
            output[i] = (uint8_t)( input[i] ^ iv[i] );

        encrypt_ecb( AES_NR, rk, output, output );
        memcpy( iv, output, 16 );

        input  += 16;
        output += 16;
        length -= 16;
    }
}

/*
 * CBC decryption of whole 16-byte blocks.
 *
 * rk     - expanded decryption key
 * length - number of input bytes; only complete 16-byte blocks are processed
 * iv     - chaining value, updated in place to the last ciphertext block
 * input  - ciphertext
 * output - plaintext (may be the same buffer as input: each ciphertext block
 *          is saved before it is overwritten)
 *
 * The loop runs while a full block remains. The previous `length > 0` test
 * let the unsigned counter wrap around when length was not a multiple of 16,
 * running far past the end of both buffers.
 */
static void decrypt_cbc( uint8_t* rk,
                         uint32_t length,
                         uint8_t iv[16],
                         const uint8_t *input,
                         uint8_t *output )
{
    int i;
    uint8_t temp[16];

    while( length >= 16 )
    {
        memcpy( temp, input, 16 );
        decrypt_ecb( AES_NR, rk, input, output );

        for( i = 0; i < 16; i++ )
            output[i] = (uint8_t)( output[i] ^ iv[i] );

        memcpy( iv, temp, 16 );

        input  += 16;
        output += 16;
        length -= 16;
    }
}

/*
 * One-time selection of the AES backend. On the first call, when built with
 * HAVE_HARDAES and the CPU reports support, the dispatch pointers are
 * switched to the hardware implementations. Later calls do nothing.
 */
static void aeshw_init(void)
{
#ifdef HAVE_HARDAES
  static int done = 0;

  if (done)
    return;

  if (aeshw_supported()) {
    setkey_enc = aeshw_setkey_enc;
    setkey_dec = aeshw_setkey_dec;
    encrypt_ecb = aeshw_encrypt_ecb;
    decrypt_ecb = aeshw_decrypt_ecb;
  }
  done = 1;
#endif
}

/*
 * Reports whether hardware AES can be used: the result of aeshw_supported()
 * when built with HAVE_HARDAES, 0 otherwise.
 */
int AES_support_hwaccel(void)
{
  int supported = 0;
#ifdef HAVE_HARDAES
  supported = aeshw_supported();
#endif
  return supported;
}

/*
 * CBC-encrypts length bytes of input into output.
 * The expanded key is kept in a function-local static buffer: a non-NULL key
 * (re)expands it, a NULL key reuses whatever was expanded before.
 * iv is copied, so the caller's buffer is not modified.
 */
void AES_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
  static uint8_t rk[AES_RKSIZE];
  uint8_t chain[16];

  assert(iv != NULL);
  aeshw_init();
  memcpy(chain, iv, sizeof(chain));

  if (NULL != key) {
    setkey_enc(rk, key);
  }

  encrypt_cbc(rk, length, chain, input, output);
}

/*
 * CBC-decrypts length bytes of input into output.
 * The expanded key is kept in a function-local static buffer: a non-NULL key
 * (re)expands it, a NULL key reuses whatever was expanded before.
 * iv is copied, so the caller's buffer is not modified.
 */
void AES_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
  static uint8_t rk[AES_RKSIZE];
  uint8_t chain[16];

  assert(iv != NULL);
  aeshw_init();
  memcpy(chain, iv, sizeof(chain));

  if (NULL != key) {
    setkey_dec(rk, key);
  }

  decrypt_cbc(rk, length, chain, input, output);
}

/*
 * Encrypts a single 16-byte block from input into output.
 * A non-NULL key (re)expands the function-local static key schedule; a NULL
 * key reuses the one expanded earlier.
 */
void AES_ECB_encrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t* output)
{
  static uint8_t rk[AES_RKSIZE];

  aeshw_init();

  if (NULL != key) {
    setkey_enc(rk, key);
  }

  encrypt_ecb(AES_NR, rk, input, output);
}

/*
 * Decrypts a single 16-byte block from input into output.
 * A non-NULL key (re)expands the function-local static key schedule; a NULL
 * key reuses the one expanded earlier.
 */
void AES_ECB_decrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t *output)
{
  static uint8_t rk[AES_RKSIZE];

  aeshw_init();

  if (NULL != key) {
    setkey_dec(rk, key);
  }

  decrypt_ecb(AES_NR, rk, input, output);
}

/*
 * CFB encryption, one byte at a time.
 *
 * iv holds the current keystream block / feedback register and *iv_off the
 * position inside it; both are updated so a later call can continue the
 * stream. A new keystream block is generated whenever the offset is 0.
 */
static void encrypt_cfb( uint8_t* rk,
                         uint32_t length,size_t *iv_off,
                         uint8_t iv[16],
                         const uint8_t *input,
                         uint8_t *output )
{
    size_t pos = *iv_off;
    uint32_t k;

    for( k = 0; k < length; k++ )
    {
        unsigned char cipher_byte;

        if( pos == 0 )
            encrypt_ecb( AES_NR, rk, iv, iv );

        /* The ciphertext byte is both emitted and fed back into iv. */
        cipher_byte = (unsigned char)( iv[pos] ^ input[k] );
        output[k] = cipher_byte;
        iv[pos] = cipher_byte;

        pos = ( pos + 1 ) & 0x0F;
    }

    *iv_off = pos;
}

/*
 * CFB decryption, one byte at a time.
 *
 * iv holds the current keystream block / feedback register and *iv_off the
 * position inside it; both are updated so a later call can continue the
 * stream. CFB uses the block cipher in the encrypt direction for decryption
 * too, hence encrypt_ecb here.
 */
static void decrypt_cfb( uint8_t* rk,
                         uint32_t length,size_t *iv_off,
                         uint8_t iv[16],
                         const uint8_t *input,
                         uint8_t *output )
{
    size_t pos = *iv_off;
    uint32_t k;

    for( k = 0; k < length; k++ )
    {
        int cipher_byte;

        if( pos == 0 )
            encrypt_ecb( AES_NR, rk, iv, iv );

        /* Read the ciphertext byte first: input and output may overlap. */
        cipher_byte = input[k];
        output[k] = (unsigned char)( cipher_byte ^ iv[pos] );
        iv[pos] = (unsigned char) cipher_byte;

        pos = ( pos + 1 ) & 0x0F;
    }

    *iv_off = pos;
}

/*
 * CFB-encrypts length bytes of input into output, starting at offset 0 of a
 * private copy of iv. A non-NULL key (re)expands the function-local static
 * key schedule; a NULL key reuses the one expanded earlier.
 */
void AES_CFB_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
  static uint8_t rk[AES_RKSIZE];
  uint8_t feedback[16];
  size_t offset = 0;

  assert(iv != NULL);
  aeshw_init();
  memcpy(feedback, iv, sizeof(feedback));

  if (NULL != key) {
    setkey_enc(rk, key);
  }

  encrypt_cfb(rk, length, &offset, feedback, input, output);
}

/*
 * CFB-decrypts length bytes of input into output, starting at offset 0 of a
 * private copy of iv. A non-NULL key (re)expands the function-local static
 * key schedule; a NULL key reuses the one expanded earlier.
 */
void AES_CFB_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
  static uint8_t rk[AES_RKSIZE];
  uint8_t feedback[16];
  size_t offset = 0;

  assert(iv != NULL);
  aeshw_init();
  memcpy(feedback, iv, sizeof(feedback));

  if (NULL != key) {
    /* The encryption key schedule is intended here (not a typo): CFB
     * decryption runs the block cipher in the encrypt direction. */
    setkey_enc(rk, key);
  }

  decrypt_cfb(rk, length, &offset, feedback, input, output);
}


================================================
FILE: lib/aes_acc/aesarm.c
================================================
/*
 * This file is adapted from https://github.com/CriticalBlue/mbedtls
 */

/*
 *  ARMv8-A Cryptography Extension AES support functions
 *
 *  Copyright (C) 2016, CriticalBlue Limited, All Rights Reserved
 *  SPDX-License-Identifier: Apache-2.0
 *
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may
 *  not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 */

#include "aesarm.h"

#if defined(HAVE_ARM64)

#include <sys/auxv.h>
#include <asm/hwcap.h>
#include <arm_neon.h>

/*
 * ARMv8a Crypto Extension support detection routine
 */
/*
 * Returns non-zero when the HWCAP_AES bit is set in the AT_HWCAP auxiliary
 * vector entry. The value is read once and cached in a function-local static.
 */
int aesarm_supported( void )
{
    static int done = 0;
    static unsigned int c = 0;

    if ( done == 0 )
    {
        c = getauxval(AT_HWCAP);
        done = 1;
    }

    return ( c & HWCAP_AES ) ? 1 : 0;
}

/*
 * ARMv8-A AES-ECB single-block en(de)cryption.
 *
 * nr      number of rounds
 * rk      round keys, 16 bytes each; nr + 1 of them are read
 * mode    AES_ENCRYPT selects the AESE/AESMC path, anything else the
 *         AESD/AESIMC path
 * input   16-byte input block
 * output  16-byte output block
 */
void aesarm_crypt_ecb( int nr,
                       unsigned char *rk,
                       int mode,
                       const unsigned char input[16],
                       unsigned char output[16] )
{
    const uint8_t *key_ptr = (const uint8_t *) rk;
    uint8x16_t block = vld1q_u8( input );
    int round;

    /*
     * AESE/AESD perform AddRoundKey *before* SubBytes/ShiftRows, so the
     * initial AddRoundKey is folded into the first loop iteration and the
     * final AddRoundKey has to be applied by hand afterwards.
     */
    if ( mode == AES_ENCRYPT )
    {
        /* nr - 1 full rounds: AddRoundKey, SubBytes, ShiftRows, MixColumns */
        for( round = 1; round < nr; round++, key_ptr += 16 )
            block = vaesmcq_u8( vaeseq_u8( block, vld1q_u8( key_ptr ) ) );

        /* Last round has no MixColumns */
        block = vaeseq_u8( block, vld1q_u8( key_ptr ) );
    }
    else
    {
        /* nr - 1 full inverse rounds, each followed by InvMixColumns */
        for( round = 1; round < nr; round++, key_ptr += 16 )
            block = vaesimcq_u8( vaesdq_u8( block, vld1q_u8( key_ptr ) ) );

        /* Last inverse round has no InvMixColumns */
        block = vaesdq_u8( block, vld1q_u8( key_ptr ) );
    }

    /* Final AddRoundKey with the last round key (plain XOR) */
    block = veorq_u8( block, vld1q_u8( key_ptr + 16 ) );

    vst1q_u8( output, block );
}

#endif /* HAVE_ARM64 */


================================================
FILE: lib/aes_acc/aesarm.h
================================================
/*
 * This file is adapted from https://github.com/CriticalBlue/mbedtls
 */

/**
 * \file aesarm.h
 *
 * \brief AES support functions using the ARMv8-A Cryptography Extension for
 * hardware acceleration on some ARM processors.
 *
 *  Copyright (C) 2016, CriticalBlue Limited, All Rights Reserved
 *  SPDX-License-Identifier: Apache-2.0
 *
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may
 *  not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 */

#ifndef UDP2RAW_AESARM_H_
#define UDP2RAW_AESARM_H_

/* Values accepted by the `mode` argument of aesarm_crypt_ecb(); only defined
 * here when no earlier header has already defined them. */
#ifndef AES_ENCRYPT
#define AES_ENCRYPT     1
#endif

#ifndef AES_DECRYPT
#define AES_DECRYPT     0
#endif

/*
 * HAVE_ARM64 is defined only when every condition below holds: a GCC-compatible
 * compiler, an A-profile ARMv8 (or later) AArch64 target with
 * __ARM_FEATURE_CRYPTO set, a Linux target, and NO_AESACC not defined.
 */
#if defined(__GNUC__) && \
    __ARM_ARCH >= 8 && \
    __ARM_ARCH_PROFILE == 'A' && \
    defined(__aarch64__) &&  \
    defined(__ARM_FEATURE_CRYPTO) && \
    defined(__linux__) && \
    !defined(NO_AESACC)
#define HAVE_ARM64
#endif

#if defined(HAVE_ARM64)

#ifdef __cplusplus
extern "C" {
#endif

/**
 * \brief          ARMv8-A features detection routine
 *
 * \return         1 if the CPU has support for the feature, 0 otherwise
 */
int aesarm_supported( void );

/**
 * \brief          AES ARMv8-A Cryptography Extension AES-ECB block en(de)cryption
 *
 * \param nr       number of rounds
 * \param rk       AES round keys
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param input    16-byte input block
 * \param output   16-byte output block
 */
void aesarm_crypt_ecb( int nr,
                       unsigned char *rk,
                       int mode,
                       const unsigned char input[16],
                       unsigned char output[16] );

#ifdef __cplusplus
}
#endif 

#endif /* HAVE_ARM64 */

#endif /* UDP2RAW_AESARM_H_ */


================================================
FILE: lib/aes_acc/aesarm_table.h
================================================
/*
 * This file is adapted from PolarSSL 1.3.19 (GPL)
 */

/*
 * Forward S-box
 */
static const unsigned char FSb[256] =
{
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
    0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
    0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
    0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
    0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
    0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
    0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
    0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
    0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
    0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
    0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
    0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
    0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
    0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
    0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
    0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
    0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};

/*
 * Round constants
 *
 * Ten 32-bit words; each constant occupies the least-significant byte of its
 * word and the upper three bytes are zero.
 */
static const uint32_t RCON[10] =
{
    0x00000001, 0x00000002, 0x00000004, 0x00000008,
    0x00000010, 0x00000020, 0x00000040, 0x00000080,
    0x0000001B, 0x00000036
};

/*
 * Reverse tables
 */
#define RT \
\
    V(50,A7,F4,51), V(53,65,41,7E), V(C3,A4,17,1A), V(96,5E,27,3A), \
    V(CB,6B,AB,3B), V(F1,45,9D,1F), V(AB,58,FA,AC), V(93,03,E3,4B), \
    V(55,FA,30,20), V(F6,6D,76,AD), V(91,76,CC,88), V(25,4C,02,F5), \
    V(FC,D7,E5,4F), V(D7,CB,2A,C5), V(80,44,35,26), V(8F,A3,62,B5), \
    V(49,5A,B1,DE), V(67,1B,BA,25), V(98,0E,EA,45), V(E1,C0,FE,5D), \
    V(02,75,2F,C3), V(12,F0,4C,81), V(A3,97,46,8D), V(C6,F9,D3,6B), \
    V(E7,5F,8F,03), V(95,9C,92,15), V(EB,7A,6D,BF), V(DA,59,52,95), \
    V(2D,83,BE,D4), V(D3,21,74,58), V(29,69,E0,49), V(44,C8,C9,8E), \
    V(6A,89,C2,75), V(78,79,8E,F4), V(6B,3E,58,99), V(DD,71,B9,27), \
    V(B6,4F,E1,BE), V(17,AD,88,F0), V(66,AC,20,C9), V(B4,3A,CE,7D), \
    V(18,4A,DF,63), V(82,31,1A,E5), V(60,33,51,97), V(45,7F,53,62), \
    V(E0,77,64,B1), V(84,AE,6B,BB), V(1C,A0,81,FE), V(94,2B,08,F9), \
    V(58,68,48,70), V(19,FD,45,8F), V(87,6C,DE,94), V(B7,F8,7B,52), \
    V(23,D3,73,AB), V(E2,02,4B,72), V(57,8F,1F,E3), V(2A,AB,55,66), \
    V(07,28,EB,B2), V(03,C2,B5,2F), V(9A,7B,C5,86), V(A5,08,37,D3), \
    V(F2,87,28,30), V(B2,A5,BF,23), V(BA,6A,03,02), V(5C,82,16,ED), \
    V(2B,1C,CF,8A), V(92,B4,79,A7), V(F0,F2,07,F3), V(A1,E2,69,4E), \
    V(CD,F4,DA,65), V(D5,BE,05,06), V(1F,62,34,D1), V(8A,FE,A6,C4), \
    V(9D,53,2E,34), V(A0,55,F3,A2), V(32,E1,8A,05), V(75,EB,F6,A4), \
    V(39,EC,83,0B), V(AA,EF,60,40), V(06,9F,71,5E), V(51,10,6E,BD), \
    V(F9,8A,21,3E), V(3D,06,DD,96), V(AE,05,3E,DD), V(46,BD,E6,4D), \
    V(B5,8D,54,91), V(05,5D,C4,71), V(6F,D4,06,04), V(FF,15,50,60), \
    V(24,FB,98,19), V(97,E9,BD,D6), V(CC,43,40,89), V(77,9E,D9,67), \
    V(BD,42,E8,B0), V(88,8B,89,07), V(38,5B,19,E7), V(DB,EE,C8,79), \
    V(47,0A,7C,A1), V(E9,0F,42,7C), V(C9,1E,84,F8), V(00,00,00,00), \
    V(83,86,80,09), V(48,ED,2B,32), V(AC,70,11,1E), V(4E,72,5A,6C), \
    V(FB,FF,0E,FD), V(56,38,85,0F), V(1E,D5,AE,3D), V(27,39,2D,36), \
    V(64,D9,0F,0A), V(21,A6,5C,68), V(D1,54,5B,9B), V(3A,2E,36,24), \
    V(B1,67,0A,0C), V(0F,E7,57,93), V(D2,96,EE,B4), V(9E,91,9B,1B), \
    V(4F,C5,C0,80), V(A2,20,DC,61), V(69,4B,77,5A), V(16,1A,12,1C), \
    V(0A,BA,93,E2), V(E5,2A,A0,C0), V(43,E0,22,3C), V(1D,17,1B,12), \
    V(0B,0D,09,0E), V(AD,C7,8B,F2), V(B9,A8,B6,2D), V(C8,A9,1E,14), \
    V(85,19,F1,57), V(4C,07,75,AF), V(BB,DD,99,EE), V(FD,60,7F,A3), \
    V(9F,26,01,F7), V(BC,F5,72,5C), V(C5,3B,66,44), V(34,7E,FB,5B), \
    V(76,29,43,8B), V(DC,C6,23,CB), V(68,FC,ED,B6), V(63,F1,E4,B8), \
    V(CA,DC,31,D7), V(10,85,63,42), V(40,22,97,13), V(20,11,C6,84), \
    V(7D,24,4A,85), V(F8,3D,BB,D2), V(11,32,F9,AE), V(6D,A1,29,C7), \
    V(4B,2F,9E,1D), V(F3,30,B2,DC), V(EC,52,86,0D), V(D0,E3,C1,77), \
    V(6C,16,B3,2B), V(99,B9,70,A9), V(FA,48,94,11), V(22,64,E9,47), \
    V(C4,8C,FC,A8), V(1A,3F,F0,A0), V(D8,2C,7D,56), V(EF,90,33,22), \
    V(C7,4E,49,87), V(C1,D1,38,D9), V(FE,A2,CA,8C), V(36,0B,D4,98), \
    V(CF,81,F5,A6), V(28,DE,7A,A5), V(26,8E,B7,DA), V(A4,BF,AD,3F), \
    V(E4,9D,3A,2C), V(0D,92,78,50), V(9B,CC,5F,6A), V(62,46,7E,54), \
    V(C2,13,8D,F6), V(E8,B8,D8,90), V(5E,F7,39,2E), V(F5,AF,C3,82), \
    V(BE,80,5D,9F), V(7C,93,D0,69), V(A9,2D,D5,6F), V(B3,12,25,CF), \
    V(3B,99,AC,C8), V(A7,7D,18,10), V(6E,63,9C,E8), V(7B,BB,3B,DB), \
    V(09,78,26,CD), V(F4,18,59,6E), V(01,B7,9A,EC), V(A8,9A,4F,83), \
    V(65,6E,95,E6), V(7E,E6,FF,AA), V(08,CF,BC,21), V(E6,E8,15,EF), \
    V(D9,9B,E7,BA), V(CE,36,6F,4A), V(D4,09,9F,EA), V(D6,7C,B0,29), \
    V(AF,B2,A4,31), V(31,23,3F,2A), V(30,94,A5,C6), V(C0,66,A2,35), \
    V(37,BC,4E,74), V(A6,CA,82,FC), V(B0,D0,90,E0), V(15,D8,A7,33), \
    V(4A,98,04,F1), V(F7,DA,EC,41), V(0E,50,CD,7F), V(2F,F6,91,17), \
    V(8D,D6,4D,76), V(4D,B0,EF,43), V(54,4D,AA,CC), V(DF,04,96,E4), \
    V(E3,B5,D1,9E), V(1B,88,6A,4C), V(B8,1F,2C,C1), V(7F,51,65,46), \
    V(04,EA,5E,9D), V(5D,35,8C,01), V(73,74,87,FA), V(2E,41,0B,FB), \
    V(5A,1D,67,B3), V(52,D2,DB,92), V(33,56,10,E9), V(13,47,D6,6D), \
    V(8C,61,D7,9A), V(7A,0C,A1,37), V(8E,14,F8,59), V(89,3C,13,EB), \
    V(EE,27,A9,CE), V(35,C9,61,B7), V(ED,E5,1C,E1), V(3C,B1,47,7A), \
    V(59,DF,D2,9C), V(3F,73,F2,55), V(79,CE,14,18), V(BF,37,C7,73), \
    V(EA,CD,F7,53), V(5B,AA,FD,5F), V(14,6F,3D,DF), V(86,DB,44,78), \
    V(81,F3,AF,CA), V(3E,C4,68,B9), V(2C,34,24,38), V(5F,40,A3,C2), \
    V(72,C3,1D,16), V(0C,25,E2,BC), V(8B,49,3C,28), V(41,95,0D,FF), \
    V(71,01,A8,39), V(DE,B3,0C,08), V(9C,E4,B4,D8), V(90,C1,56,64), \
    V(61,84,CB,7B), V(70,B6,32,D5), V(74,5C,6C,48), V(42,57,B8,D0)

/*
 * The RT list is expanded four times with a different V() each time. V()
 * token-pastes the four hex byte literals of an entry into one 32-bit
 * constant, so the only difference between the tables is the byte order.
 */

/* RT0: bytes in the order a,b,c,d. */
#define V(a,b,c,d) 0x##a##b##c##d
static const uint32_t RT0[256] = { RT };
#undef V

/* RT1: bytes in the order b,c,d,a (the RT0 word rotated left by 8 bits). */
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t RT1[256] = { RT };
#undef V

/* RT2: bytes in the order c,d,a,b (the RT0 word rotated left by 16 bits). */
#define V(a,b,c,d) 0x##c##d##a##b
static const uint32_t RT2[256] = { RT };
#undef V

/* RT3: bytes in the order d,a,b,c (the RT0 word rotated left by 24 bits). */
#define V(a,b,c,d) 0x##d##a##b##c
static const uint32_t RT3[256] = { RT };
#undef V

/* The entry list is no longer needed once all four tables are emitted. */
#undef RT


================================================
FILE: lib/aes_acc/aesni.c
================================================
/*
 * This file is adapted from PolarSSL 1.3.19 (GPL)
 */

/*
 *  AES-NI support functions
 *
 *  Copyright (C) 2006-2014, ARM Limited, All Rights Reserved
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
 * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
 */

#include <string.h>
#include "aesni.h"

#if defined(HAVE_AMD64)

/*
 * Runtime AES-NI detection.
 *
 * Executes CPUID with EAX = 1 on the first call, caches the resulting ECX
 * value in a function-static variable, and tests the AESNI_AES bit (bit 25).
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
#define AESNI_AES 0x02000000u

int aesni_supported( void )
{
    static unsigned int ecx_features = 0;
    static int probed = 0;

    if( probed == 0 )
    {
        asm( "movl  $1, %%eax   \n\t"
             "cpuid             \n\t"
             : "=c" (ecx_features)
             :
             : "eax", "ebx", "edx" );
        probed = 1;
    }

    return( ( ecx_features & AESNI_AES ) ? 1 : 0 );
}

/*
 * Binutils needs to be at least 2.19 to support AES-NI instructions.
 * Unfortunately, a lot of users have a lower version now (2014-04).
 * Emit bytecode directly in order to support "old" version of gas.
 *
 * Opcodes from the Intel architecture reference manual, vol. 3.
 * We always use registers, so we don't need prefixes for memory operands.
 * Operand macros are in gas order (src, dst) as opposed to Intel order
 * (dst, src) in order to blend better into the surrounding assembly code.
 *
 * Each instruction macro is an unterminated ".byte" list (note the trailing
 * comma); it must be followed by one of the operand macros below, which
 * supplies the ModRM byte, and by ",imm8" for the instructions that take an
 * immediate (AESKEYGENA, PCLMULQDQ).
 */
#define AESDEC      ".byte 0x66,0x0F,0x38,0xDE,"
#define AESDECLAST  ".byte 0x66,0x0F,0x38,0xDF,"
#define AESENC      ".byte 0x66,0x0F,0x38,0xDC,"
#define AESENCLAST  ".byte 0x66,0x0F,0x38,0xDD,"
#define AESIMC      ".byte 0x66,0x0F,0x38,0xDB,"
#define AESKEYGENA  ".byte 0x66,0x0F,0x3A,0xDF,"
#define PCLMULQDQ   ".byte 0x66,0x0F,0x3A,0x44,"

/*
 * ModRM bytes for register-to-register forms, named <src>_<dst>:
 * value = 0xC0 | (dst << 3) | src.
 */
#define xmm0_xmm0   "0xC0"
#define xmm0_xmm1   "0xC8"
#define xmm0_xmm2   "0xD0"
#define xmm0_xmm3   "0xD8"
#define xmm0_xmm4   "0xE0"
#define xmm1_xmm0   "0xC1"
#define xmm1_xmm2   "0xD1"

/*
 * AES-NI AES-ECB block en(de)cryption
 */
void aesni_crypt_ecb( int nr,
                      unsigned char *rk,
                      int mode,
                      const unsigned char input[16],
                      unsigned char output[16] )
{
    asm( "movdqu    (%3), %%xmm0    \n\t" // load input
         "movdqu    (%1), %%xmm1    \n\t" // load round key 0
         "pxor      %%xmm1, %%xmm0  \n\t" // round 0
         "addq      $16, %1         \n\t" // point to next round key
         "subl      $1, %0          \n\t" // normal rounds = nr - 1
         "test      %2, %2          \n\t" // mode?
         "jz        2f              \n\t" // 0 = decrypt

         "1:                        \n\t" // encryption loop
         "movdqu    (%1), %%xmm1    \n\t" // load round key
         AESENC     xmm1_xmm0      "\n\t" // do round
         "addq      $16, %1         \n\t" // point to next round key
         "subl      $1, %0          \n\t" // loop
         "jnz       1b              \n\t"
         "movdqu    (%1), %%xmm1    \n\t" // load round key
         AESENCLAST xmm1_xmm0      "\n\t" // last round
         "jmp       3f              \n\t"

         "2:                        \n\t" // decryption loop
         "movdqu    (%1), %%xmm1    \n\t"
         AESDEC     xmm1_xmm0      "\n\t" // do round
         "addq      $16, %1         \n\t"
         "subl      $1, %0          \n\t"
         "jnz       2b              \n\t"
         "movdqu    (%1), %%xmm1    \n\t" // load round key
         AESDECLAST xmm1_xmm0      "\n\t" // last round

         "3:                        \n\t"
         "movdqu    %%xmm0, (%4)    \n\t" // export output
         :
         : "r" (nr), "r" (rk), "r" (mode), "r" (input), "r" (output)
         : "memory", "cc", "xmm0", "xmm1" );
}

/*
 * Compute decryption round keys from encryption round keys.
 *
 * invkey  destination for the nr + 1 inverse round keys (16 bytes each)
 * fwdkey  forward (encryption) round keys
 * nr      number of rounds
 *
 * The forward keys are walked from last to first. The two end keys are
 * copied unchanged; every key in between is passed through AESIMC.
 */
void aesni_inverse_key( unsigned char *invkey,
                        const unsigned char *fwdkey, int nr )
{
    const unsigned char *src = fwdkey + 16 * nr;
    unsigned char *dst = invkey;

    /* Last forward key becomes the first inverse key, as-is. */
    memcpy( dst, src, 16 );
    src -= 16;
    dst += 16;

    while( src > fwdkey )
    {
        asm( "movdqu (%0), %%xmm0       \n\t"
             AESIMC  xmm0_xmm0         "\n\t"
             "movdqu %%xmm0, (%1)       \n\t"
             :
             : "r" (src), "r" (dst)
             : "memory", "xmm0" );
        src -= 16;
        dst += 16;
    }

    /* First forward key becomes the last inverse key, as-is. */
    memcpy( dst, src, 16 );
}

/*
 * Key expansion, 128-bit case
 */
void aesni_setkey_enc_128( unsigned char *rk,
                           const unsigned char *key )
{
    asm( "movdqu (%1), %%xmm0               \n\t" // copy the original key
         "movdqu %%xmm0, (%0)               \n\t" // as round key 0
         "jmp 2f                            \n\t" // skip auxiliary routine

         /*
          * Finish generating the next round key.
          *
          * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
          * with X = rot( sub( r3 ) ) ^ RCON.
          *
          * On exit, xmm0 is r7:r6:r5:r4
          * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
          * and those are written to the round key buffer.
          */
         "1:                                \n\t"
         "pshufd $0xff, %%xmm1, %%xmm1      \n\t" // X:X:X:X
         "pxor %%xmm0, %%xmm1               \n\t" // X+r3:X+r2:X+r1:r4
         "pslldq $4, %%xmm0                 \n\t" // r2:r1:r0:0
         "pxor %%xmm0, %%xmm1               \n\t" // X+r3+r2:X+r2+r1:r5:r4
         "pslldq $4, %%xmm0                 \n\t" // etc
         "pxor %%xmm0, %%xmm1               \n\t"
         "pslldq $4, %%xmm0                 \n\t"
         "pxor %%xmm1, %%xmm0               \n\t" // update xmm0 for next time!
         "add $16, %0                       \n\t" // point to next round key
         "movdqu %%xmm0, (%0)               \n\t" // write it
         "ret                               \n\t"

         /* Main "loop" */
         "2:                                \n\t"
         AESKEYGENA xmm0_xmm1 ",0x01        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x02        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x04        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x08        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x10        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x20        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x40        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x80        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x1B        \n\tcall 1b \n\t"
         AESKEYGENA xmm0_xmm1 ",0x36        \n\tcall 1b \n\t"
         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
}

/*
 * Key expansion, 192-bit case
 */
void aesni_setkey_enc_192( unsigned char *rk,
                           const unsigned char *key )
{
    asm( "movdqu (%1), %%xmm0   \n\t" // copy original round key
         "movdqu %%xmm0, (%0)   \n\t"
         "add $16, %0           \n\t"
         "movq 16(%1), %%xmm1   \n\t"
         "movq %%xmm1, (%0)     \n\t"
         "add $8, %0            \n\t"
         "jmp 2f                \n\t" // skip auxiliary routine

         /*
          * Finish generating the next 6 quarter-keys.
          *
          * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
          * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
          *
          * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
          * and those are written to the round key buffer.
          */
         "1:                            \n\t"
         "pshufd $0x55, %%xmm2, %%xmm2  \n\t" // X:X:X:X
         "pxor %%xmm0, %%xmm2           \n\t" // X+r3:X+r2:X+r1:r4
         "pslldq $4, %%xmm0             \n\t" // etc
         "pxor %%xmm0, %%xmm2           \n\t"
         "pslldq $4, %%xmm0             \n\t"
         "pxor %%xmm0, %%xmm2           \n\t"
         "pslldq $4, %%xmm0             \n\t"
         "pxor %%xmm2, %%xmm0           \n\t" // update xmm0 = r9:r8:r7:r6
         "movdqu %%xmm0, (%0)           \n\t"
         "add $16, %0                   \n\t"
         "pshufd $0xff, %%xmm0, %%xmm2  \n\t" // r9:r9:r9:r9
         "pxor %%xmm1, %%xmm2           \n\t" // stuff:stuff:r9+r5:r10
         "pslldq $4, %%xmm1             \n\t" // r2:r1:r0:0
         "pxor %%xmm2, %%xmm1           \n\t" // xmm1 = stuff:stuff:r11:r10
         "movq %%xmm1, (%0)             \n\t"
         "add $8, %0                    \n\t"
         "ret                           \n\t"

         "2:                            \n\t"
         AESKEYGENA xmm1_xmm2 ",0x01    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x02    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x04    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x08    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x10    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x20    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x40    \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x80    \n\tcall 1b \n\t"

         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
}

/*
 * Key expansion, 256-bit case
 */
void aesni_setkey_enc_256( unsigned char *rk,
                           const unsigned char *key )
{
    asm( "movdqu (%1), %%xmm0           \n\t"
         "movdqu %%xmm0, (%0)           \n\t"
         "add $16, %0                   \n\t"
         "movdqu 16(%1), %%xmm1         \n\t"
         "movdqu %%xmm1, (%0)           \n\t"
         "jmp 2f                        \n\t" // skip auxiliary routine

         /*
          * Finish generating the next two round keys.
          *
          * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
          * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
          *
          * On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
          * and those have been written to the output buffer.
          */
         "1:                                \n\t"
         "pshufd $0xff, %%xmm2, %%xmm2      \n\t"
         "pxor %%xmm0, %%xmm2               \n\t"
         "pslldq $4, %%xmm0                 \n\t"
         "pxor %%xmm0, %%xmm2               \n\t"
         "pslldq $4, %%xmm0                 \n\t"
         "pxor %%xmm0, %%xmm2               \n\t"
         "pslldq $4, %%xmm0                 \n\t"
         "pxor %%xmm2, %%xmm0               \n\t"
         "add $16, %0                       \n\t"
         "movdqu %%xmm0, (%0)               \n\t"

         /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
          * and proceed to generate next round key from there */
         AESKEYGENA xmm0_xmm2 ",0x00        \n\t"
         "pshufd $0xaa, %%xmm2, %%xmm2      \n\t"
         "pxor %%xmm1, %%xmm2               \n\t"
         "pslldq $4, %%xmm1                 \n\t"
         "pxor %%xmm1, %%xmm2               \n\t"
         "pslldq $4, %%xmm1                 \n\t"
         "pxor %%xmm1, %%xmm2               \n\t"
         "pslldq $4, %%xmm1                 \n\t"
         "pxor %%xmm2, %%xmm1               \n\t"
         "add $16, %0                       \n\t"
         "movdqu %%xmm1, (%0)               \n\t"
         "ret                               \n\t"

         /*
          * Main "loop" - Generating one more key than necessary,
          * see definition of aes_context.buf
          */
         "2:                                \n\t"
         AESKEYGENA xmm1_xmm2 ",0x01        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x02        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x04        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x08        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x10        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x20        \n\tcall 1b \n\t"
         AESKEYGENA xmm1_xmm2 ",0x40        \n\tcall 1b \n\t"
         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
}

#endif /* HAVE_AMD64 */


================================================
FILE: lib/aes_acc/aesni.h
================================================
/*
 * This file is adapted from PolarSSL 1.3.19 (GPL)
 */

/**
 * \file aesni.h
 *
 * \brief AES-NI for hardware AES acceleration on some Intel processors
 *
 *  Copyright (C) 2013, ARM Limited, All Rights Reserved
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#ifndef UDP2RAW_AESNI_H_
#define UDP2RAW_AESNI_H_

/* Values accepted by the `mode` argument of aesni_crypt_ecb(); only defined
 * here when no earlier header has already defined them. */
#ifndef AES_ENCRYPT
#define AES_ENCRYPT     1
#endif

#ifndef AES_DECRYPT
#define AES_DECRYPT     0
#endif

/*
 * HAVE_AMD64 is defined only for GCC-compatible compilers targeting x86-64
 * when NO_AESACC is not defined; everything below depends on it.
 */
#if defined(__GNUC__) &&  \
    ( defined(__amd64__) || defined(__x86_64__) ) && \
    !defined(NO_AESACC)
#define HAVE_AMD64
#endif

#if defined(HAVE_AMD64)

#ifdef __cplusplus
extern "C" {
#endif

/**
 * \brief          AES-NI features detection routine
 *
 * \return         1 if CPU has support for AES-NI, 0 otherwise
 */
int aesni_supported( void );

/**
 * \brief          AES-NI AES-ECB block en(de)cryption
 *
 * \param nr       number of rounds
 * \param rk       AES round keys
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param input    16-byte input block
 * \param output   16-byte output block
 */
void aesni_crypt_ecb( int nr,
                      unsigned char *rk,
                      int mode,
                      const unsigned char input[16],
                      unsigned char output[16] );

/**
 * \brief           Compute decryption round keys from encryption round keys
 *
 * \param invkey    Round keys for the equivalent inverse cipher
 * \param fwdkey    Original round keys (for encryption)
 * \param nr        Number of rounds (that is, number of round keys minus one)
 */
void aesni_inverse_key( unsigned char *invkey,
                        const unsigned char *fwdkey, int nr );

/**
 * \brief           Perform 128-bit key expansion (for encryption)
 *
 * \param rk        Destination buffer where the round keys are written
 * \param key       Encryption key
 */
void aesni_setkey_enc_128( unsigned char *rk,
                           const unsigned char *key );

/**
 * \brief           Perform 192-bit key expansion (for encryption)
 *
 * \param rk        Destination buffer where the round keys are written
 * \param key       Encryption key
 */
void aesni_setkey_enc_192( unsigned char *rk,
                           const unsigned char *key );

/**
 * \brief           Perform 256-bit key expansion (for encryption)
 *
 * \param rk        Destination buffer where the round keys are written
 * \param key       Encryption key
 */
void aesni_setkey_enc_256( unsigned char *rk,
                           const unsigned char *key );

#ifdef __cplusplus
}
#endif 

#endif /* HAVE_AMD64 */

#endif /* UDP2RAW_AESNI_H_ */


================================================
FILE: lib/aes_acc/asm/arm.S
================================================
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ AES for ARMv4

@ January 2007.
@
@ Code uses single 1K S-box and is >2 times faster than code generated
@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
@ allows to merge logical or arithmetic operation with shift or rotate
@ in one instruction and emit combined result every cycle. The module
@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
@ key [on single-issue Xscale PXA250 core].

@ May 2007.
@
@ AES_set_[en|de]crypt_key is added.

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~21.5 cycles per byte.

#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
#endif

.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax	unified
.thumb
#else
.code	32
#undef __thumb2__
#endif


.align	5
AES_Te:
.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
@ Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
@ rcon[]
.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0


@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Encrypts one 16-byte block.
@   r0 = in   (16 input bytes)
@   r1 = out  (16 output bytes)
@   r2 = key  (expanded key schedule, consumed by _armv4_AES_encrypt)
@ The input is packed into the four words r0-r3 with the first byte of
@ each group of four in bits 31..24, passed to _armv4_AES_encrypt, and the
@ resulting r0-r3 are written to out in the same byte order.
@ r4-r12 and lr are saved on entry and restored on exit.
.globl	AES_encrypt

.align	5
AES_encrypt:
#ifndef	__thumb2__
	@ In ARM state pc reads as the address of this instruction plus 8,
	@ so r3 ends up holding the run-time address of AES_encrypt.
	sub	r3,pc,#8		@ AES_encrypt
#else
	adr	r3,AES_encrypt
#endif
	@ r1 (out) is the lowest-numbered register in the list, so it lands
	@ at the new top of stack and can be popped on its own after the core.
	stmdb	sp!,{r1,r4-r12,lr}
#ifdef	__APPLE__
	adr	r10,AES_Te
#else
	@ Table address without an absolute relocation: the label difference
	@ is an assemble-time constant subtracted from the run-time r3.
	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
#endif
	mov	r12,r0		@ inp
	mov	r11,r2
#if __ARM_ARCH__<7
	@ Byte-wise loads: each word is assembled as
	@ in[4i]<<24 | in[4i+1]<<16 | in[4i+2]<<8 | in[4i+3].
	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
	ldrb	r4,[r12,#2]	@ manner...
	ldrb	r5,[r12,#1]
	ldrb	r6,[r12,#0]
	orr	r0,r0,r4,lsl#8
	ldrb	r1,[r12,#7]
	orr	r0,r0,r5,lsl#16
	ldrb	r4,[r12,#6]
	orr	r0,r0,r6,lsl#24
	ldrb	r5,[r12,#5]
	ldrb	r6,[r12,#4]
	orr	r1,r1,r4,lsl#8
	ldrb	r2,[r12,#11]
	orr	r1,r1,r5,lsl#16
	ldrb	r4,[r12,#10]
	orr	r1,r1,r6,lsl#24
	ldrb	r5,[r12,#9]
	ldrb	r6,[r12,#8]
	orr	r2,r2,r4,lsl#8
	ldrb	r3,[r12,#15]
	orr	r2,r2,r5,lsl#16
	ldrb	r4,[r12,#14]
	orr	r2,r2,r6,lsl#24
	ldrb	r5,[r12,#13]
	ldrb	r6,[r12,#12]
	orr	r3,r3,r4,lsl#8
	orr	r3,r3,r5,lsl#16
	orr	r3,r3,r6,lsl#24
#else
	@ Word loads; on little-endian builds rev swaps the bytes so the
	@ words have the same layout as the byte-wise path above.
	ldr	r0,[r12,#0]
	ldr	r1,[r12,#4]
	ldr	r2,[r12,#8]
	ldr	r3,[r12,#12]
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
#endif
	bl	_armv4_AES_encrypt

	@ Retrieve the out pointer that was pushed as r1 on entry.
	ldr	r12,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
	str	r0,[r12,#0]
	str	r1,[r12,#4]
	str	r2,[r12,#8]
	str	r3,[r12,#12]
#else
	@ Byte-wise stores, most significant byte of each word first.
	mov	r4,r0,lsr#24		@ write output in endian-neutral
	mov	r5,r0,lsr#16		@ manner...
	mov	r6,r0,lsr#8
	strb	r4,[r12,#0]
	strb	r5,[r12,#1]
	mov	r4,r1,lsr#24
	strb	r6,[r12,#2]
	mov	r5,r1,lsr#16
	strb	r0,[r12,#3]
	mov	r6,r1,lsr#8
	strb	r4,[r12,#4]
	strb	r5,[r12,#5]
	mov	r4,r2,lsr#24
	strb	r6,[r12,#6]
	mov	r5,r2,lsr#16
	strb	r1,[r12,#7]
	mov	r6,r2,lsr#8
	strb	r4,[r12,#8]
	strb	r5,[r12,#9]
	mov	r4,r3,lsr#24
	strb	r6,[r12,#10]
	mov	r5,r3,lsr#16
	strb	r2,[r12,#11]
	mov	r6,r3,lsr#8
	strb	r4,[r12,#12]
	strb	r5,[r12,#13]
	strb	r6,[r12,#14]
	strb	r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
	@ Restore the saved registers and return by loading the saved lr
	@ directly into pc.
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	@ Bit 0 of lr clear: return with a plain mov.  Otherwise fall through
	@ to the literal word 0xe12fff1e, which is the encoding of bx lr.
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif


@ _armv4_AES_encrypt: file-local encryption core (no .globl).
@ In:   r0-r3 = state words
@       r10   = address of AES_Te
@       r11   = key schedule (round count is read from byte offset 240)
@ Out:  r0-r3 = encrypted state words; r10 has its entry value again.
@ r4-r9 and r12 are scratch, lr holds the byte mask 255 for the whole
@ routine; the incoming lr is pushed first and popped straight into pc.
@ r11 is advanced through the key schedule and is not restored.
@ Only one 256-word table is read here: the comments name Te0..Te3, and
@ the code obtains the other three by rotating the loaded word.
.align	2
_armv4_AES_encrypt:
	str	lr,[sp,#-4]!		@ push lr
	@ Initial AddRoundKey with round key 0; r11 now points at round key 1.
	ldmia	r11!,{r4,r5,r6,r7}
	eor	r0,r0,r4
	@ r11 was advanced by 16, so this reads the word at key+240 (rounds).
	ldr	r12,[r11,#240-16]
	eor	r1,r1,r5
	eor	r2,r2,r6
	eor	r3,r3,r7
	@ rounds-1 passes through the table loop; the last round is separate.
	sub	r12,r12,#1
	mov	lr,#255

	@ Split s0 into four byte indices ahead of the loop (r0 keeps s0>>24).
	and	r7,lr,r0
	and	r8,lr,r0,lsr#8
	and	r9,lr,r0,lsr#16
	mov	r0,r0,lsr#24
Lenc_loop:
	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
	and	r7,lr,r1,lsr#16	@ i0
	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
	and	r8,lr,r1
	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
	and	r9,lr,r1,lsr#8
	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
	mov	r1,r1,lsr#24

	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
	eor	r0,r0,r7,ror#8
	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r5,r8,ror#8
	and	r8,lr,r2,lsr#16	@ i1
	eor	r6,r6,r9,ror#8
	and	r9,lr,r2
	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
	eor	r1,r1,r4,ror#24
	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
	mov	r2,r2,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
	eor	r0,r0,r7,ror#16
	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
	and	r7,lr,r3		@ i0
	eor	r1,r1,r8,ror#8
	and	r8,lr,r3,lsr#8	@ i1
	eor	r6,r6,r9,ror#16
	and	r9,lr,r3,lsr#16	@ i2
	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
	eor	r2,r2,r5,ror#16
	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
	mov	r3,r3,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
	eor	r0,r0,r7,ror#24
	@ Fetch the next round key; r11 steps by 16 and the other three words
	@ are read with negative offsets from the advanced pointer.
	ldr	r7,[r11],#16
	eor	r1,r1,r8,ror#16
	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
	eor	r2,r2,r9,ror#8
	ldr	r4,[r11,#-12]
	eor	r3,r3,r6,ror#8

	ldr	r5,[r11,#-8]
	eor	r0,r0,r7
	ldr	r6,[r11,#-4]
	@ The new s0 is split into byte indices for the next pass (or for
	@ the final round below) while the other words get their round key.
	and	r7,lr,r0
	eor	r1,r1,r4
	and	r8,lr,r0,lsr#8
	eor	r2,r2,r5
	and	r9,lr,r0,lsr#16
	eor	r3,r3,r6
	mov	r0,r0,lsr#24

	subs	r12,r12,#1
	bne	Lenc_loop

	@ Final round.  With r10 moved by 2 and the index still scaled by 4,
	@ ldrb fetches byte 2 of each table word.  In the visible table
	@ entries the two middle bytes of a word are equal, so the byte read
	@ does not depend on endianness; the comments refer to it as Te4.
	add	r10,r10,#2

	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
	and	r7,lr,r1,lsr#16	@ i0
	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
	and	r8,lr,r1
	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
	and	r9,lr,r1,lsr#8
	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
	mov	r1,r1,lsr#24

	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
	eor	r0,r7,r0,lsl#8
	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r8,r5,lsl#8
	and	r8,lr,r2,lsr#16	@ i1
	eor	r6,r9,r6,lsl#8
	and	r9,lr,r2
	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
	eor	r1,r4,r1,lsl#24
	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
	mov	r2,r2,lsr#24

	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
	eor	r0,r7,r0,lsl#8
	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
	and	r7,lr,r3		@ i0
	eor	r1,r1,r8,lsl#16
	and	r8,lr,r3,lsr#8	@ i1
	eor	r6,r9,r6,lsl#8
	and	r9,lr,r3,lsr#16	@ i2
	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
	eor	r2,r5,r2,lsl#24
	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
	mov	r3,r3,lsr#24

	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
	eor	r0,r7,r0,lsl#8
	@ Last round key is read in place; r11 is not advanced any further.
	ldr	r7,[r11,#0]
	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
	eor	r1,r1,r8,lsl#8
	ldr	r4,[r11,#4]
	eor	r2,r2,r9,lsl#16
	ldr	r5,[r11,#8]
	eor	r3,r6,r3,lsl#24
	ldr	r6,[r11,#12]

	eor	r0,r0,r7
	eor	r1,r1,r4
	eor	r2,r2,r5
	eor	r3,r3,r6

	@ Undo the +2 adjustment so r10 points at the table start again.
	sub	r10,r10,#2
	ldr	pc,[sp],#4		@ pop and return


@ AES_set_encrypt_key / _armv4_AES_set_encrypt_key
@
@ Expands a user key into the encryption key schedule.
@ Presumed C prototype (declared outside this section - confirm):
@   int AES_set_encrypt_key(const unsigned char *userKey, int bits,
@                           AES_KEY *key);
@ In:   r0 = user key bytes, r1 = key length in bits, r2 = key schedule
@ Out:  r0 = 0 on success; -1 if r0 or r2 is zero, or if r1 is not one
@       of 128, 192, 256.  On success r2 again holds the key schedule
@       pointer (AES_set_decrypt_key relies on this).
@ The round count (10, 12 or 14) is stored at byte offset 240 of the
@ schedule.  Key words are stored with the first key byte in bits 31..24.
@ The second label is the file-local entry used by AES_set_decrypt_key.
.globl	AES_set_encrypt_key

.align	5
AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
#ifndef	__thumb2__
	@ pc reads as this instruction plus 8 in ARM state: r3 = own address.
	sub	r3,pc,#8		@ AES_set_encrypt_key
#else
	adr	r3,AES_set_encrypt_key
#endif
	@ Argument checks.  Nothing has been pushed yet, so the error paths
	@ can branch straight to the bare return at Labrt.
	teq	r0,#0
#ifdef	__thumb2__
	itt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	moveq	r0,#-1
	beq	Labrt
	teq	r2,#0
#ifdef	__thumb2__
	itt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	moveq	r0,#-1
	beq	Labrt

	teq	r1,#128
	beq	Lok
	teq	r1,#192
	beq	Lok
	teq	r1,#256
#ifdef	__thumb2__
	itt	ne			@ Thumb2 thing, sanity check in ARM
#endif
	movne	r0,#-1
	bne	Labrt

Lok:	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	mov	r12,r0		@ inp
	mov	lr,r1			@ bits
	mov	r11,r2			@ key

#ifdef	__APPLE__
	adr	r10,AES_Te+1024				@ Te4
#else
	@ Address of the byte table that follows the 1024-byte word table,
	@ derived from the run-time r3 and an assemble-time label difference.
	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
#endif

	@ Load key bytes 0..15 as rk[0..3] and store them; r11 then points
	@ at rk[4] while r0-r3 keep the four words for the expansion loops.
#if __ARM_ARCH__<7
	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
	ldrb	r4,[r12,#2]	@ manner...
	ldrb	r5,[r12,#1]
	ldrb	r6,[r12,#0]
	orr	r0,r0,r4,lsl#8
	ldrb	r1,[r12,#7]
	orr	r0,r0,r5,lsl#16
	ldrb	r4,[r12,#6]
	orr	r0,r0,r6,lsl#24
	ldrb	r5,[r12,#5]
	ldrb	r6,[r12,#4]
	orr	r1,r1,r4,lsl#8
	ldrb	r2,[r12,#11]
	orr	r1,r1,r5,lsl#16
	ldrb	r4,[r12,#10]
	orr	r1,r1,r6,lsl#24
	ldrb	r5,[r12,#9]
	ldrb	r6,[r12,#8]
	orr	r2,r2,r4,lsl#8
	ldrb	r3,[r12,#15]
	orr	r2,r2,r5,lsl#16
	ldrb	r4,[r12,#14]
	orr	r2,r2,r6,lsl#24
	ldrb	r5,[r12,#13]
	ldrb	r6,[r12,#12]
	orr	r3,r3,r4,lsl#8
	str	r0,[r11],#16
	orr	r3,r3,r5,lsl#16
	str	r1,[r11,#-12]
	orr	r3,r3,r6,lsl#24
	str	r2,[r11,#-8]
	str	r3,[r11,#-4]
#else
	ldr	r0,[r12,#0]
	ldr	r1,[r12,#4]
	ldr	r2,[r12,#8]
	ldr	r3,[r12,#12]
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
	str	r0,[r11],#16
	str	r1,[r11,#-12]
	str	r2,[r11,#-8]
	str	r3,[r11,#-4]
#endif

	@ ---- 128-bit key: 10 rounds, 10 loop passes of 4 words each ----
	teq	lr,#128
	bne	Lnot128
	mov	r12,#10
	@ r11 is key+16 here, so this store lands at key+240.
	str	r12,[r11,#240-16]
	@ The rcon words follow the 256-byte table that r10 points at.
	add	r6,r10,#256			@ rcon
	mov	lr,#255

L128_loop:
	@ r5 = byte-substituted and byte-rotated copy of r3 (rk[3]):
	@ Te4[b3] | Te4[b2]<<24 | Te4[b1]<<16 | Te4[b0]<<8, where b3 is the
	@ top byte of r3 and b0 the bottom byte.
	and	r5,lr,r3,lsr#24
	and	r7,lr,r3,lsr#16
	ldrb	r5,[r10,r5]
	and	r8,lr,r3,lsr#8
	ldrb	r7,[r10,r7]
	and	r9,lr,r3
	ldrb	r8,[r10,r8]
	orr	r5,r5,r7,lsl#24
	ldrb	r9,[r10,r9]
	orr	r5,r5,r8,lsl#16
	ldr	r4,[r6],#4			@ rcon[i++]
	orr	r5,r5,r9,lsl#8
	eor	r5,r5,r4
	eor	r0,r0,r5			@ rk[4]=rk[0]^...
	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
	str	r0,[r11],#16
	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
	str	r1,[r11,#-12]
	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
	str	r2,[r11,#-8]
	subs	r12,r12,#1
	str	r3,[r11,#-4]
	bne	L128_loop
	@ r11 is key+16+10*16 = key+176; recover the schedule base in r2.
	sub	r2,r11,#176
	b	Ldone

Lnot128:
	@ Key bytes 16..23 become rk[4] and rk[5]; r9 keeps the last word.
#if __ARM_ARCH__<7
	ldrb	r8,[r12,#19]
	ldrb	r4,[r12,#18]
	ldrb	r5,[r12,#17]
	ldrb	r6,[r12,#16]
	orr	r8,r8,r4,lsl#8
	ldrb	r9,[r12,#23]
	orr	r8,r8,r5,lsl#16
	ldrb	r4,[r12,#22]
	orr	r8,r8,r6,lsl#24
	ldrb	r5,[r12,#21]
	ldrb	r6,[r12,#20]
	orr	r9,r9,r4,lsl#8
	orr	r9,r9,r5,lsl#16
	str	r8,[r11],#8
	orr	r9,r9,r6,lsl#24
	str	r9,[r11,#-4]
#else
	ldr	r8,[r12,#16]
	ldr	r9,[r12,#20]
#ifdef __ARMEL__
	rev	r8,r8
	rev	r9,r9
#endif
	str	r8,[r11],#8
	str	r9,[r11,#-4]
#endif

	@ ---- 192-bit key: 12 rounds ----
	teq	lr,#192
	bne	Lnot192
	mov	r12,#12
	@ r11 is key+24 here, so this store lands at key+240.
	str	r12,[r11,#240-24]
	add	r6,r10,#256			@ rcon
	mov	lr,#255
	@ r12 is reused as the loop counter: 8 passes, the last one leaving
	@ after four words (6 + 8*4 + 7*2 = 52 words in total).
	mov	r12,#8

L192_loop:
	@ Same substitute-and-rotate step as the 128-bit loop, applied to r9
	@ (the most recently produced word).
	and	r5,lr,r9,lsr#24
	and	r7,lr,r9,lsr#16
	ldrb	r5,[r10,r5]
	and	r8,lr,r9,lsr#8
	ldrb	r7,[r10,r7]
	and	r9,lr,r9
	ldrb	r8,[r10,r8]
	orr	r5,r5,r7,lsl#24
	ldrb	r9,[r10,r9]
	orr	r5,r5,r8,lsl#16
	ldr	r4,[r6],#4			@ rcon[i++]
	orr	r5,r5,r9,lsl#8
	eor	r9,r5,r4
	eor	r0,r0,r9			@ rk[6]=rk[0]^...
	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
	str	r0,[r11],#24
	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
	str	r1,[r11,#-20]
	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
	str	r2,[r11,#-16]
	subs	r12,r12,#1
	str	r3,[r11,#-12]
#ifdef	__thumb2__
	itt	eq				@ Thumb2 thing, sanity check in ARM
#endif
	@ r11 is key+24+8*24 = key+216 on the last pass.
	subeq	r2,r11,#216
	beq	Ldone

	@ Remaining two words of this 6-word group; r9 carries the last one
	@ into the next pass.
	ldr	r7,[r11,#-32]
	ldr	r8,[r11,#-28]
	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
	str	r7,[r11,#-8]
	str	r9,[r11,#-4]
	b	L192_loop

Lnot192:
	@ ---- 256-bit key: key bytes 24..31 become rk[6] and rk[7] ----
#if __ARM_ARCH__<7
	ldrb	r8,[r12,#27]
	ldrb	r4,[r12,#26]
	ldrb	r5,[r12,#25]
	ldrb	r6,[r12,#24]
	orr	r8,r8,r4,lsl#8
	ldrb	r9,[r12,#31]
	orr	r8,r8,r5,lsl#16
	ldrb	r4,[r12,#30]
	orr	r8,r8,r6,lsl#24
	ldrb	r5,[r12,#29]
	ldrb	r6,[r12,#28]
	orr	r9,r9,r4,lsl#8
	orr	r9,r9,r5,lsl#16
	str	r8,[r11],#8
	orr	r9,r9,r6,lsl#24
	str	r9,[r11,#-4]
#else
	ldr	r8,[r12,#24]
	ldr	r9,[r12,#28]
#ifdef __ARMEL__
	rev	r8,r8
	rev	r9,r9
#endif
	str	r8,[r11],#8
	str	r9,[r11,#-4]
#endif

	mov	r12,#14
	@ r11 is key+32 here, so this store lands at key+240.
	str	r12,[r11,#240-32]
	add	r6,r10,#256			@ rcon
	mov	lr,#255
	@ 7 passes, the last one leaving after four words
	@ (8 + 7*4 + 6*4 = 60 words in total).
	mov	r12,#7

L256_loop:
	and	r5,lr,r9,lsr#24
	and	r7,lr,r9,lsr#16
	ldrb	r5,[r10,r5]
	and	r8,lr,r9,lsr#8
	ldrb	r7,[r10,r7]
	and	r9,lr,r9
	ldrb	r8,[r10,r8]
	orr	r5,r5,r7,lsl#24
	ldrb	r9,[r10,r9]
	orr	r5,r5,r8,lsl#16
	ldr	r4,[r6],#4			@ rcon[i++]
	orr	r5,r5,r9,lsl#8
	eor	r9,r5,r4
	eor	r0,r0,r9			@ rk[8]=rk[0]^...
	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
	str	r0,[r11],#32
	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
	str	r1,[r11,#-28]
	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
	str	r2,[r11,#-24]
	subs	r12,r12,#1
	str	r3,[r11,#-20]
#ifdef	__thumb2__
	itt	eq				@ Thumb2 thing, sanity check in ARM
#endif
	@ r11 is key+32+7*32 = key+256 on the last pass.
	subeq	r2,r11,#256
	beq	Ldone

	@ Second half of the 8-word group: byte substitution of r3 with no
	@ byte rotation and no rcon (each byte stays in its own position).
	and	r5,lr,r3
	and	r7,lr,r3,lsr#8
	ldrb	r5,[r10,r5]
	and	r8,lr,r3,lsr#16
	ldrb	r7,[r10,r7]
	and	r9,lr,r3,lsr#24
	ldrb	r8,[r10,r8]
	orr	r5,r5,r7,lsl#8
	ldrb	r9,[r10,r9]
	orr	r5,r5,r8,lsl#16
	ldr	r4,[r11,#-48]
	orr	r5,r5,r9,lsl#24

	ldr	r7,[r11,#-44]
	ldr	r8,[r11,#-40]
	eor	r4,r4,r5			@ rk[12]=rk[4]^...
	ldr	r9,[r11,#-36]
	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
	str	r4,[r11,#-16]
	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
	str	r7,[r11,#-12]
	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
	str	r8,[r11,#-8]
	str	r9,[r11,#-4]
	b	L256_loop

.align	2
	@ Success exit: r0 = 0, saved registers restored (lr reloaded, not
	@ pc), then fall into the shared return at Labrt.
Ldone:	mov	r0,#0
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	@ Labrt is also the target of the error paths above and of
	@ AES_set_decrypt_key; it only returns to lr.
Labrt:
#if __ARM_ARCH__>=5
	bx	lr				@ .word	0xe12fff1e
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif


@ AES_set_decrypt_key
@
@ Builds a decryption key schedule: first runs the encryption key
@ expansion on the same arguments (r0, r1, r2 are passed through
@ untouched), then converts the result in place.
@ Out:  r0 = nonzero error code from the expansion, returned unchanged
@       through Labrt; otherwise the result of _armv4_AES_set_enc2dec_key,
@       which is reached by a tail branch and returns to the original
@       caller.
.globl	AES_set_decrypt_key

.align	5
AES_set_decrypt_key:
	str	lr,[sp,#-4]!            @ push lr
	bl	_armv4_AES_set_encrypt_key
	teq	r0,#0
	@ ldr does not alter the flags, so the teq result is still valid for
	@ the bne below; lr is restored before either exit path is taken.
	ldr	lr,[sp],#4              @ pop lr
	bne	Labrt

	@ Input and output schedule are the same buffer: in-place conversion.
	mov	r0,r2			@ AES_set_encrypt_key preserves r2,
	mov	r1,r2			@ which is AES_KEY *key
	b	_armv4_AES_set_enc2dec_key


@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
@
@ Converts an encryption key schedule into a decryption key schedule.
@   r0 = inp, r1 = out (the two may be the same buffer, as they are when
@   reached from AES_set_decrypt_key)
@ Step 1 (Linv): copy the 16-byte round keys to out in reverse order and
@   copy the round count at byte offset 240.
@ Step 2 (Lmix): transform every word of round keys 1 .. rounds-1 in out
@   with the tp1/tp2/tp4/tp8 combination below; the first and last round
@   keys are left as copied.
@ Returns r0 = 0 (the C comment above declares void).
@ r4-r12 and lr are saved and restored.
.globl	AES_set_enc2dec_key

.align	5
AES_set_enc2dec_key:
_armv4_AES_set_enc2dec_key:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	ldr	r12,[r0,#240]
	mov	r7,r0			@ input
	@ r8 and r10 point at the last round key of inp and out.
	add	r8,r0,r12,lsl#4
	mov	r11,r1			@ output
	add	r10,r1,r12,lsl#4
	str	r12,[r1,#240]

	@ Both 16-byte groups are fully loaded before either is stored, which
	@ is what makes the swap safe when inp and out are the same buffer.
	@ The loop ends when the two read pointers meet at the middle round
	@ key; that needs an even round count such as the 10/12/14 written by
	@ the key expansion in this file.
Linv:	ldr	r0,[r7],#16
	ldr	r1,[r7,#-12]
	ldr	r2,[r7,#-8]
	ldr	r3,[r7,#-4]
	ldr	r4,[r8],#-16
	ldr	r5,[r8,#16+4]
	ldr	r6,[r8,#16+8]
	ldr	r9,[r8,#16+12]
	str	r0,[r10],#-16
	str	r1,[r10,#16+4]
	str	r2,[r10,#16+8]
	str	r3,[r10,#16+12]
	str	r4,[r11],#16
	str	r5,[r11,#-12]
	str	r6,[r11,#-8]
	str	r9,[r11,#-4]
	teq	r7,r8
	bne	Linv

	@ Copy the middle round key, which maps onto itself.
	ldr	r0,[r7]
	ldr	r1,[r7,#4]
	ldr	r2,[r7,#8]
	ldr	r3,[r7,#12]
	str	r0,[r11]
	str	r1,[r11,#4]
	str	r2,[r11,#8]
	str	r3,[r11,#12]
	@ r11 is out + 8*rounds here; step back to out, then pre-index to
	@ round key 1 while loading its first word.
	sub	r11,r11,r12,lsl#3
	ldr	r0,[r11,#16]!		@ prefetch tp1
	@ Build the per-byte constants r7 = 0x80808080, r8 = 0x1b1b1b1b and
	@ r9 = 0x7f7f7f7f (complement of r7).
	mov	r7,#0x80
	mov	r8,#0x1b
	orr	r7,r7,#0x8000
	orr	r8,r8,#0x1b00
	orr	r7,r7,r7,lsl#16
	orr	r8,r8,r8,lsl#16
	sub	r12,r12,#1
	mvn	r9,r7
	mov	r12,r12,lsl#2	@ (rounds-1)*4

	@ Each of the three groups below doubles all four bytes of a word at
	@ once: bytes with the top bit set get 0x1b xored in after the shift
	@ (x - (x>>7) turns 0x80 into 0x7f, which then selects 0x1b), and
	@ masking with r9 first keeps the shift from crossing byte lanes.
Lmix:	and	r4,r0,r7
	and	r1,r0,r9
	sub	r4,r4,r4,lsr#7
	and	r4,r4,r8
	eor	r1,r4,r1,lsl#1	@ tp2

	and	r4,r1,r7
	and	r2,r1,r9
	sub	r4,r4,r4,lsr#7
	and	r4,r4,r8
	eor	r2,r4,r2,lsl#1	@ tp4

	and	r4,r2,r7
	and	r3,r2,r9
	sub	r4,r4,r4,lsr#7
	and	r4,r4,r8
	eor	r3,r4,r3,lsl#1	@ tp8

	eor	r4,r1,r2
	eor	r5,r0,r3		@ tp9
	eor	r4,r4,r3		@ tpe
	eor	r4,r4,r1,ror#24
	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
	eor	r4,r4,r2,ror#16
	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)

	@ Load the next word before overwriting the current one in place.
	ldr	r0,[r11,#4]		@ prefetch tp1
	str	r4,[r11],#4
	subs	r12,r12,#1
	bne	Lmix

	mov	r0,#0
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	@ Bit 0 of lr clear: return with a plain mov.  Otherwise fall through
	@ to the literal word 0xe12fff1e, which is the encoding of bx lr.
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif


@ AES_Td: lookup data for decryption, addressed relative to the code by
@ AES_decrypt.
@   bytes    0..1023  256 words, indexed by a byte value scaled by 4; the
@                     decryption core rotates the loaded words to get the
@                     tables its comments call Td1..Td3
@   bytes 1024..1279  256-byte table (Td4) used for the last round
.align	5
AES_Td:
.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
@ Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d


@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Decrypts one 16-byte block.
@   r0 = in   (16 input bytes)
@   r1 = out  (16 output bytes)
@   r2 = key  (decryption key schedule, consumed by _armv4_AES_decrypt)
@ The input is packed into the four words r0-r3 with the first byte of
@ each group of four in bits 31..24, passed to _armv4_AES_decrypt, and the
@ resulting r0-r3 are written to out in the same byte order.
@ r4-r12 and lr are saved on entry and restored on exit.
.globl	AES_decrypt

.align	5
AES_decrypt:
#ifndef	__thumb2__
	@ In ARM state pc reads as the address of this instruction plus 8,
	@ so r3 ends up holding the run-time address of AES_decrypt.
	sub	r3,pc,#8		@ AES_decrypt
#else
	adr	r3,AES_decrypt
#endif
	@ r1 (out) is the lowest-numbered register in the list, so it lands
	@ at the new top of stack and can be popped on its own after the core.
	stmdb	sp!,{r1,r4-r12,lr}
#ifdef	__APPLE__
	adr	r10,AES_Td
#else
	@ Table address without an absolute relocation: the label difference
	@ is an assemble-time constant subtracted from the run-time r3.
	sub	r10,r3,#AES_decrypt-AES_Td	@ Td
#endif
	mov	r12,r0		@ inp
	mov	r11,r2
#if __ARM_ARCH__<7
	@ Byte-wise loads: each word is assembled as
	@ in[4i]<<24 | in[4i+1]<<16 | in[4i+2]<<8 | in[4i+3].
	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
	ldrb	r4,[r12,#2]	@ manner...
	ldrb	r5,[r12,#1]
	ldrb	r6,[r12,#0]
	orr	r0,r0,r4,lsl#8
	ldrb	r1,[r12,#7]
	orr	r0,r0,r5,lsl#16
	ldrb	r4,[r12,#6]
	orr	r0,r0,r6,lsl#24
	ldrb	r5,[r12,#5]
	ldrb	r6,[r12,#4]
	orr	r1,r1,r4,lsl#8
	ldrb	r2,[r12,#11]
	orr	r1,r1,r5,lsl#16
	ldrb	r4,[r12,#10]
	orr	r1,r1,r6,lsl#24
	ldrb	r5,[r12,#9]
	ldrb	r6,[r12,#8]
	orr	r2,r2,r4,lsl#8
	ldrb	r3,[r12,#15]
	orr	r2,r2,r5,lsl#16
	ldrb	r4,[r12,#14]
	orr	r2,r2,r6,lsl#24
	ldrb	r5,[r12,#13]
	ldrb	r6,[r12,#12]
	orr	r3,r3,r4,lsl#8
	orr	r3,r3,r5,lsl#16
	orr	r3,r3,r6,lsl#24
#else
	@ Word loads; on little-endian builds rev swaps the bytes so the
	@ words have the same layout as the byte-wise path above.
	ldr	r0,[r12,#0]
	ldr	r1,[r12,#4]
	ldr	r2,[r12,#8]
	ldr	r3,[r12,#12]
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
#endif
	bl	_armv4_AES_decrypt

	@ Retrieve the out pointer that was pushed as r1 on entry.
	ldr	r12,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
	str	r0,[r12,#0]
	str	r1,[r12,#4]
	str	r2,[r12,#8]
	str	r3,[r12,#12]
#else
	@ Byte-wise stores, most significant byte of each word first.
	mov	r4,r0,lsr#24		@ write output in endian-neutral
	mov	r5,r0,lsr#16		@ manner...
	mov	r6,r0,lsr#8
	strb	r4,[r12,#0]
	strb	r5,[r12,#1]
	mov	r4,r1,lsr#24
	strb	r6,[r12,#2]
	mov	r5,r1,lsr#16
	strb	r0,[r12,#3]
	mov	r6,r1,lsr#8
	strb	r4,[r12,#4]
	strb	r5,[r12,#5]
	mov	r4,r2,lsr#24
	strb	r6,[r12,#6]
	mov	r5,r2,lsr#16
	strb	r1,[r12,#7]
	mov	r6,r2,lsr#8
	strb	r4,[r12,#8]
	strb	r5,[r12,#9]
	mov	r4,r3,lsr#24
	strb	r6,[r12,#10]
	mov	r5,r3,lsr#16
	strb	r2,[r12,#11]
	mov	r6,r3,lsr#8
	strb	r4,[r12,#12]
	strb	r5,[r12,#13]
	strb	r6,[r12,#14]
	strb	r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
	@ Restore the saved registers and return by loading the saved lr
	@ directly into pc.
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	@ Bit 0 of lr clear: return with a plain mov.  Otherwise fall through
	@ to the literal word 0xe12fff1e, which is the encoding of bx lr.
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif


@ _armv4_AES_decrypt: file-local decryption core (no .globl).
@ In:   r0-r3 = state words
@       r10   = address of AES_Td
@       r11   = key schedule (round count is read from byte offset 240)
@ Out:  r0-r3 = decrypted state words; r10 has its entry value again.
@ r4-r9 and r12 are scratch, lr holds the byte mask 255 for the whole
@ routine; the incoming lr is pushed first and popped straight into pc.
@ r11 is advanced through the key schedule and is not restored.
@ Only one 256-word table is read in the loop: the comments name
@ Td0..Td3, and the code obtains the other three by rotating the loaded
@ word.
.align	2
_armv4_AES_decrypt:
	str	lr,[sp,#-4]!		@ push lr
	@ Initial AddRoundKey with round key 0; r11 now points at round key 1.
	ldmia	r11!,{r4,r5,r6,r7}
	eor	r0,r0,r4
	@ r11 was advanced by 16, so this reads the word at key+240 (rounds).
	ldr	r12,[r11,#240-16]
	eor	r1,r1,r5
	eor	r2,r2,r6
	eor	r3,r3,r7
	@ rounds-1 passes through the table loop; the last round is separate.
	sub	r12,r12,#1
	mov	lr,#255

	@ Split s0 into four byte indices ahead of the loop (r0 keeps s0>>24).
	and	r7,lr,r0,lsr#16
	and	r8,lr,r0,lsr#8
	and	r9,lr,r0
	mov	r0,r0,lsr#24
Ldec_loop:
	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
	and	r7,lr,r1		@ i0
	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
	and	r8,lr,r1,lsr#16
	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
	and	r9,lr,r1,lsr#8
	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
	mov	r1,r1,lsr#24

	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
	eor	r0,r0,r7,ror#24
	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r8,r5,ror#8
	and	r8,lr,r2		@ i1
	eor	r6,r9,r6,ror#8
	and	r9,lr,r2,lsr#16
	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
	eor	r1,r1,r4,ror#8
	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
	mov	r2,r2,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
	eor	r0,r0,r7,ror#16
	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
	and	r7,lr,r3,lsr#16	@ i0
	eor	r1,r1,r8,ror#24
	and	r8,lr,r3,lsr#8	@ i1
	eor	r6,r9,r6,ror#8
	and	r9,lr,r3		@ i2
	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
	eor	r2,r2,r5,ror#8
	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
	mov	r3,r3,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
	eor	r0,r0,r7,ror#8
	@ Fetch the next round key; r11 steps by 16 and the other three words
	@ are read with negative offsets from the advanced pointer.
	ldr	r7,[r11],#16
	eor	r1,r1,r8,ror#16
	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
	eor	r2,r2,r9,ror#24

	ldr	r4,[r11,#-12]
	eor	r0,r0,r7
	ldr	r5,[r11,#-8]
	eor	r3,r3,r6,ror#8
	ldr	r6,[r11,#-4]
	@ The new s0 is split into byte indices for the next pass (or for
	@ the final round below) while the other words get their round key.
	and	r7,lr,r0,lsr#16
	eor	r1,r1,r4
	and	r8,lr,r0,lsr#8
	eor	r2,r2,r5
	and	r9,lr,r0
	eor	r3,r3,r6
	mov	r0,r0,lsr#24

	subs	r12,r12,#1
	bne	Ldec_loop

	@ Final round uses the 256-byte table that follows the 1024 bytes of
	@ words, indexed by unscaled byte values.
	add	r10,r10,#1024

	@ Eight word loads at a 32-byte stride cover the whole byte table.
	@ The loaded values are never used (r4-r6 are overwritten below);
	@ presumably this pulls the table into cache before the data-dependent
	@ lookups - confirm the intent against the upstream source.
	ldr	r5,[r10,#0]		@ prefetch Td4
	ldr	r6,[r10,#32]
	ldr	r4,[r10,#64]
	ldr	r5,[r10,#96]
	ldr	r6,[r10,#128]
	ldr	r4,[r10,#160]
	ldr	r5,[r10,#192]
	ldr	r6,[r10,#224]

	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
	and	r7,lr,r1		@ i0
	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
	and	r8,lr,r1,lsr#16
	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
	and	r9,lr,r1,lsr#8

	@ For the top byte the table address is formed with an explicit add
	@ and the byte is loaded through the state register itself.
	add	r1,r10,r1,lsr#24
	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
	ldrb	r1,[r1]		@ Td4[s1>>24]
	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
	eor	r0,r7,r0,lsl#24
	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
	eor	r1,r4,r1,lsl#8
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r5,r8,lsl#8
	and	r8,lr,r2		@ i1
	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
	eor	r6,r6,r9,lsl#8
	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
	and	r9,lr,r2,lsr#16

	add	r2,r10,r2,lsr#24
	ldrb	r2,[r2]		@ Td4[s2>>24]
	eor	r0,r0,r7,lsl#8
	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
	eor	r1,r8,r1,lsl#16
	and	r7,lr,r3,lsr#16	@ i0
	eor	r2,r5,r2,lsl#16
	and	r8,lr,r3,lsr#8	@ i1
	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
	eor	r6,r6,r9,lsl#16
	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
	and	r9,lr,r3		@ i2

	add	r3,r10,r3,lsr#24
	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
	ldrb	r3,[r3]		@ Td4[s3>>24]
	eor	r0,r0,r7,lsl#16
	@ Last round key is read in place; r11 is not advanced any further.
	ldr	r7,[r11,#0]
	eor	r1,r1,r8,lsl#8
	ldr	r4,[r11,#4]
	eor	r2,r9,r2,lsl#8
	ldr	r5,[r11,#8]
	eor	r3,r6,r3,lsl#24
	ldr	r6,[r11,#12]

	eor	r0,r0,r7
	eor	r1,r1,r4
	eor	r2,r2,r5
	eor	r3,r3,r6

	@ Undo the +1024 adjustment so r10 points at the table start again.
	sub	r10,r10,#1024
	ldr	pc,[sp],#4		@ pop and return

.byte	65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2


================================================
FILE: lib/aes_acc/asm/arm64.S
================================================
.text

// _vpaes_consts: constant pool for the vector-permutation AES routines in
// this file.  Every line of two .quad values is one 16-byte vector.  The
// .Lk_* labels are file-local and are reached pc-relative with adr (see
// _vpaes_encrypt_preheat); the order of the tables matters because the
// loaders step through several adjacent tables with post-indexed loads.
.type	_vpaes_consts,%object
.align	7	// totally strategic alignment
_vpaes_consts:
.Lk_mc_forward:	//	mc_forward
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605
.Lk_mc_backward:	//	mc_backward
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F
.Lk_sr:	//	sr
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

//
// "Hot" constants
//
// .Lk_inv through .Lk_sb2 are laid out back to back (32 bytes each) and
// are loaded in this order by _vpaes_encrypt_preheat.
//
.Lk_inv:	//	inv, inva
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809
.Lk_ipt:	//	input transform (lo, hi)
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
.Lk_sbo:	//	sbou, sbot
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
.Lk_sb1:	//	sb1u, sb1t
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.Lk_sb2:	//	sb2u, sb2t
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD

//
//  Decryption stuff
//
.Lk_dipt:	//	decryption input transform
.quad	0x0F505B040B545F00, 0x154A411E114E451A
.quad	0x86E383E660056500, 0x12771772F491F194
.Lk_dsbo:	//	decryption sbox final output
.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
.Lk_dsb9:	//	decryption sbox output *9*u, *9*t
.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:	//	decryption sbox output *D*u, *D*t
.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:	//	decryption sbox output *B*u, *B*t
.quad	0xD022649296B44200, 0x602646F6B0F2D404
.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:	//	decryption sbox output *E*u, *E*t
.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32

//
//  Key schedule constants
//
.Lk_dksd:	//	decryption key schedule: invskew x*D
.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:	//	decryption key schedule: invskew x*B
.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:	//	decryption key schedule: invskew x*E + 0x63
.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:	//	decryption key schedule: invskew x*9
.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE

.Lk_rcon:	//	rcon
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

.Lk_opt:	//	output transform
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
.Lk_deskew:	//	deskew tables: inverts the sbox's "skew"
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

// NUL-terminated ASCII credit string, spelled exactly as stored:
// "Vector Permutaion AES for ARMv8, Mike Hamburg (Stanford University)"
.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align	2
.size	_vpaes_consts,.-_vpaes_consts
.align	6
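##
##  _vpaes_encrypt_preheat  (x86 original: _aes_preheat)
##
##  Loads the encryption constants, addressed PC-relative with adr
##  (so you can -fPIC), into vector registers:
##    v17      = 0x0f in every byte
##    v18-v19  = .Lk_inv
##    v20-v21  = .Lk_ipt
##    v22-v23  = .Lk_sbo
##    v24-v27  = .Lk_sb1, .Lk_sb2
##  Uses x10 as the table pointer.  The x86 original filled %r10 and
##  %xmm9-%xmm15 instead.
##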
// _vpaes_encrypt_preheat: load the encryption lookup tables into vector
// registers for the encryption cores.
//   v17     = 0x0f in every byte (used as the low-nibble mask)
//   v18-v19 = .Lk_inv
//   v20-v21 = .Lk_ipt,  v22-v23 = .Lk_sbo
//   v24-v25 = .Lk_sb1,  v26-v27 = .Lk_sb2
// Clobbers x10; after the two post-incremented loads it is left at
// .Lk_inv+96, which is .Lk_sb1.
.type	_vpaes_encrypt_preheat,%function
.align	4
_vpaes_encrypt_preheat:
	adr	x10, .Lk_inv
	movi	v17.16b, #0x0f
	ld1	{v18.2d,v19.2d}, [x10],#32	// .Lk_inv
	ld1	{v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64	// .Lk_ipt, .Lk_sbo
	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x10]		// .Lk_sb1, .Lk_sb2
	ret
.size	_vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat

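##
##  _vpaes_encrypt_core  (x86 original: _aes_encrypt_core)
##
##  AES-encrypt one block.  The x86 register names of the original
##  implementation are given in parentheses.
##
##  Inputs:
##     v7 (%xmm0) = input
##     v17-v27 (%xmm9-%xmm15) as in _vpaes_encrypt_preheat
##     x2 (%rdx) = scheduled keys, round count at offset 240
##
##  Output in v0 (%xmm0)
##  Clobbers  v1-v5, v16, x8-x11
##  Does not write v6 or the input v7, so you get some local vectors
##
##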
// _vpaes_encrypt_core: encrypt the single block held in v7.
//   In:  v7  = input block
//        x2  = key schedule; the round count is read from [x2,#240]
//        v17-v27 = mask and tables as loaded by _vpaes_encrypt_preheat
//   Out: v0  = result
//   Writes v0-v5, v16, x8 (round counter), x9 (round-key cursor),
//   x10 and x11 (pointers into the permutation tables).
// The trailing comment on each instruction shows the x86 instruction it
// was translated from.
.type	_vpaes_encrypt_core,%function
.align	4
_vpaes_encrypt_core:
	mov	x9, x2
	ldr	w8, [x2,#240]			// pull rounds
	adr	x11, .Lk_mc_forward+16
						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
	// Split every input byte into low nibble (v1) and high nibble (v0),
	// look each up in the input-transform tables, and add round key 0.
	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
	ushr	v0.16b, v7.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
	tbl	v1.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
	tbl	v2.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
	eor	v0.16b, v1.16b, v16.16b		// vpxor	%xmm5,	%xmm1,	%xmm0
	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
	b	.Lenc_entry

.align	4
.Lenc_loop:
	// middle of middle round
	// x10 = x11 + 0x40 addresses the row read into v4 below (marked
	// .Lk_mc_backward[] there); x11 itself is post-incremented by 16.
	add	x10, x11, #0x40
	tbl	v4.16b, {v25.16b}, v2.16b		// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
	tbl	v0.16b, {v24.16b}, v3.16b		// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
	tbl	v5.16b,	{v27.16b}, v2.16b		// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
	tbl	v2.16b, {v26.16b}, v3.16b		// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
	tbl	v3.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
	eor	v2.16b, v2.16b, v5.16b		// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
	tbl	v0.16b, {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
	tbl	v4.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
	eor	v0.16b, v0.16b, v3.16b		// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
	// Clears bit 6 of the table pointer.  NOTE(review): this appears to
	// wrap x11 back to the first row after the fourth one, which relies
	// on the alignment of the constant table (not visible here).
	and	x11, x11, #~(1<<6)		// and		$0x30,	%r11		# ... mod 4
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
	sub	w8, w8, #1			// nr--

.Lenc_entry:
	// top of round
	// Produces v2 (io) and v3 (jo), the table indices used by the output
	// lookups in the loop body and in the last round, and fetches the
	// next round key into v16.
	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
	tbl	v5.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
	eor	v3.16b, v3.16b, v5.16b		// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
	eor	v4.16b, v4.16b, v5.16b		// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
	tbl	v2.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
	tbl	v3.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
	cbnz	w8, .Lenc_loop

	// middle of last round
	// The final round uses the sbo tables (v22/v23) and finishes with a
	// byte permutation loaded from x11 + 0x80 (marked .Lk_sr[] below).
	add	x10, x11, #0x80
						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
	tbl	v4.16b, {v22.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
	tbl	v0.16b, {v23.16b}, v3.16b		// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
	ret
.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core

// vpaes_encrypt: encrypt one 16-byte block.
//   x0 = address of the input block
//   x1 = address the output block is stored to
//   x2 = key schedule, passed through untouched to _vpaes_encrypt_core
.globl	vpaes_encrypt
.type	vpaes_encrypt,%function
.align	4
vpaes_encrypt:
	// Frame record: save fp/lr because of the nested calls below.
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	bl	_vpaes_encrypt_preheat		// tables -> v17-v27 (only x10 is used as scratch)
	ld1	{v7.16b}, [x0]			// v7 = input block for the core
	bl	_vpaes_encrypt_core		// v0 = encrypted block
	st1	{v0.16b}, [x1]

	ldp	x29, x30, [sp], #16
	ret
.size	vpaes_encrypt,.-vpaes_encrypt

// _vpaes_encrypt_2x: encrypt two blocks at once.  Each step of the
// single-block core is issued twice, once per block, interleaved.
//   In:  v14, v15 = the two input blocks
//        x2       = key schedule; the round count is read from [x2,#240]
//        v17-v27  = mask and tables as loaded by _vpaes_encrypt_preheat
//   Out: v0 = result for v14, v1 = result for v15
//   The first block uses v0-v5 as temporaries, the second block uses
//   v8-v13; v16 (round key) and x8-x11 are shared and overwritten.
//   NOTE(review): v8-v13 are written here without being saved; callers
//   presumably preserve the callee-saved halves themselves - confirm.
.type	_vpaes_encrypt_2x,%function
.align	4
_vpaes_encrypt_2x:
	mov	x9, x2
	ldr	w8, [x2,#240]			// pull rounds
	adr	x11, .Lk_mc_forward+16
						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
	and	v1.16b,  v14.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
	ushr	v0.16b,  v14.16b,  #4		// vpsrlb	$4,	%xmm0,	%xmm0
	and	v9.16b,  v15.16b,  v17.16b
	ushr	v8.16b,  v15.16b,  #4
	tbl	v1.16b,  {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
	tbl	v9.16b,  {v20.16b}, v9.16b
						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
	tbl	v2.16b,  {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
	tbl	v10.16b, {v21.16b}, v8.16b
	eor	v0.16b,  v1.16b,   v16.16b	// vpxor	%xmm5,	%xmm1,	%xmm0
	eor	v8.16b,  v9.16b,   v16.16b
	eor	v0.16b,  v0.16b,   v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
	eor	v8.16b,  v8.16b,   v10.16b
	b	.Lenc_2x_entry

.align	4
.Lenc_2x_loop:
	// middle of middle round
	// v1 and v4 (the two permutation rows loaded below) are shared by
	// both blocks.
	add	x10, x11, #0x40
	tbl	v4.16b,  {v25.16b}, v2.16b	// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
	tbl	v12.16b, {v25.16b}, v10.16b
	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
	tbl	v0.16b,  {v24.16b}, v3.16b	// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
	tbl	v8.16b,  {v24.16b}, v11.16b
	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
	eor	v12.16b, v12.16b, v16.16b
	tbl	v5.16b,	 {v27.16b}, v2.16b	// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
	tbl	v13.16b, {v27.16b}, v10.16b
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
	eor	v8.16b,  v8.16b,  v12.16b
	tbl	v2.16b,  {v26.16b}, v3.16b	// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
	tbl	v10.16b, {v26.16b}, v11.16b
	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
	tbl	v3.16b,  {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
	tbl	v11.16b, {v8.16b}, v1.16b
	eor	v2.16b,  v2.16b,  v5.16b	// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
	eor	v10.16b, v10.16b, v13.16b
	tbl	v0.16b,  {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
	tbl	v8.16b,  {v8.16b}, v4.16b
	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
	eor	v11.16b, v11.16b, v10.16b
	tbl	v4.16b,  {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
	tbl	v12.16b, {v11.16b},v1.16b
	eor	v0.16b,  v0.16b,  v3.16b	// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
	eor	v8.16b,  v8.16b,  v11.16b
	and	x11, x11, #~(1<<6)		// and		$0x30,	%r11		# ... mod 4
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
	eor	v8.16b,  v8.16b,  v12.16b
	sub	w8, w8, #1			// nr--

.Lenc_2x_entry:
	// top of round
	// Leaves io/jo in v2/v3 for the first block and in v10/v11 for the
	// second, and loads the next round key into v16.
	and	v1.16b,  v0.16b, v17.16b	// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
	ushr	v0.16b,  v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
	and	v9.16b,  v8.16b, v17.16b
	ushr	v8.16b,  v8.16b, #4
	tbl	v5.16b,  {v19.16b},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
	tbl	v13.16b, {v19.16b},v9.16b
	eor	v1.16b,  v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
	eor	v9.16b,  v9.16b,  v8.16b
	tbl	v3.16b,  {v18.16b},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
	tbl	v11.16b, {v18.16b},v8.16b
	tbl	v4.16b,  {v18.16b},v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
	tbl	v12.16b, {v18.16b},v9.16b
	eor	v3.16b,  v3.16b,  v5.16b	// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
	eor	v11.16b, v11.16b, v13.16b
	eor	v4.16b,  v4.16b,  v5.16b	// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
	eor	v12.16b, v12.16b, v13.16b
	tbl	v2.16b,  {v18.16b},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
	tbl	v10.16b, {v18.16b},v11.16b
	tbl	v3.16b,  {v18.16b},v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
	tbl	v11.16b, {v18.16b},v12.16b
	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
	eor	v10.16b, v10.16b, v9.16b
	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
	eor	v11.16b, v11.16b, v8.16b
	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
	cbnz	w8, .Lenc_2x_loop

	// middle of last round
	add	x10, x11, #0x80
						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
	tbl	v4.16b,  {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
	tbl	v12.16b, {v22.16b}, v10.16b
	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
	tbl	v0.16b,  {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
	tbl	v8.16b,  {v23.16b}, v11.16b
	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
	eor	v12.16b, v12.16b, v16.16b
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
	eor	v8.16b,  v8.16b,  v12.16b
	tbl	v0.16b,  {v0.16b},v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
	// The second result is permuted last because it overwrites v1, the
	// permutation row that both final tbl instructions read.
	tbl	v1.16b,  {v8.16b},v1.16b
	ret
.size	_vpaes_encrypt_2x,.-_vpaes_encrypt_2x

// _vpaes_decrypt_preheat: load the decryption lookup tables into vector
// registers for the decryption cores.
//   v17     = 0x0f in every byte (used as the low-nibble mask)
//   v18-v19 = .Lk_inv
//   v20-v21 = .Lk_dipt,  v22-v23 = .Lk_dsbo
//   v24-v25 = .Lk_dsb9,  v26-v27 = .Lk_dsbd
//   v28-v29 = .Lk_dsbb,  v30-v31 = .Lk_dsbe
// Clobbers x10 and x11 (both are used as post-incremented table pointers).
.type	_vpaes_decrypt_preheat,%function
.align	4
_vpaes_decrypt_preheat:
	adr	x10, .Lk_inv
	movi	v17.16b, #0x0f
	adr	x11, .Lk_dipt
	ld1	{v18.2d,v19.2d}, [x10],#32	// .Lk_inv
	ld1	{v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64	// .Lk_dipt, .Lk_dsbo
	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64	// .Lk_dsb9, .Lk_dsbd
	ld1	{v28.2d,v29.2d,v30.2d,v31.2d}, [x11]		// .Lk_dsbb, .Lk_dsbe
	ret
.size	_vpaes_decrypt_preheat,.-_vpaes_decrypt_preheat

##
##  Decryption core
##
##  Same API as encryption core.
##
// _vpaes_decrypt_core: decrypt the single block held in v7.
//   In:  v7  = input block
//        x2  = key schedule; the round count is read from [x2,#240]
//        v17-v31 = mask and tables as loaded by _vpaes_decrypt_preheat
//   Out: v0  = result
//   Writes v0-v5, v16, x8 (round counter), x9 (round-key cursor),
//   x10 and x11.
// The trailing comment on each instruction shows the x86 instruction it
// was translated from.
.type	_vpaes_decrypt_core,%function
.align	4
_vpaes_decrypt_core:
	mov	x9, x2
	ldr	w8, [x2,#240]			// pull rounds

						// vmovdqa	.Lk_dipt(%rip), %xmm2	# iptlo
	// x11 = .Lk_sr + (((rounds << 4) ^ 0x30) & 0x30): the address of the
	// permutation row applied to the result after the last round.
	lsl	x11, x8, #4			// mov	%rax,	%r11;	shl	$4, %r11
	eor	x11, x11, #0x30			// xor		$0x30,	%r11
	adr	x10, .Lk_sr
	and	x11, x11, #0x30			// and		$0x30,	%r11
	add	x11, x11, x10
	adr	x10, .Lk_mc_forward+48

	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm4		# round0 key
	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
	ushr	v0.16b, v7.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
	tbl	v2.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
	// v5 is the permutation applied after each table stage below; it is
	// loaded once here and rotated at the end of every loop iteration.
	ld1	{v5.2d}, [x10]			// vmovdqa	.Lk_mc_forward+48(%rip), %xmm5
						// vmovdqa	.Lk_dipt+16(%rip), %xmm1 # ipthi
	tbl	v0.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
	eor	v2.16b, v2.16b, v16.16b		// vpxor	%xmm4,	%xmm2,	%xmm2
	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
	b	.Ldec_entry

.align	4
.Ldec_loop:
//
//  Inverse mix columns
//
// Four stages, one per table pair (v24/v25, v26/v27, v28/v29, v30/v31),
// each indexed by v2 and v3.  The first stage adds the round key in v16;
// each later stage first permutes the running value in v0 by v5.
						// vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
						// vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
	tbl	v4.16b, {v24.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
	tbl	v1.16b, {v25.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
	eor	v0.16b, v4.16b, v16.16b		// vpxor	%xmm4,	%xmm0,	%xmm0
						// vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
						// vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt

	tbl	v4.16b, {v26.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v1.16b, {v27.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
						// vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
						// vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt

	tbl	v4.16b, {v28.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v1.16b, {v29.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
						// vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
						// vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet

	tbl	v4.16b, {v30.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v1.16b, {v31.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
	ext	v5.16b, v5.16b, v5.16b, #12	// vpalignr $12,	%xmm5,	%xmm5,	%xmm5
	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
	sub	w8, w8, #1			// sub		$1,%rax			# nr--

.Ldec_entry:
	// top of round
	// Produces v2 (io) and v3 (jo), the table indices for the next
	// stage, and fetches the next round key into v16.
	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1	# 0 = k
	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
	tbl	v2.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
	eor	v1.16b,	v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
	tbl	v2.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
	tbl	v3.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2	# 2 = io
	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,  %xmm3,	%xmm3	# 3 = jo
	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm0
	cbnz	w8, .Ldec_loop

	// middle of last round
	// Uses the .Lk_dsbo tables (v22/v23), adds the last round key and
	// applies the permutation row selected into x11 at function entry.
						// vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
	tbl	v4.16b, {v22.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
						// vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
	ld1	{v2.2d}, [x11]			// vmovdqa	-0x160(%r11),	%xmm2	# .Lk_sr-.Lk_dsbd=-0x160
	tbl	v1.16b, {v23.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
	eor	v0.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm0	# 0 = A
	tbl	v0.16b, {v0.16b}, v2.16b	// vpshufb	%xmm2,	%xmm0,	%xmm0
	ret
.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core

// vpaes_decrypt: decrypt one 16-byte block.
//   x0 = address of the input block
//   x1 = address the output block is stored to
//   x2 = key schedule, passed through untouched to _vpaes_decrypt_core
.globl	vpaes_decrypt
.type	vpaes_decrypt,%function
.align	4
vpaes_decrypt:
	// Frame record: save fp/lr because of the nested calls below.
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	bl	_vpaes_decrypt_preheat		// tables -> v17-v31 (only x10/x11 are used as scratch)
	ld1	{v7.16b}, [x0]			// v7 = input block for the core
	bl	_vpaes_decrypt_core		// v0 = decrypted block
	st1	{v0.16b}, [x1]

	ldp	x29, x30, [sp], #16
	ret
.size	vpaes_decrypt,.-vpaes_decrypt

// v14-v15 input, v0-v1 output
// _vpaes_decrypt_2x: decrypt two blocks at once.  Each step of the
// single-block core is issued twice, once per block, interleaved.
//   In:  v14, v15 = the two input blocks
//        x2       = key schedule; the round count is read from [x2,#240]
//        v17-v31  = mask and tables as loaded by _vpaes_decrypt_preheat
//   Out: v0 = result for v14, v1 = result for v15
//   The first block uses v0-v4 as temporaries, the second block uses
//   v8-v12; v5 (permutation), v16 (round key) and x8-x11 are shared.
//   NOTE(review): v8-v12 are written here without being saved; callers
//   presumably preserve the callee-saved halves themselves - confirm.
.type	_vpaes_decrypt_2x,%function
.align	4
_vpaes_decrypt_2x:
	mov	x9, x2
	ldr	w8, [x2,#240]			// pull rounds

						// vmovdqa	.Lk_dipt(%rip), %xmm2	# iptlo
	// x11 = .Lk_sr + (((rounds << 4) ^ 0x30) & 0x30): the address of the
	// permutation row applied to both results after the last round.
	lsl	x11, x8, #4			// mov	%rax,	%r11;	shl	$4, %r11
	eor	x11, x11, #0x30			// xor		$0x30,	%r11
	adr	x10, .Lk_sr
	and	x11, x11, #0x30			// and		$0x30,	%r11
	add	x11, x11, x10
	adr	x10, .Lk_mc_forward+48

	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm4		# round0 key
	and	v1.16b,  v14.16b, v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
	ushr	v0.16b,  v14.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
	and	v9.16b,  v15.16b, v17.16b
	ushr	v8.16b,  v15.16b, #4
	tbl	v2.16b,  {v20.16b},v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
	tbl	v10.16b, {v20.16b},v9.16b
	ld1	{v5.2d}, [x10]			// vmovdqa	.Lk_mc_forward+48(%rip), %xmm5
						// vmovdqa	.Lk_dipt+16(%rip), %xmm1 # ipthi
	tbl	v0.16b,  {v21.16b},v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
	tbl	v8.16b,  {v21.16b},v8.16b
	eor	v2.16b,  v2.16b,  v16.16b	// vpxor	%xmm4,	%xmm2,	%xmm2
	eor	v10.16b, v10.16b, v16.16b
	eor	v0.16b,  v0.16b,  v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
	eor	v8.16b,  v8.16b,  v10.16b
	b	.Ldec_2x_entry

.align	4
.Ldec_2x_loop:
//
//  Inverse mix columns
//
// Four stages, one per table pair (v24/v25, v26/v27, v28/v29, v30/v31).
// The first block is indexed by v2/v3, the second by v10/v11.
						// vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
						// vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
	tbl	v4.16b,  {v24.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
	tbl	v12.16b, {v24.16b}, v10.16b
	tbl	v1.16b,  {v25.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
	tbl	v9.16b,  {v25.16b}, v11.16b
	eor	v0.16b,  v4.16b,  v16.16b	// vpxor	%xmm4,	%xmm0,	%xmm0
	eor	v8.16b,  v12.16b, v16.16b
						// vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
	eor	v8.16b,  v8.16b,  v9.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
						// vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt

	tbl	v4.16b,  {v26.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
	tbl	v12.16b, {v26.16b}, v10.16b
	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v8.16b,  {v8.16b},v5.16b
	tbl	v1.16b,  {v27.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
	tbl	v9.16b,  {v27.16b}, v11.16b
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
	eor	v8.16b,  v8.16b,  v12.16b
						// vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
	eor	v8.16b,  v8.16b,  v9.16b
						// vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt

	tbl	v4.16b,  {v28.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
	tbl	v12.16b, {v28.16b}, v10.16b
	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v8.16b,  {v8.16b},v5.16b
	tbl	v1.16b,  {v29.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
	tbl	v9.16b,  {v29.16b}, v11.16b
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
	eor	v8.16b,  v8.16b,  v12.16b
						// vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
	eor	v8.16b,  v8.16b,  v9.16b
						// vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet

	tbl	v4.16b,  {v30.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
	tbl	v12.16b, {v30.16b}, v10.16b
	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
	tbl	v8.16b,  {v8.16b},v5.16b
	tbl	v1.16b,  {v31.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
	tbl	v9.16b,  {v31.16b}, v11.16b
	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
	eor	v8.16b,  v8.16b,  v12.16b
	ext	v5.16b,  v5.16b,  v5.16b, #12	// vpalignr $12,	%xmm5,	%xmm5,	%xmm5
	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
	eor	v8.16b,  v8.16b,  v9.16b
	sub	w8, w8, #1			// sub		$1,%rax			# nr--

.Ldec_2x_entry:
	// top of round
	// Leaves io/jo in v2/v3 for the first block and in v10/v11 for the
	// second, and loads the next round key into v16.
	and	v1.16b,  v0.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1	# 0 = k
	ushr	v0.16b,  v0.16b,  #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
	and	v9.16b,  v8.16b,  v17.16b
	ushr	v8.16b,  v8.16b,  #4
	tbl	v2.16b,  {v19.16b},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
	tbl	v10.16b, {v19.16b},v9.16b
	eor	v1.16b,	 v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
	eor	v9.16b,	 v9.16b,  v8.16b
	tbl	v3.16b,  {v18.16b},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
	tbl	v11.16b, {v18.16b},v8.16b
	tbl	v4.16b,  {v18.16b},v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
	tbl	v12.16b, {v18.16b},v9.16b
	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
	eor	v11.16b, v11.16b, v10.16b
	eor	v4.16b,  v4.16b,  v2.16b	// vpxor	%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
	eor	v12.16b, v12.16b, v10.16b
	tbl	v2.16b,  {v18.16b},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
	tbl	v10.16b, {v18.16b},v11.16b
	tbl	v3.16b,  {v18.16b},v4.16b	// vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
	tbl	v11.16b, {v18.16b},v12.16b
	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2	# 2 = io
	eor	v10.16b, v10.16b, v9.16b
	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,  %xmm3,	%xmm3	# 3 = jo
	eor	v11.16b, v11.16b, v8.16b
	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm0
	cbnz	w8, .Ldec_2x_loop

	// middle of last round
						// vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
	tbl	v4.16b,  {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
	tbl	v12.16b, {v22.16b}, v10.16b
						// vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
	tbl	v1.16b,  {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
	tbl	v9.16b,  {v23.16b}, v11.16b
	ld1	{v2.2d}, [x11]			// vmovdqa	-0x160(%r11),	%xmm2	# .Lk_sr-.Lk_dsbd=-0x160
	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
	eor	v12.16b, v12.16b, v16.16b
	eor	v0.16b,  v1.16b,  v4.16b	// vpxor	%xmm4,	%xmm1,	%xmm0	# 0 = A
	eor	v8.16b,  v9.16b,  v12.16b
	tbl	v0.16b,  {v0.16b},v2.16b	// vpshufb	%xmm2,	%xmm0,	%xmm0
	// Second result goes to v1 (v8 held it up to this point).
	tbl	v1.16b,  {v8.16b},v2.16b
	ret
.size	_vpaes_decrypt_2x,.-_vpaes_decrypt_2x
########################################################
##                                                    ##
##                  AES key schedule                  ##
##                                                    ##
########################################################
// _vpaes_key_preheat: load the constants used by the key schedule.
//   v16     = 0x5b in every byte (labelled .Lk_s63 in the x86 source)
//   v17     = 0x0f in every byte (labelled .Lk_s0F)
//   v18-v19 = .Lk_inv,   v20-v21 = .Lk_ipt
//   v22-v23 = .Lk_sb1
//   v24-v25 = .Lk_dksd,  v26-v27 = .Lk_dksb
//   v28-v29 = .Lk_dkse,  v30-v31 = .Lk_dks9
//   v8      = .Lk_rcon
//   v9      = first row of .Lk_mc_forward
// Clobbers x10 and x11.
// NOTE(review): v8 and v9 are overwritten without being saved here;
// callers presumably preserve them - confirm.
.type	_vpaes_key_preheat,%function
.align	4
_vpaes_key_preheat:
	adr	x10, .Lk_inv
	movi	v16.16b, #0x5b			// .Lk_s63
	adr	x11, .Lk_sb1
	movi	v17.16b, #0x0f			// .Lk_s0F
	ld1	{v18.2d,v19.2d,v20.2d,v21.2d}, [x10]		// .Lk_inv, .Lk_ipt
	adr	x10, .Lk_dksd
	ld1	{v22.2d,v23.2d}, [x11]		// .Lk_sb1
	adr	x11, .Lk_mc_forward
	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64	// .Lk_dksd, .Lk_dksb
	ld1	{v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64	// .Lk_dkse, .Lk_dks9
	// After two 64-byte post-increments x10 is at .Lk_dksd+128, which is
	// where .Lk_rcon is laid out.
	ld1	{v8.2d}, [x10]			// .Lk_rcon
	ld1	{v9.2d}, [x11]			// .Lk_mc_forward[0]
	ret
.size	_vpaes_key_preheat,.-_vpaes_key_preheat

// _vpaes_schedule_core: expand a user key into the vpaes round-key format.
//   In:  x0 = address of the user key
//        w1 = key size in bits; compared against 192 to select the
//             128-, 192- or 256-bit schedule
//        x2 = address the round keys are written to
//        w3 = zero when building an encryption schedule, non-zero when
//             building a decryption schedule
//        x8 = byte offset into .Lk_sr (the address of .Lk_sr is added
//             to it below)
//   Saves and restores x29/x30.  Loads all key-schedule constants through
//   _vpaes_key_preheat, and zeroes v0-v7 before returning.
//   Modifies x0, x2, x8, x10 and x11 along the way.
.type	_vpaes_schedule_core,%function
.align	4
_vpaes_schedule_core:
	stp	x29, x30, [sp,#-16]!
	add	x29,sp,#0

	bl	_vpaes_key_preheat		// load the tables

	// Post-increment: x0 now points 16 bytes past the start of the key.
	ld1	{v0.16b}, [x0],#16		// vmovdqu	(%rdi),	%xmm0		# load key (unaligned)

	// input transform
	// v3 keeps the untransformed key; v7 keeps the transformed copy.
	mov	v3.16b, v0.16b			// vmovdqa	%xmm0,	%xmm3
	bl	_vpaes_schedule_transform
	mov	v7.16b, v0.16b			// vmovdqa	%xmm0,	%xmm7

	adr	x10, .Lk_sr			// lea	.Lk_sr(%rip),%r10
	add	x8, x8, x10
	cbnz	w3, .Lschedule_am_decrypting

	// encrypting, output zeroth round key after transform
	st1	{v0.2d}, [x2]			// vmovdqu	%xmm0,	(%rdx)
	b	.Lschedule_go

.Lschedule_am_decrypting:
	// decrypting, output zeroth round key after shiftrows
	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb  %xmm1,	%xmm3,	%xmm3
	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
	eor	x8, x8, #0x30			// xor	$0x30, %r8

.Lschedule_go:
	cmp	w1, #192			// cmp	$192,	%esi
	b.hi	.Lschedule_256
	b.eq	.Lschedule_192
	// 128: fall through

##
##  .schedule_128
##
##  128-bit specific part of key schedule.
##
##  This schedule is really simple, because all its parts
##  are accomplished by the subroutines.
##
.Lschedule_128:
	// x0 is reused as the loop counter from here on.
	mov	x0, #10			// mov	$10, %esi

.Loop_schedule_128:
	sub	x0, x0, #1			// dec	%esi
	bl	_vpaes_schedule_round
	cbz	x0, .Lschedule_mangle_last
	bl	_vpaes_schedule_mangle		// write output
	b	.Loop_schedule_128

##
##  .aes_schedule_192
##
##  192-bit specific part of key schedule.
##
##  The main body of this schedule is the same as the 128-bit
##  schedule, but with more smearing.  The long, high side is
##  stored in %xmm7 as before, and the short, low side is in
##  the high bits of %xmm6.
##
##  This schedule is somewhat nastier, however, because each
##  round produces 192 bits of key material, or 1.5 round keys.
##  Therefore, on each cycle we do 2 rounds and produce 3 round
##  keys.
##
.align	4
.Lschedule_192:
	// x0 was advanced by 16 when the first half was loaded; stepping
	// back 8 makes the load below read key bytes 8..23.
	sub	x0, x0, #8
	ld1	{v0.16b}, [x0]		// vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
	bl	_vpaes_schedule_transform	// input transform
	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save short part
	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4, %xmm4	# clear 4
	ins	v6.d[0], v4.d[0]		// vmovhlps	%xmm4,	%xmm6,	%xmm6		# clobber low side with zeros
	mov	x0, #4			// mov	$4,	%esi

.Loop_schedule_192:
	sub	x0, x0, #1			// dec	%esi
	bl	_vpaes_schedule_round
	ext	v0.16b, v6.16b, v0.16b, #8	// vpalignr	$8,%xmm6,%xmm0,%xmm0
	bl	_vpaes_schedule_mangle		// save key n
	bl	_vpaes_schedule_192_smear
	bl	_vpaes_schedule_mangle		// save key n+1
	bl	_vpaes_schedule_round
	cbz	x0, .Lschedule_mangle_last
	bl	_vpaes_schedule_mangle		// save key n+2
	bl	_vpaes_schedule_192_smear
	b	.Loop_schedule_192

##
##  .aes_schedule_256
##
##  256-bit specific part of key schedule.
##
##  The structure here is very similar to the 128-bit
##  schedule, but with an additional "low side" in
##  %xmm6.  The low side's rounds are the same as the
##  high side's, except no rcon and no rotation.
##
.align	4
.Lschedule_256:
	// x0 already points at key byte 16 thanks to the earlier
	// post-incremented load.
	ld1	{v0.16b}, [x0]		// vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
	bl	_vpaes_schedule_transform	// input transform
	mov	x0, #7			// mov	$7, %esi

.Loop_schedule_256:
	sub	x0, x0, #1			// dec	%esi
	bl	_vpaes_schedule_mangle		// output low result
	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6

	// high round
	bl	_vpaes_schedule_round
	cbz	x0, .Lschedule_mangle_last
	bl	_vpaes_schedule_mangle

	// low round. swap xmm7 and xmm6
	// v4 must be zero on entry to _vpaes_schedule_low_round, which
	// shifts bytes in from it.
	dup	v0.4s, v0.s[3]			// vpshufd	$0xFF,	%xmm0,	%xmm0
	movi	v4.16b, #0
	mov	v5.16b, v7.16b			// vmovdqa	%xmm7,	%xmm5
	mov	v7.16b, v6.16b			// vmovdqa	%xmm6,	%xmm7
	bl	_vpaes_schedule_low_round
	mov	v7.16b, v5.16b			// vmovdqa	%xmm5,	%xmm7

	b	.Loop_schedule_256

##
##  .aes_schedule_mangle_last
##
##  Mangler for last round of key schedule
##  Mangles %xmm0
##    when encrypting, outputs out(%xmm0) ^ 63
##    when decrypting, outputs unskew(%xmm0)
##
##  Always called right before return... jumps to cleanup and exits
##
.align	4
.Lschedule_mangle_last:
	// schedule last round key from xmm0
	adr	x11, .Lk_deskew			// lea	.Lk_deskew(%rip),%r11	# prepare to deskew
	cbnz	w3, .Lschedule_mangle_last_dec

	// encrypting
	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),%xmm1
	adr	x11, .Lk_opt			// lea	.Lk_opt(%rip),	%r11		# prepare to output transform
	add	x2, x2, #32			// add	$32,	%rdx
	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute

.Lschedule_mangle_last_dec:
	// v20/v21 are the table registers read by _vpaes_schedule_transform;
	// replace them with the table selected in x11 (.Lk_opt when
	// encrypting, .Lk_deskew when decrypting).
	ld1	{v20.2d,v21.2d}, [x11]		// reload constants
	sub	x2, x2, #16			// add	$-16,	%rdx
	eor	v0.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
	bl	_vpaes_schedule_transform	// output transform
	st1	{v0.2d}, [x2]			// vmovdqu	%xmm0,	(%rdx)		# save last key

	// cleanup
	// Zero v0-v7 so no key material is left behind in them.
	eor	v0.16b, v0.16b, v0.16b		// vpxor	%xmm0,	%xmm0,	%xmm0
	eor	v1.16b, v1.16b, v1.16b		// vpxor	%xmm1,	%xmm1,	%xmm1
	eor	v2.16b, v2.16b, v2.16b		// vpxor	%xmm2,	%xmm2,	%xmm2
	eor	v3.16b, v3.16b, v3.16b		// vpxor	%xmm3,	%xmm3,	%xmm3
	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4,	%xmm4
	eor	v5.16b, v5.16b, v5.16b		// vpxor	%xmm5,	%xmm5,	%xmm5
	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
	ldp	x29, x30, [sp],#16
	ret
.size	_vpaes_schedule_core,.-_vpaes_schedule_core

##
##  .aes_schedule_192_smear
##
##  Smear the short, low side in the 192-bit key schedule.
##
##  Inputs:
##    %xmm7: high side, b  a  x  y
##    %xmm6:  low side, d  c  0  0
##    %xmm13: 0
##
##  Outputs:
##    %xmm6: b+c+d  b+c  0  0
##    %xmm0: b+c+d  b+c  b  a
##
// _vpaes_schedule_192_smear: smear step of the 192-bit key schedule.
//   In:  v7 = high side, v6 = low side (low 64 bits expected to be zero)
//   Out: v0 = v6 ^ smear terms; v6 = the same value with its low 64 bits
//        cleared again
//   Clobbers v1 (left as zero).
// Word numbering below follows the ARM lane index: s[0] is the lowest
// 32 bits of the register.
.type	_vpaes_schedule_192_smear,%function
.align	4
_vpaes_schedule_192_smear:
	// v1 = { 0, 0, 0, v6.s[2] }
	movi	v1.16b, #0
	// v0 = { v7.s[2], v7.s[3], v7.s[3], v7.s[3] } once the ins below ran
	dup	v0.4s, v7.s[3]
	ins	v1.s[3], v6.s[2]	// vpshufd	$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
	ins	v0.s[0], v7.s[2]	// vpshufd	$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
	eor	v6.16b, v6.16b, v1.16b	// vpxor	%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
	eor	v1.16b, v1.16b, v1.16b	// vpxor	%xmm1,	%xmm1,	%xmm1
	eor	v6.16b, v6.16b, v0.16b	// vpxor	%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
	mov	v0.16b, v6.16b		// vmovdqa	%xmm6,	%xmm0
	ins	v6.d[0], v1.d[0]	// vmovhlps	%xmm1,	%xmm6,	%xmm6	# clobber low side with zeros
	ret
.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear

##
##  .aes_schedule_round
##
##  Runs one main round of the key schedule on %xmm0, %xmm7
##
##  Specifically, runs subbytes on the high dword of %xmm0
##  then rotates it by one byte and xors into the low dword of
##  %xmm7.
##
##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
##  next rcon.
##
##  Smears the dwords of %xmm7 by xoring the low into the
##  second low, result into third, result into highest.
##
##  Returns results in %xmm7 = %xmm0.
##  Clobbers %xmm1-%xmm4, %r11.
##
// _vpaes_schedule_round: one main round of the key schedule.
//   In:  v0 = previous round value, v7 = running key state,
//        v8 = round-constant vector,
//        v16-v19, v22-v23 = constants from _vpaes_key_preheat
//   Out: v0 = v7 = new round value; v8 rotated for the next round
//   Clobbers v1-v4.
// _vpaes_schedule_low_round is a second entry point that skips the
// round-constant and rotation steps; it requires v4 to be zero on entry
// because the smear shifts bytes in from v4.
.type	_vpaes_schedule_round,%function
.align	4
_vpaes_schedule_round:
	// extract rcon from xmm8
	// With v4 = 0, the first ext yields byte 15 of v8 in byte 0 of v1 and
	// zeros elsewhere; the second ext rotates v8 so that the next
	// constant byte moves into position for the following round.
	movi	v4.16b, #0			// vpxor	%xmm4,	%xmm4,	%xmm4
	ext	v1.16b, v8.16b, v4.16b, #15	// vpalignr	$15,	%xmm8,	%xmm4,	%xmm1
	ext	v8.16b, v8.16b, v8.16b, #15	// vpalignr	$15,	%xmm8,	%xmm8,	%xmm8
	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7

	// rotate
	// Broadcast the top 32-bit word of v0 to all lanes, then rotate the
	// whole vector by one byte.
	dup	v0.4s, v0.s[3]			// vpshufd	$0xFF,	%xmm0,	%xmm0
	ext	v0.16b, v0.16b, v0.16b, #1	// vpalignr	$1,	%xmm0,	%xmm0,	%xmm0

	// fall through...

	// low round: same as high round, but no rotation and no rcon.
_vpaes_schedule_low_round:
	// smear xmm7
	// v1 = v7 shifted up by 4 bytes, v4 = (updated) v7 shifted up by
	// 8 bytes; the vacated bytes come from v4, which is zero here.
	ext	v1.16b, v4.16b, v7.16b, #12	// vpslldq	$4,	%xmm7,	%xmm1
	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
	ext	v4.16b, v4.16b, v7.16b, #8	// vpslldq	$8,	%xmm7,	%xmm4

	// subbytes
	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1		# 0 = k
	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0		# 1 = i
	eor	v7.16b, v7.16b, v4.16b		// vpxor	%xmm4,	%xmm7,	%xmm7
	tbl	v2.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1		# 0 = j
	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
	eor	v7.16b, v7.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm7,	%xmm7
	tbl	v3.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
	tbl	v2.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
	eor	v3.16b, v3.16b, v1.16b		// vpxor	%xmm1,	%xmm3,	%xmm3		# 2 = io
	eor	v2.16b, v2.16b, v0.16b		// vpxor	%xmm0,	%xmm2,	%xmm2		# 3 = jo
	tbl	v4.16b, {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
	tbl	v1.16b, {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
	eor	v1.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm1		# 0 = sbox output

	// add in smeared stuff
	// Both v0 and v7 receive the same value (computed twice instead of
	// computed once and copied).
	eor	v0.16b, v1.16b, v7.16b		// vpxor	%xmm7,	%xmm1,	%xmm0
	eor	v7.16b, v1.16b, v7.16b		// vmovdqa	%xmm0,	%xmm7
	ret
.size	_vpaes_schedule_round,.-_vpaes_schedule_round

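##
##  .aes_schedule_transform
##
##  Linear-transform v0 (%xmm0) according to the lo/hi nibble tables
##  held in v20/v21.  (The x86 original reads the tables from (%r11);
##  here they must already be in registers.)
##
##  Requires that v17 (%xmm9) = 0x0F0F... as in preheat
##  Output in v0 (%xmm0)
##  Clobbers v1, v2 (%xmm1, %xmm2)
##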
// _vpaes_schedule_transform: apply a nibble-wise table transform to v0.
//   In:  v0  = value to transform
//        v17 = 0x0f in every byte (low-nibble mask)
//        v20 = table indexed by the low nibble of each byte
//        v21 = table indexed by the high nibble of each byte
//   Out: v0  = v20[low nibble] ^ v21[high nibble], per byte
//   Clobbers v1, v2.
.type	_vpaes_schedule_transform,%function
.align	4
_vpaes_schedule_transform:
	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
						// vmovdqa	(%r11),	%xmm2 	# lo
	tbl	v2.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
						// vmovdqa	16(%r11),	%xmm1 # hi
	tbl	v0.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
	ret
.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform

##
##  .aes_schedule_mangle
##
##  Mangle xmm0 from (basis-transformed) standard version
##  to our version.
##
##  On encrypt,
##    xor with 0x63
##    multiply by circulant 0,1,1,1
##    apply shiftrows transform
##
##  On decrypt,
##    xor with 0x63
##    multiply by "inverse mixcolumns" circulant E,B,D,9
##    deskew
##    apply shiftrows transform
##
##
##  Writes out to (%rdx), and increments or decrements it
##  Keeps track of round number mod 4 in %r8
##  Preserves xmm0
##  Clobbers xmm1-xmm5
##
##  AArch64 mapping used by the code below:
##    v0        input (%xmm0), never written here
##    x2        output pointer (%rdx): +16 before the store when
##              encrypting, -16 when decrypting
##    w3        direction: zero = encrypt path, non-zero = decrypt path
##    x8        address the final permutation vector is loaded from; it
##              is stepped after every call (see .Lschedule_mangle_both)
##    v9        permutation used where x86 used %xmm5 (.Lk_mc_forward)
##    v16       constant xored in where x86 used .Lk_s63
##    v17       nibble mask (%xmm9)
##    v24-v31   the eight tables x86 read from 0x00..0x70(%r11) (.Lk_dksd)
##    writes    v1, v2, v3, v4
##  None of these constants are loaded here; they must already be set up.
##
.type	_vpaes_schedule_mangle,%function
.align	4
_vpaes_schedule_mangle:
	mov	v4.16b, v0.16b			// vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
						// vmovdqa	.Lk_mc_forward(%rip),%xmm5
	cbnz	w3, .Lschedule_mangle_dec

	// encrypting
	eor	v4.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm4
	add	x2, x2, #16			// add	$16,	%rdx
	// v4, v1 and v3 become the input permuted by v9 once, twice and three
	// times; the result in v3 is the xor of all three
	tbl	v4.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm4
	tbl	v1.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm1
	tbl	v3.16b, {v1.16b}, v9.16b	// vpshufb	%xmm5,	%xmm1,	%xmm3
	eor	v4.16b, v4.16b, v1.16b		// vpxor	%xmm1,	%xmm4,	%xmm4
	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
	eor	v3.16b, v3.16b, v4.16b		// vpxor	%xmm4,	%xmm3,	%xmm3

	b	.Lschedule_mangle_both
.align	4
.Lschedule_mangle_dec:
	// inverse mix columns
						// lea	.Lk_dksd(%rip),%r11
	// v1 = high nibbles, v4 = low nibbles of the saved input; each of the
	// four stages below looks both up in a table pair and folds the result
	// into the running value, which is permuted by v9 between stages
	ushr	v1.16b, v4.16b, #4		// vpsrlb	$4,	%xmm4,	%xmm1	# 1 = hi
	and	v4.16b, v4.16b, v17.16b		// vpand	%xmm9,	%xmm4,	%xmm4	# 4 = lo

						// vmovdqa	0x00(%r11),	%xmm2
	tbl	v2.16b, {v24.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
						// vmovdqa	0x10(%r11),	%xmm3
	tbl	v3.16b,	{v25.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3
	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3

						// vmovdqa	0x20(%r11),	%xmm2
	tbl	v2.16b, {v26.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
						// vmovdqa	0x30(%r11),	%xmm3
	tbl	v3.16b, {v27.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3
	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3

						// vmovdqa	0x40(%r11),	%xmm2
	tbl	v2.16b, {v28.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
						// vmovdqa	0x50(%r11),	%xmm3
	tbl	v3.16b, {v29.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3

						// vmovdqa	0x60(%r11),	%xmm2
	tbl	v2.16b, {v30.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3
						// vmovdqa	0x70(%r11),	%xmm4
	tbl	v4.16b, {v31.16b}, v1.16b	// vpshufb	%xmm1,	%xmm4,	%xmm4
	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
	eor	v3.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm3

	sub	x2, x2, #16			// add	$-16,	%rdx

.Lschedule_mangle_both:
	// apply the permutation loaded from [x8], then step x8: adding 48 and
	// clearing bit 6 moves it back 16 bytes with wrap-around inside a
	// 64-byte window (presumably the table is aligned so that bit 6 of its
	// base is clear -- TODO confirm against the table definition)
	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
	add	x8, x8, #64-16			// add	$-16,	%r8
	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
	ret
.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle

.globl	vpaes_set_encrypt_key
.type	vpaes_set_encrypt_key,%function
.align	4
// vpaes_set_encrypt_key: build an encryption key schedule.
//   w1  key length in bits
//   x2  output key structure; the round count (bits/32 + 5) is stored at
//       byte offset 240
//   x0  presumably the user key consumed by _vpaes_schedule_core; it is
//       not read in this wrapper -- TODO confirm
// Returns 0 in x0.
vpaes_set_encrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#-16]!	// ABI spec says so

	lsr	w9, w1, #5		// shr	$5,%eax
	add	w9, w9, #5		// $5,%eax
	str	w9, [x2,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;

	// w3 = 0 selects the encrypt path in the schedule code; x8 is the
	// initial permutation offset handed to _vpaes_schedule_core
	mov	w3, #0		// mov	$0,%ecx
	mov	x8, #0x30		// mov	$0x30,%r8d
	bl	_vpaes_schedule_core
	eor	x0, x0, x0		// return value: always 0

	ldp	d8,d9,[sp],#16
	ldp	x29,x30,[sp],#16
	ret
.size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key

.globl	vpaes_set_decrypt_key
.type	vpaes_set_decrypt_key,%function
.align	4
// vpaes_set_decrypt_key: build a decryption key schedule.
//   w1  key length in bits
//   x2  output key structure; the round count (bits/32 + 5) is stored at
//       byte offset 240, then x2 is advanced to 16 + 16*rounds bytes past
//       the start because the decrypt schedule is written back to front
//   x0  presumably the user key consumed by _vpaes_schedule_core; it is
//       not read in this wrapper -- TODO confirm
// Returns 0 in x0, matching vpaes_set_encrypt_key.  (Previously x0 was left
// holding whatever _vpaes_schedule_core finished with, so a caller testing
// the return value saw an arbitrary number.)
vpaes_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#-16]!	// ABI spec says so

	lsr	w9, w1, #5		// shr	$5,%eax
	add	w9, w9, #5		// $5,%eax
	str	w9, [x2,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
	lsl	w9, w9, #4		// shl	$4,%eax
	add	x2, x2, #16		// lea	16(%rdx,%rax),%rdx
	add	x2, x2, x9

	// w3 = 1 selects the decrypt path in the schedule code
	mov	w3, #1		// mov	$1,%ecx
	lsr	w8, w1, #1		// shr	$1,%r8d
	and	x8, x8, #32		// and	$32,%r8d
	eor	x8, x8, #32		// xor	$32,%r8d	# nbits==192?0:32
	bl	_vpaes_schedule_core
	eor	x0, x0, x0		// return value: always 0

	ldp	d8,d9,[sp],#16
	ldp	x29,x30,[sp],#16
	ret
.size	vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl	vpaes_cbc_encrypt
.type	vpaes_cbc_encrypt,%function
.align	4
// vpaes_cbc_encrypt: CBC entry point for both directions.
//   x0  input pointer (advanced 16 bytes per block)
//   x1  output pointer (advanced 16 bytes per block)
//   x2  length in bytes; zero returns immediately
//   x3  key schedule, moved into x2 for the core routines
//   x4  16-byte IV, read on entry and overwritten with the last
//       ciphertext block on exit
//   w5  direction: zero branches to vpaes_cbc_decrypt, non-zero encrypts
// The loop consumes 16 bytes per iteration and continues while the
// remaining count is above zero, so a length that is not a multiple of 16
// is effectively rounded up to the next whole block.
vpaes_cbc_encrypt:
	cbz	x2, .Lcbc_abort
	cmp	w5, #0			// check direction
	b.eq	vpaes_cbc_decrypt

	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	mov	x17, x2		// reassign
	mov	x2,  x3		// reassign

	ld1	{v0.16b}, [x4]	// load ivec
	bl	_vpaes_encrypt_preheat
	b	.Lcbc_enc_loop

.align	4
.Lcbc_enc_loop:
	// v0 carries the chaining value: the IV first, then each ciphertext block
	ld1	{v7.16b}, [x0],#16	// load input
	eor	v7.16b, v7.16b, v0.16b	// xor with ivec
	bl	_vpaes_encrypt_core
	st1	{v0.16b}, [x1],#16	// save output
	subs	x17, x17, #16
	b.hi	.Lcbc_enc_loop

	st1	{v0.16b}, [x4]	// write ivec

	ldp	x29,x30,[sp],#16
.Lcbc_abort:
	ret
.size	vpaes_cbc_encrypt,.-vpaes_cbc_encrypt

.type	vpaes_cbc_decrypt,%function
.align	4
// vpaes_cbc_decrypt: CBC decryption.  Not exported; in the code visible
// here it is reached from vpaes_cbc_encrypt when w5 is zero, with the same
// register arguments (x0 in, x1 out, x2 length, x3 key, x4 IV) and after
// the zero-length check has already been done there.
// v6 holds the chaining value throughout.  If bit 4 of the length is set,
// one block is decrypted on its own first so that the remainder can be
// handled two blocks at a time by _vpaes_decrypt_2x.
vpaes_cbc_decrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#-16]!	// ABI spec says so
	stp	d10,d11,[sp,#-16]!
	stp	d12,d13,[sp,#-16]!
	stp	d14,d15,[sp,#-16]!

	mov	x17, x2		// reassign
	mov	x2,  x3		// reassign
	ld1	{v6.16b}, [x4]	// load ivec
	bl	_vpaes_decrypt_preheat
	tst	x17, #16
	b.eq	.Lcbc_dec_loop2x

	// odd block: decrypt it alone, keep its ciphertext as the next IV
	ld1	{v7.16b}, [x0], #16	// load input
	bl	_vpaes_decrypt_core
	eor	v0.16b, v0.16b, v6.16b	// xor with ivec
	orr	v6.16b, v7.16b, v7.16b	// next ivec value
	st1	{v0.16b}, [x1], #16
	subs	x17, x17, #16
	b.ls	.Lcbc_dec_done

.align	4
.Lcbc_dec_loop2x:
	// v14/v15 are the two ciphertext blocks; the results come back in
	// v0/v1, and v14/v15 are still used afterwards as chaining values
	ld1	{v14.16b,v15.16b}, [x0], #32
	bl	_vpaes_decrypt_2x
	eor	v0.16b, v0.16b, v6.16b	// xor with ivec
	eor	v1.16b, v1.16b, v14.16b
	orr	v6.16b, v15.16b, v15.16b
	st1	{v0.16b,v1.16b}, [x1], #32
	subs	x17, x17, #32
	b.hi	.Lcbc_dec_loop2x

.Lcbc_dec_done:
	st1	{v6.16b}, [x4]		// hand the last ciphertext block back as IV

	ldp	d14,d15,[sp],#16
	ldp	d12,d13,[sp],#16
	ldp	d10,d11,[sp],#16
	ldp	d8,d9,[sp],#16
	ldp	x29,x30,[sp],#16
	ret
.size	vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
.globl	vpaes_ecb_encrypt
.type	vpaes_ecb_encrypt,%function
.align	4
// vpaes_ecb_encrypt: ECB-encrypt a buffer.
//   x0  input pointer
//   x1  output pointer
//   x2  length in bytes; zero returns immediately, like vpaes_cbc_encrypt
//   x3  key schedule, moved into x2 for the core routines
// If bit 4 of the length is set, one block is encrypted on its own first;
// the remainder is processed two blocks at a time by _vpaes_encrypt_2x.
vpaes_ecb_encrypt:
	// Without this guard a zero length falls through the "tst" below
	// into the two-block loop and processes 32 bytes.
	cbz	x2, .Lecb_enc_abort

	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#-16]!	// ABI spec says so
	stp	d10,d11,[sp,#-16]!
	stp	d12,d13,[sp,#-16]!
	stp	d14,d15,[sp,#-16]!

	mov	x17, x2
	mov	x2,  x3
	bl	_vpaes_encrypt_preheat
	tst	x17, #16
	b.eq	.Lecb_enc_loop

	// odd block
	ld1	{v7.16b}, [x0],#16
	bl	_vpaes_encrypt_core
	st1	{v0.16b}, [x1],#16
	subs	x17, x17, #16
	b.ls	.Lecb_enc_done

.align	4
.Lecb_enc_loop:
	ld1	{v14.16b,v15.16b}, [x0], #32
	bl	_vpaes_encrypt_2x
	st1	{v0.16b,v1.16b}, [x1], #32
	subs	x17, x17, #32
	b.hi	.Lecb_enc_loop

.Lecb_enc_done:
	ldp	d14,d15,[sp],#16
	ldp	d12,d13,[sp],#16
	ldp	d10,d11,[sp],#16
	ldp	d8,d9,[sp],#16
	ldp	x29,x30,[sp],#16
.Lecb_enc_abort:
	ret
.size	vpaes_ecb_encrypt,.-vpaes_ecb_encrypt

.globl	vpaes_ecb_decrypt
.type	vpaes_ecb_decrypt,%function
.align	4
// vpaes_ecb_decrypt: ECB-decrypt a buffer.
//   x0  input pointer
//   x1  output pointer
//   x2  length in bytes; zero returns immediately, like vpaes_cbc_encrypt
//   x3  key schedule, moved into x2 for the core routines
// If bit 4 of the length is set, one block is decrypted on its own first;
// the remainder is processed two blocks at a time by _vpaes_decrypt_2x.
vpaes_ecb_decrypt:
	// Without this guard a zero length falls through the "tst" below
	// into the two-block loop and processes 32 bytes.
	cbz	x2, .Lecb_dec_abort

	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	stp	d8,d9,[sp,#-16]!	// ABI spec says so
	stp	d10,d11,[sp,#-16]!
	stp	d12,d13,[sp,#-16]!
	stp	d14,d15,[sp,#-16]!

	mov	x17, x2
	mov	x2,  x3
	bl	_vpaes_decrypt_preheat
	tst	x17, #16
	b.eq	.Lecb_dec_loop

	// odd block: must go through the decrypt core (input v7, output v0,
	// the same convention vpaes_cbc_decrypt relies on).  This used to
	// call _vpaes_encrypt_core, which ran the encryption rounds with the
	// decrypt constants loaded and produced garbage for this block.
	ld1	{v7.16b}, [x0],#16
	bl	_vpaes_decrypt_core
	st1	{v0.16b}, [x1],#16
	subs	x17, x17, #16
	b.ls	.Lecb_dec_done

.align	4
.Lecb_dec_loop:
	ld1	{v14.16b,v15.16b}, [x0], #32
	bl	_vpaes_decrypt_2x
	st1	{v0.16b,v1.16b}, [x1], #32
	subs	x17, x17, #32
	b.hi	.Lecb_dec_loop

.Lecb_dec_done:
	ldp	d14,d15,[sp],#16
	ldp	d12,d13,[sp],#16
	ldp	d10,d11,[sp],#16
	ldp	d8,d9,[sp],#16
	ldp	x29,x30,[sp],#16
.Lecb_dec_abort:
	ret
.size	vpaes_ecb_decrypt,.-vpaes_ecb_decrypt


================================================
FILE: lib/aes_acc/asm/arm_arch.h
================================================
/*
 * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#ifndef UDP2RAW_ARM_ARCH_H_
# define UDP2RAW_ARM_ARCH_H_

/*
 * Derive __ARM_ARCH__ (the architecture version number) and, where this
 * header can tell, __ARMEB__/__ARMEL__ (big/little endian) from whatever
 * the compiler predefines.  A value of __ARM_ARCH__ supplied from outside
 * (e.g. on the command line) is left untouched.
 */
# if !defined(__ARM_ARCH__)
#  if defined(__CC_ARM)
/* ARM's own compiler: version comes from __TARGET_ARCH_ARM. */
#   define __ARM_ARCH__ __TARGET_ARCH_ARM
#   if defined(__BIG_ENDIAN)
#    define __ARMEB__
#   else
#    define __ARMEL__
#   endif
#  elif defined(__GNUC__)
#   if   defined(__aarch64__)
#    define __ARM_ARCH__ 8
#    if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#     define __ARMEB__
#    else
#     define __ARMEL__
#    endif
  /*
   * Why doesn't gcc define __ARM_ARCH__? Instead it defines
   * a bunch of the macros below. See the all_architectures[] table in
   * gcc/config/arm/arm.c. On a side note it defines
   * __ARMEL__/__ARMEB__ for little-/big-endian.
   */
#   elif defined(__ARM_ARCH)
#    define __ARM_ARCH__ __ARM_ARCH
#   elif defined(__ARM_ARCH_8A__)
#    define __ARM_ARCH__ 8
#   elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
        defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)     || \
        defined(__ARM_ARCH_7EM__)
#    define __ARM_ARCH__ 7
#   elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__)     || \
        defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__)     || \
        defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__)    || \
        defined(__ARM_ARCH_6T2__)
#    define __ARM_ARCH__ 6
#   elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__)     || \
        defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__)    || \
        defined(__ARM_ARCH_5TEJ__)
#    define __ARM_ARCH__ 5
#   elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
#    define __ARM_ARCH__ 4
#   else
#    error "unsupported ARM architecture"
#   endif
#  endif
# endif

/*
 * __ARM_MAX_ARCH__ defaults to __ARM_ARCH__ when it is not supplied from
 * outside.
 */
# if !defined(__ARM_MAX_ARCH__)
#  define __ARM_MAX_ARCH__ __ARM_ARCH__
# endif

/* Reject combinations of the two levels that cannot be built. */
# if __ARM_MAX_ARCH__<__ARM_ARCH__
#  error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
# elif __ARM_MAX_ARCH__!=__ARM_ARCH__
#  if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
#   error "can't build universal big-endian binary"
#  endif
# endif

/*
 * The declaration is hidden from assembly sources, which include this
 * header only for the macros.  (__ASSEMBLER__ is undefined, hence 0 in
 * this expression, when compiling C.)
 */
# if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
# endif

/*
 * One-bit feature flags; presumably tested against OPENSSL_armcap_P by
 * the code that includes this header -- no use is visible in this file.
 */
# define ARMV7_NEON      (1<<0)
# define ARMV7_TICK      (1<<1)
# define ARMV8_AES       (1<<2)
# define ARMV8_SHA1      (1<<3)
# define ARMV8_SHA256    (1<<4)
# define ARMV8_PMULL     (1<<5)

#endif


================================================
FILE: lib/aes_acc/asm/mips.S
================================================
/*
 * File-level setup for the MIPS AES routines: section selection, symbol
 * renaming for FIPS builds, instruction-set selection and PIC options.
 */
.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

/* SmartMIPS builds also take every MIPS32R2 code path below. */
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif

#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option	pic2
#endif
/* The routines below use $1 explicitly as a scratch register. */
.set	noat
/*
 * _mips_AES_encrypt: table-driven block-encryption core.  Not exported;
 * AES_encrypt reaches it with "bal".
 *
 * Register usage in this routine:
 *   $6             key schedule: the first four words are xored into the
 *                  state on entry, and the word at byte offset 240 seeds
 *                  the round counter
 *   $7             lookup-table base (AES_encrypt passes AES_Te)
 *   $8-$11         the four state words s0..s3, input and output
 *   $3             pointer to the next round key ($6+16, then +16 per round)
 *   $30            round counter; the loop body runs (count - 1) times and
 *                  the code after the loop performs the last round
 *   $1,$2,$24,$25  table index / address scratch
 *   $12-$23        looked-up table values
 * Nothing is saved or restored here, so all of the above are clobbered.
 *
 * Three builds of the inner loop are selected by the preprocessor:
 *   - SmartMIPS: "ext" extracts each index byte, "lwxs" does the scaled load
 *   - MIPS32R2/MIPS64R2: aligned "lw" followed by "rotr"
 *   - generic: "lwl"/"lwr" pairs at byte offsets inside the 4-byte table
 *     entry, with no rotate instruction (presumably the unaligned load
 *     itself yields the rotated entry -- TODO confirm)
 * The shift amounts and byte offsets in this file match a little-endian
 * build; the trailing comments (Te1[s1>>16] ...) keep the logical,
 * byte-order-neutral naming.
 */
.align	5
.ent	_mips_AES_encrypt
_mips_AES_encrypt:
	.frame	$29,0,$31
	.set	reorder
	lw	$12,0($6)
	lw	$13,4($6)
	lw	$14,8($6)
	lw	$15,12($6)
	lw	$30,240($6)
	add $3,$6,16

	/* initial round-key addition */
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	sub	$30,1
#if defined(__mips_smartmips)
	ext	$1,$9,8,8
.Loop_enc:
	ext	$2,$10,8,8
	ext	$24,$11,8,8
	ext	$25,$8,8,8
	lwxs	$12,$1($7)		# Te1[s1>>16]
	ext	$1,$10,16,8
	lwxs	$13,$2($7)		# Te1[s2>>16]
	ext	$2,$11,16,8
	lwxs	$14,$24($7)		# Te1[s3>>16]
	ext	$24,$8,16,8
	lwxs	$15,$25($7)		# Te1[s0>>16]
	ext	$25,$9,16,8

	lwxs	$16,$1($7)		# Te2[s2>>8]
	ext	$1,$11,24,8
	lwxs	$17,$2($7)		# Te2[s3>>8]
	ext	$2,$8,24,8
	lwxs	$18,$24($7)		# Te2[s0>>8]
	ext	$24,$9,24,8
	lwxs	$19,$25($7)		# Te2[s1>>8]
	ext	$25,$10,24,8

	lwxs	$20,$1($7)		# Te3[s3]
	ext	$1,$8,0,8
	lwxs	$21,$2($7)		# Te3[s0]
	ext	$2,$9,0,8
	lwxs	$22,$24($7)		# Te3[s1]
	ext	$24,$10,0,8
	lwxs	$23,$25($7)		# Te3[s2]
	ext	$25,$11,0,8

	rotr	$12,$12,24
	rotr	$13,$13,24
	rotr	$14,$14,24
	rotr	$15,$15,24

	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	xor	$12,$16
	lwxs	$16,$1($7)		# Te0[s0>>24]
	xor	$13,$17
	lwxs	$17,$2($7)		# Te0[s1>>24]
	xor	$14,$18
	lwxs	$18,$24($7)		# Te0[s2>>24]
	xor	$15,$19
	lwxs	$19,$25($7)		# Te0[s3>>24]

	rotr	$20,$20,8
	lw	$8,0($3)
	rotr	$21,$21,8
	lw	$9,4($3)
	rotr	$22,$22,8
	lw	$10,8($3)
	rotr	$23,$23,8
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_enc
	ext	$1,$9,8,8

	/* loop finished: recompute the first index in the form the last round uses */
	srl	$1,$9,6
#else
	srl	$1,$9,6
.Loop_enc:
	srl	$2,$10,6
	srl	$24,$11,6
	srl	$25,$8,6
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$12,0($1)		# Te1[s1>>16]
	srl	$1,$10,14
	lw	$13,0($2)		# Te1[s2>>16]
	srl	$2,$11,14
	lw	$14,0($24)		# Te1[s3>>16]
	srl	$24,$8,14
	lw	$15,0($25)		# Te1[s0>>16]
	srl	$25,$9,14
#else
	lwl	$12,2($1)		# Te1[s1>>16]
	lwl	$13,2($2)		# Te1[s2>>16]
	lwl	$14,2($24)		# Te1[s3>>16]
	lwl	$15,2($25)		# Te1[s0>>16]
	lwr	$12,3($1)		# Te1[s1>>16]
	srl	$1,$10,14
	lwr	$13,3($2)		# Te1[s2>>16]
	srl	$2,$11,14
	lwr	$14,3($24)		# Te1[s3>>16]
	srl	$24,$8,14
	lwr	$15,3($25)		# Te1[s0>>16]
	srl	$25,$9,14
#endif
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$12,$12,24
	rotr	$13,$13,24
	rotr	$14,$14,24
	rotr	$15,$15,24
# if defined(_MIPSEL)
	lw	$16,0($1)		# Te2[s2>>8]
	srl	$1,$11,22
	lw	$17,0($2)		# Te2[s3>>8]
	srl	$2,$8,22
	lw	$18,0($24)		# Te2[s0>>8]
	srl	$24,$9,22
	lw	$19,0($25)		# Te2[s1>>8]
	srl	$25,$10,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lw	$20,0($1)		# Te3[s3]
	ins $1,$8,2,8
	lw	$21,0($2)		# Te3[s0]
	ins $2,$9,2,8
	lw	$22,0($24)		# Te3[s1]
	ins $24,$10,2,8
	lw	$23,0($25)		# Te3[s2]
	ins $25,$11,2,8
# else
	lw	$16,0($1)		# Te2[s2>>8]
	ins $1,$11,2,8
	lw	$17,0($2)		# Te2[s3>>8]
	ins $2,$8,2,8
	lw	$18,0($24)		# Te2[s0>>8]
	ins $24,$9,2,8
	lw	$19,0($25)		# Te2[s1>>8]
	ins $25,$10,2,8

	lw	$20,0($1)		# Te3[s3]
	sll	$1,$8,2
	lw	$21,0($2)		# Te3[s0]
	sll	$2,$9,2
	lw	$22,0($24)		# Te3[s1]
	sll	$24,$10,2
	lw	$23,0($25)		# Te3[s2]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# endif
	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	rotr	$20,$20,8
	rotr	$21,$21,8
	rotr	$22,$22,8
	rotr	$23,$23,8
#else
	lwl	$16,1($1)		# Te2[s2>>8]
	lwl	$17,1($2)		# Te2[s3>>8]
	lwl	$18,1($24)		# Te2[s0>>8]
	lwl	$19,1($25)		# Te2[s1>>8]
	lwr	$16,2($1)		# Te2[s2>>8]
	srl	$1,$11,22
	lwr	$17,2($2)		# Te2[s3>>8]
	srl	$2,$8,22
	lwr	$18,2($24)		# Te2[s0>>8]
	srl	$24,$9,22
	lwr	$19,2($25)		# Te2[s1>>8]
	srl	$25,$10,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lwl	$20,0($1)		# Te3[s3]
	lwl	$21,0($2)		# Te3[s0]
	lwl	$22,0($24)		# Te3[s1]
	lwl	$23,0($25)		# Te3[s2]
	lwr	$20,1($1)		# Te3[s3]
	sll	$1,$8,2
	lwr	$21,1($2)		# Te3[s0]
	sll	$2,$9,2
	lwr	$22,1($24)		# Te3[s1]
	sll	$24,$10,2
	lwr	$23,1($25)		# Te3[s2]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif
	/* fold the four lookups per column together, then add the round key */
	xor	$12,$16
	lw	$16,0($1)		# Te0[s0>>24]
	xor	$13,$17
	lw	$17,0($2)		# Te0[s1>>24]
	xor	$14,$18
	lw	$18,0($24)		# Te0[s2>>24]
	xor	$15,$19
	lw	$19,0($25)		# Te0[s3>>24]

	xor	$12,$20
	lw	$8,0($3)
	xor	$13,$21
	lw	$9,4($3)
	xor	$14,$22
	lw	$10,8($3)
	xor	$15,$23
	lw	$11,12($3)

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_enc
	srl	$1,$9,6
#endif

	/*
	 * Last round.  Only single bytes are fetched ("lbu" at byte offset 2
	 * of a table entry, labelled Te4 in the comments) and the output
	 * words are assembled from them with shifts or "ins"; the final
	 * round key is loaded from $3 and xored in at the very end.
	 */
	.set	reorder
	srl	$2,$10,6
	srl	$24,$11,6
	srl	$25,$8,6
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$12,2($1)		# Te4[s1>>16]
	srl	$1,$10,14
	lbu	$13,2($2)		# Te4[s2>>16]
	srl	$2,$11,14
	lbu	$14,2($24)		# Te4[s3>>16]
	srl	$24,$8,14
	lbu	$15,2($25)		# Te4[s0>>16]
	srl	$25,$9,14

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$16,2($1)		# Te4[s2>>8]
	ins $1,$8,2,8
	lbu	$17,2($2)		# Te4[s3>>8]
	ins $2,$9,2,8
	lbu	$18,2($24)		# Te4[s0>>8]
	ins $24,$10,2,8
	lbu	$19,2($25)		# Te4[s1>>8]
	ins $25,$11,2,8

	lbu	$20,2($1)		# Te4[s0>>24]
	srl	$1,$11,22
	lbu	$21,2($2)		# Te4[s1>>24]
	srl	$2,$8,22
	lbu	$22,2($24)		# Te4[s2>>24]
	srl	$24,$9,22
	lbu	$23,2($25)		# Te4[s3>>24]
	srl	$25,$10,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# else
	lbu	$16,2($1)		# Te4[s2>>8]
	sll	$1,$8,2
	lbu	$17,2($2)		# Te4[s3>>8]
	sll	$2,$9,2
	lbu	$18,2($24)		# Te4[s0>>8]
	sll	$24,$10,2
	lbu	$19,2($25)		# Te4[s1>>8]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,2($1)		# Te4[s0>>24]
	ins $1,$11,2,8
	lbu	$21,2($2)		# Te4[s1>>24]
	ins $2,$8,2,8
	lbu	$22,2($24)		# Te4[s2>>24]
	ins $24,$9,2,8
	lbu	$23,2($25)		# Te4[s3>>24]
	ins $25,$10,2,8
# endif
	sll	$12,$12,8
	sll	$13,$13,8
	sll	$14,$14,8
	sll	$15,$15,8

	ins	$12,$16,16,8
	lbu	$16,2($1)		# Te4[s3]
	ins	$13,$17,16,8
	lbu	$17,2($2)		# Te4[s0]
	ins	$14,$18,16,8
	lbu	$18,2($24)		# Te4[s1]
	ins	$15,$19,16,8
	lbu	$19,2($25)		# Te4[s2]

	ins	$12,$20,0,8
	lw	$8,0($3)
	ins	$13,$21,0,8
	lw	$9,4($3)
	ins	$14,$22,0,8
	lw	$10,8($3)
	ins	$15,$23,0,8
	lw	$11,12($3)

	ins	$12,$16,24,8
	ins	$13,$17,24,8
	ins	$14,$18,24,8
	ins	$15,$19,24,8
#else
	lbu	$16,2($1)		# Te4[s2>>8]
	sll	$1,$8,2
	lbu	$17,2($2)		# Te4[s3>>8]
	sll	$2,$9,2
	lbu	$18,2($24)		# Te4[s0>>8]
	sll	$24,$10,2
	lbu	$19,2($25)		# Te4[s1>>8]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,2($1)		# Te4[s0>>24]
	srl	$1,$11,22
	lbu	$21,2($2)		# Te4[s1>>24]
	srl	$2,$8,22
	lbu	$22,2($24)		# Te4[s2>>24]
	srl	$24,$9,22
	lbu	$23,2($25)		# Te4[s3>>24]
	srl	$25,$10,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7

	sll	$12,$12,8
	sll	$13,$13,8
	sll	$14,$14,8
	sll	$15,$15,8

	sll	$16,$16,16
	sll	$17,$17,16
	sll	$18,$18,16
	sll	$19,$19,16

	xor	$12,$16
	lbu	$16,2($1)		# Te4[s3]
	xor	$13,$17
	lbu	$17,2($2)		# Te4[s0]
	xor	$14,$18
	lbu	$18,2($24)		# Te4[s1]
	xor	$15,$19
	lbu	$19,2($25)		# Te4[s2]

	#sll	$20,$20,0
	lw	$8,0($3)
	#sll	$21,$21,0
	lw	$9,4($3)
	#sll	$22,$22,0
	lw	$10,8($3)
	#sll	$23,$23,0
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	sll	$16,$16,24
	sll	$17,$17,24
	sll	$18,$18,24
	sll	$19,$19,24

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19
#endif
	/* final round-key addition; result is left in $8-$11 */
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	jr	$31
.end	_mips_AES_encrypt

/*
 * AES_encrypt: exported single-block encryption entry point.
 *   $4  input block, 16 bytes, read with lwl/lwr so any alignment works
 *   $5  output block, 16 bytes, written with swr/swl so any alignment works
 *   $6  key schedule; not touched here, consumed by _mips_AES_encrypt
 * Builds a 64-byte frame, saves $31, $30 and $16-$23 (all of which the
 * core clobbers), points $7 at AES_Te, runs the core on $8-$11 and
 * restores everything before returning.
 */
.align	5
.globl	AES_encrypt
.ent	AES_encrypt
AES_encrypt:
	.frame	$29,64,$31
	.mask	0xc0ff0000,-4
	.set	noreorder
	.cpload	$25
	sub $29,64
	sw	$31,64-1*4($29)
	sw	$30,64-2*4($29)
	sw	$23,64-3*4($29)
	sw	$22,64-4*4($29)
	sw	$21,64-5*4($29)
	sw	$20,64-6*4($29)
	sw	$19,64-7*4($29)
	sw	$18,64-8*4($29)
	sw	$17,64-9*4($29)
	sw	$16,64-10*4($29)
	.set	reorder
	la	$7,AES_Te		# PIC-ified 'load address'

	/* load the four state words from the input block */
	lwl	$8,0+3($4)
	lwl	$9,4+3($4)
	lwl	$10,8+3($4)
	lwl	$11,12+3($4)
	lwr	$8,0+0($4)
	lwr	$9,4+0($4)
	lwr	$10,8+0($4)
	lwr	$11,12+0($4)

	bal	_mips_AES_encrypt

	/* store the four state words to the output block */
	swr	$8,0+0($5)
	swr	$9,4+0($5)
	swr	$10,8+0($5)
	swr	$11,12+0($5)
	swl	$8,0+3($5)
	swl	$9,4+3($5)
	swl	$10,8+3($5)
	swl	$11,12+3($5)

	.set	noreorder
	lw	$31,64-1*4($29)
	lw	$30,64-2*4($29)
	lw	$23,64-3*4($29)
	lw	$22,64-4*4($29)
	lw	$21,64-5*4($29)
	lw	$20,64-6*4($29)
	lw	$19,64-7*4($29)
	lw	$18,64-8*4($29)
	lw	$17,64-9*4($29)
	lw	$16,64-10*4($29)
	jr	$31
	add $29,64			# delay slot: release the frame
.end	AES_encrypt
/*
 * _mips_AES_decrypt: table-driven block-decryption core.  Not exported;
 * AES_decrypt reaches it with "bal".
 *
 * Register usage in this routine:
 *   $6             key schedule: the first four words are xored into the
 *                  state on entry, and the word at byte offset 240 seeds
 *                  the round counter
 *   $7             lookup-table base (AES_decrypt passes AES_Td); the last
 *                  round reads single bytes at offset 1024 from this base,
 *                  labelled Td4 in the comments
 *   $8-$11         the four state words s0..s3, input and output
 *   $3             pointer to the next round key ($6+16, then +16 per round)
 *   $30            round counter; the loop body runs (count - 1) times and
 *                  the code after the loop performs the last round
 *   $1,$2,$24,$25  table index / address scratch
 *   $12-$23        looked-up table values
 * Nothing is saved or restored here, so all of the above are clobbered.
 *
 * The same three preprocessor-selected builds exist as in the encryption
 * core (SmartMIPS ext/lwxs, R2 lw+rotr, generic lwl/lwr); the difference
 * visible in the code is which state word feeds each lookup.
 */
.align	5
.ent	_mips_AES_decrypt
_mips_AES_decrypt:
	.frame	$29,0,$31
	.set	reorder
	lw	$12,0($6)
	lw	$13,4($6)
	lw	$14,8($6)
	lw	$15,12($6)
	lw	$30,240($6)
	add $3,$6,16

	/* initial round-key addition */
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	sub	$30,1
#if defined(__mips_smartmips)
	ext	$1,$11,8,8
.Loop_dec:
	ext	$2,$8,8,8
	ext	$24,$9,8,8
	ext	$25,$10,8,8
	lwxs	$12,$1($7)		# Td1[s3>>16]
	ext	$1,$10,16,8
	lwxs	$13,$2($7)		# Td1[s0>>16]
	ext	$2,$11,16,8
	lwxs	$14,$24($7)		# Td1[s1>>16]
	ext	$24,$8,16,8
	lwxs	$15,$25($7)		# Td1[s2>>16]
	ext	$25,$9,16,8

	lwxs	$16,$1($7)		# Td2[s2>>8]
	ext	$1,$9,24,8
	lwxs	$17,$2($7)		# Td2[s3>>8]
	ext	$2,$10,24,8
	lwxs	$18,$24($7)		# Td2[s0>>8]
	ext	$24,$11,24,8
	lwxs	$19,$25($7)		# Td2[s1>>8]
	ext	$25,$8,24,8

	lwxs	$20,$1($7)		# Td3[s1]
	ext	$1,$8,0,8
	lwxs	$21,$2($7)		# Td3[s2]
	ext	$2,$9,0,8
	lwxs	$22,$24($7)		# Td3[s3]
	ext	$24,$10,0,8
	lwxs	$23,$25($7)		# Td3[s0]
	ext	$25,$11,0,8

	rotr	$12,$12,24
	rotr	$13,$13,24
	rotr	$14,$14,24
	rotr	$15,$15,24

	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	xor	$12,$16
	lwxs	$16,$1($7)		# Td0[s0>>24]
	xor	$13,$17
	lwxs	$17,$2($7)		# Td0[s1>>24]
	xor	$14,$18
	lwxs	$18,$24($7)		# Td0[s2>>24]
	xor	$15,$19
	lwxs	$19,$25($7)		# Td0[s3>>24]

	rotr	$20,$20,8
	lw	$8,0($3)
	rotr	$21,$21,8
	lw	$9,4($3)
	rotr	$22,$22,8
	lw	$10,8($3)
	rotr	$23,$23,8
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_dec
	ext	$1,$11,8,8

	srl	$1,$11,6
#else
	srl	$1,$11,6
.Loop_dec:
	srl	$2,$8,6
	srl	$24,$9,6
	srl	$25,$10,6
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$12,0($1)		# Td1[s3>>16]
	srl	$1,$10,14
	lw	$13,0($2)		# Td1[s0>>16]
	srl	$2,$11,14
	lw	$14,0($24)		# Td1[s1>>16]
	srl	$24,$8,14
	lw	$15,0($25)		# Td1[s2>>16]
	srl	$25,$9,14
#else
	lwl	$12,2($1)		# Td1[s3>>16]
	lwl	$13,2($2)		# Td1[s0>>16]
	lwl	$14,2($24)		# Td1[s1>>16]
	lwl	$15,2($25)		# Td1[s2>>16]
	lwr	$12,3($1)		# Td1[s3>>16]
	srl	$1,$10,14
	lwr	$13,3($2)		# Td1[s0>>16]
	srl	$2,$11,14
	lwr	$14,3($24)		# Td1[s1>>16]
	srl	$24,$8,14
	lwr	$15,3($25)		# Td1[s2>>16]
	srl	$25,$9,14
#endif

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$12,$12,24
	rotr	$13,$13,24
	rotr	$14,$14,24
	rotr	$15,$15,24
# if defined(_MIPSEL)
	lw	$16,0($1)		# Td2[s2>>8]
	srl	$1,$9,22
	lw	$17,0($2)		# Td2[s3>>8]
	srl	$2,$10,22
	lw	$18,0($24)		# Td2[s0>>8]
	srl	$24,$11,22
	lw	$19,0($25)		# Td2[s1>>8]
	srl	$25,$8,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lw	$20,0($1)		# Td3[s1]
	ins $1,$8,2,8
	lw	$21,0($2)		# Td3[s2]
	ins $2,$9,2,8
	lw	$22,0($24)		# Td3[s3]
	ins $24,$10,2,8
	lw	$23,0($25)		# Td3[s0]
	ins $25,$11,2,8
#else
	lw	$16,0($1)		# Td2[s2>>8]
	ins $1,$9,2,8
	lw	$17,0($2)		# Td2[s3>>8]
	ins $2,$10,2,8
	lw	$18,0($24)		# Td2[s0>>8]
	ins $24,$11,2,8
	lw	$19,0($25)		# Td2[s1>>8]
	ins $25,$8,2,8

	lw	$20,0($1)		# Td3[s1]
	sll	$1,$8,2
	lw	$21,0($2)		# Td3[s2]
	sll	$2,$9,2
	lw	$22,0($24)		# Td3[s3]
	sll	$24,$10,2
	lw	$23,0($25)		# Td3[s0]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif
	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	rotr	$20,$20,8
	rotr	$21,$21,8
	rotr	$22,$22,8
	rotr	$23,$23,8
#else
	lwl	$16,1($1)		# Td2[s2>>8]
	lwl	$17,1($2)		# Td2[s3>>8]
	lwl	$18,1($24)		# Td2[s0>>8]
	lwl	$19,1($25)		# Td2[s1>>8]
	lwr	$16,2($1)		# Td2[s2>>8]
	srl	$1,$9,22
	lwr	$17,2($2)		# Td2[s3>>8]
	srl	$2,$10,22
	lwr	$18,2($24)		# Td2[s0>>8]
	srl	$24,$11,22
	lwr	$19,2($25)		# Td2[s1>>8]
	srl	$25,$8,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lwl	$20,0($1)		# Td3[s1]
	lwl	$21,0($2)		# Td3[s2]
	lwl	$22,0($24)		# Td3[s3]
	lwl	$23,0($25)		# Td3[s0]
	lwr	$20,1($1)		# Td3[s1]
	sll	$1,$8,2
	lwr	$21,1($2)		# Td3[s2]
	sll	$2,$9,2
	lwr	$22,1($24)		# Td3[s3]
	sll	$24,$10,2
	lwr	$23,1($25)		# Td3[s0]
	sll	$25,$11,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif

	/* fold the four lookups per column together, then add the round key */
	xor	$12,$16
	lw	$16,0($1)		# Td0[s0>>24]
	xor	$13,$17
	lw	$17,0($2)		# Td0[s1>>24]
	xor	$14,$18
	lw	$18,0($24)		# Td0[s2>>24]
	xor	$15,$19
	lw	$19,0($25)		# Td0[s3>>24]

	xor	$12,$20
	lw	$8,0($3)
	xor	$13,$21
	lw	$9,4($3)
	xor	$14,$22
	lw	$10,8($3)
	xor	$15,$23
	lw	$11,12($3)

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_dec
	srl	$1,$11,6
#endif

	/*
	 * Last round.  The eight "lw" instructions interleaved below touch
	 * the 256-byte table at 1024($7) every 32 bytes; their results in
	 * $16-$23 are overwritten before being used, so they only serve to
	 * pull the table into the cache (the existing comment calls this a
	 * prefetch).  Index bytes are used unscaled (mask 0xff) because the
	 * table holds one byte per entry.
	 */
	.set	reorder
	lw	$16,1024($7)		# prefetch Td4
	srl	$1,$11,8
	lw	$17,1024+32($7)
	srl	$2,$8,8
	lw	$18,1024+64($7)
	srl	$24,$9,8
	lw	$19,1024+96($7)
	srl	$25,$10,8
	lw	$20,1024+128($7)
	and	$1,0xff
	lw	$21,1024+160($7)
	and	$2,0xff
	lw	$22,1024+192($7)
	and	$24,0xff
	lw	$23,1024+224($7)
	and	$25,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$12,1024($1)		# Td4[s3>>16]
	srl	$1,$10,16
	lbu	$13,1024($2)		# Td4[s0>>16]
	srl	$2,$11,16
	lbu	$14,1024($24)		# Td4[s1>>16]
	srl	$24,$8,16
	lbu	$15,1024($25)		# Td4[s2>>16]
	srl	$25,$9,16

	and	$1,0xff
	and	$2,0xff
	and	$24,0xff
	and	$25,0xff
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$16,1024($1)		# Td4[s2>>8]
	ins $1,$8,0,8
	lbu	$17,1024($2)		# Td4[s3>>8]
	ins $2,$9,0,8
	lbu	$18,1024($24)		# Td4[s0>>8]
	ins $24,$10,0,8
	lbu	$19,1024($25)		# Td4[s1>>8]
	ins $25,$11,0,8

	lbu	$20,1024($1)		# Td4[s0>>24]
	srl	$1,$9,24
	lbu	$21,1024($2)		# Td4[s1>>24]
	srl	$2,$10,24
	lbu	$22,1024($24)		# Td4[s2>>24]
	srl	$24,$11,24
	lbu	$23,1024($25)		# Td4[s3>>24]
	srl	$25,$8,24

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# else
	lbu	$16,1024($1)		# Td4[s2>>8]
	and	$1,$8,0xff
	lbu	$17,1024($2)		# Td4[s3>>8]
	and	$2,$9,0xff
	lbu	$18,1024($24)		# Td4[s0>>8]
	and	$24,$10,0xff
	lbu	$19,1024($25)		# Td4[s1>>8]
	and	$25,$11,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,1024($1)		# Td4[s0>>24]
	ins $1,$9,0,8
	lbu	$21,1024($2)		# Td4[s1>>24]
	ins $2,$10,0,8
	lbu	$22,1024($24)		# Td4[s2>>24]
	ins $24,$11,0,8
	lbu	$23,1024($25)		# Td4[s3>>24]
	ins $25,$8,0,8
# endif
	sll	$12,$12,8
	sll	$13,$13,8
	sll	$14,$14,8
	sll	$15,$15,8

	ins	$12,$16,16,8
	lbu	$16,1024($1)		# Td4[s1]
	ins	$13,$17,16,8
	lbu	$17,1024($2)		# Td4[s2]
	ins	$14,$18,16,8
	lbu	$18,1024($24)		# Td4[s3]
	ins	$15,$19,16,8
	lbu	$19,1024($25)		# Td4[s0]

	ins	$12,$20,0,8
	lw	$8,0($3)
	ins	$13,$21,0,8
	lw	$9,4($3)
	ins	$14,$22,0,8
	lw	$10,8($3)
	ins	$15,$23,0,8
	lw	$11,12($3)

	ins	$12,$16,24,8
	ins	$13,$17,24,8
	ins	$14,$18,24,8
	ins	$15,$19,24,8
#else
	lbu	$16,1024($1)		# Td4[s2>>8]
	and	$1,$8,0xff
	lbu	$17,1024($2)		# Td4[s3>>8]
	and	$2,$9,0xff
	lbu	$18,1024($24)		# Td4[s0>>8]
	and	$24,$10,0xff
	lbu	$19,1024($25)		# Td4[s1>>8]
	and	$25,$11,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,1024($1)		# Td4[s0>>24]
	srl	$1,$9,24
	lbu	$21,1024($2)		# Td4[s1>>24]
	srl	$2,$10,24
	lbu	$22,1024($24)		# Td4[s2>>24]
	srl	$24,$11,24
	lbu	$23,1024($25)		# Td4[s3>>24]
	srl	$25,$8,24

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7

	sll	$12,$12,8
	sll	$13,$13,8
	sll	$14,$14,8
	sll	$15,$15,8

	sll	$16,$16,16
	sll	$17,$17,16
	sll	$18,$18,16
	sll	$19,$19,16

	xor	$12,$16
	lbu	$16,1024($1)		# Td4[s1]
	xor	$13,$17
	lbu	$17,1024($2)		# Td4[s2]
	xor	$14,$18
	lbu	$18,1024($24)		# Td4[s3]
	xor	$15,$19
	lbu	$19,1024($25)		# Td4[s0]

	#sll	$20,$20,0
	lw	$8,0($3)
	#sll	$21,$21,0
	lw	$9,4($3)
	#sll	$22,$22,0
	lw	$10,8($3)
	#sll	$23,$23,0
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	sll	$16,$16,24
	sll	$17,$17,24
	sll	$18,$18,24
	sll	$19,$19,24

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19
#endif

	/* final round-key addition; result is left in $8-$11 */
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	jr	$31
.end	_mips_AES_decrypt

/*
 * AES_decrypt: exported single-block decryption entry point.
 *   $4  input block, 16 bytes, read with lwl/lwr so any alignment works
 *   $5  output block, 16 bytes, written with swr/swl so any alignment works
 *   $6  key schedule; not touched here, consumed by _mips_AES_decrypt
 * Builds a 64-byte frame, saves $31, $30 and $16-$23 (all of which the
 * core clobbers), points $7 at AES_Td, runs the core on $8-$11 and
 * restores everything before returning.
 */
.align	5
.globl	AES_decrypt
.ent	AES_decrypt
AES_decrypt:
	.frame	$29,64,$31
	.mask	0xc0ff0000,-4
	.set	noreorder
	.cpload	$25
	sub $29,64
	sw	$31,64-1*4($29)
	sw	$30,64-2*4($29)
	sw	$23,64-3*4($29)
	sw	$22,64-4*4($29)
	sw	$21,64-5*4($29)
	sw	$20,64-6*4($29)
	sw	$19,64-7*4($29)
	sw	$18,64-8*4($29)
	sw	$17,64-9*4($29)
	sw	$16,64-10*4($29)
	.set	reorder
	la	$7,AES_Td		# PIC-ified 'load address'

	/* load the four state words from the input block */
	lwl	$8,0+3($4)
	lwl	$9,4+3($4)
	lwl	$10,8+3($4)
	lwl	$11,12+3($4)
	lwr	$8,0+0($4)
	lwr	$9,4+0($4)
	lwr	$10,8+0($4)
	lwr	$11,12+0($4)

	bal	_mips_AES_decrypt

	/* store the four state words to the output block */
	swr	$8,0+0($5)
	swr	$9,4+0($5)
	swr	$10,8+0($5)
	swr	$11,12+0($5)
	swl	$8,0+3($5)
	swl	$9,4+3($5)
	swl	$10,8+3($5)
	swl	$11,12+3($5)

	.set	noreorder
	lw	$31,64-1*4($29)
	lw	$30,64-2*4($29)
	lw	$23,64-3*4($29)
	lw	$22,64-4*4($29)
	lw	$21,64-5*4($29)
	lw	$20,64-6*4($29)
	lw	$19,64-7*4($29)
	lw	$18,64-8*4($29)
	lw	$17,64-9*4($29)
	lw	$16,64-10*4($29)
	jr	$31
	add $29,64			# delay slot: release the frame
.end	AES_decrypt
/*
 * _mips_AES_set_encrypt_key: key-expansion core.  Not exported; the two
 * AES_set_*_key wrappers reach it with "bal".
 *
 * Inputs:
 *   $4  user key bytes (read with lwl/lwr, so any alignment works)
 *   $5  key length in bits: 128, 192 or 256
 *   $6  output key schedule
 *   $7  byte-table base (the wrappers pass AES_Te4); $3 is set to $7+256
 *       and one word is read from it per expansion step (presumably the
 *       round constants stored after the 256-byte table -- the table
 *       itself is not visible in this part of the file)
 * Result in $2:
 *    0  success
 *   -1  $4 or $6 is zero
 *   -2  $5 is not one of the three supported lengths
 * On success $6 is back at the start of the schedule, the round count
 * (10, 12 or 14) has been stored at byte offset 240 of the schedule and is
 * also left in $30.
 * Clobbers $1, $2, $3, $8-$15, $24, $25 and $30.
 *
 * Branches in the ".set noreorder" stretches have their delay slot filled
 * by the instruction that follows them; that instruction executes whether
 * or not the branch is taken.
 */
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$29,0,$31
	.set	noreorder
	beqz	$4,.Lekey_done
	li	$2,-1
	beqz	$6,.Lekey_done
	add $3,$7,256

	.set	reorder
	lwl	$8,0+3($4)	# load 128 bits
	lwl	$9,4+3($4)
	lwl	$10,8+3($4)
	lwl	$11,12+3($4)
	li	$1,128
	lwr	$8,0+0($4)
	lwr	$9,4+0($4)
	lwr	$10,8+0($4)
	lwr	$11,12+0($4)
	.set	noreorder
	beq	$5,$1,.L128bits
	li	$30,10

	.set	reorder
	lwl	$12,16+3($4)	# load 192 bits
	lwl	$13,20+3($4)
	li	$1,192
	lwr	$12,16+0($4)
	lwr	$13,20+0($4)
	.set	noreorder
	beq	$5,$1,.L192bits
	li	$30,8

	.set	reorder
	lwl	$14,24+3($4)	# load 256 bits
	lwl	$15,28+3($4)
	li	$1,256
	lwr	$14,24+0($4)
	lwr	$15,28+0($4)
	.set	noreorder
	beq	$5,$1,.L256bits
	li	$30,7

	/* unsupported key length */
	b	.Lekey_done
	li	$2,-2

/*
 * 128-bit keys: 10 passes, each storing the current four words and
 * deriving the next four.  The four bytes of the last word are looked up
 * in the byte table, repositioned and xored into the first word together
 * with the word read from $3.
 */
.align	4
.L128bits:
	.set	reorder
	srl	$1,$11,16
	srl	$2,$11,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$11,0xff
	srl	$25,$11,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sub	$30,1
	add $6,16

	sll	$1,$1,8
	#sll	$2,$2,0
	sll	$24,$24,24
	sll	$25,$25,16

	xor	$8,$1
	lw	$1,0($3)
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10

	.set	noreorder
	bnez	$30,.L128bits
	add $3,4

	/* store the last round key; 80 + 10*16 = 240 is the round-count slot */
	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	li	$30,10
	sw	$11,12($6)
	li	$2,0
	sw	$30,80($6)
	b	.Lekey_done
	sub $6,10*16

/*
 * 192-bit keys: 8 passes of six words each (24 bytes per pass).
 */
.align	4
.L192bits:
	.set	reorder
	srl	$1,$13,16
	srl	$2,$13,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$13,0xff
	srl	$25,$13,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sw	$12,16($6)
	sw	$13,20($6)
	sub	$30,1
	add $6,24

	sll	$1,$1,8
	#sll	$2,$2,0
	sll	$24,$24,24
	sll	$25,$25,16

	xor	$8,$1
	lw	$1,0($3)
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10
	xor	$12,$11
	xor	$13,$12

	.set	noreorder
	bnez	$30,.L192bits
	add $3,4

	/* $6 has advanced 8*24 = 192 bytes; 48 + 192 = 240 is the round-count slot */
	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	li	$30,12
	sw	$11,12($6)
	li	$2,0
	sw	$30,48($6)
	b	.Lekey_done
	sub $6,12*16

/*
 * 256-bit keys: 7 passes of eight words each.  Every pass updates the
 * first four words as above; all passes but the last then update the upper
 * four words from a second byte-table lookup that uses no repositioning
 * and no word from $3.
 */
.align	4
.L256bits:
	.set	reorder
	srl	$1,$15,16
	srl	$2,$15,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$15,0xff
	srl	$25,$15,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sw	$12,16($6)
	sw	$13,20($6)
	sw	$14,24($6)
	sw	$15,28($6)
	sub	$30,1

	sll	$1,$1,8
	#sll	$2,$2,0
	sll	$24,$24,24
	sll	$25,$25,16

	xor	$8,$1
	lw	$1,0($3)
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10
	beqz	$30,.L256bits_done

	srl	$1,$11,24
	srl	$2,$11,16
	srl	$24,$11,8
	and	$25,$11,0xff
	and	$2,0xff
	and	$24,0xff
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)
	sll	$1,24
	sll	$2,16
	sll	$24,8

	xor	$12,$1
	xor	$12,$2
	xor	$12,$24
	xor	$12,$25

	xor	$13,$12
	xor	$14,$13
	xor	$15,$14

	add $6,32
	.set	noreorder
	b	.L256bits
	add $3,4

.L256bits_done:
	/* $6 has advanced 6*32 = 192 bytes; 48 + 192 = 240 is the round-count slot */
	sw	$8,32($6)
	sw	$9,36($6)
	sw	$10,40($6)
	li	$30,14
	sw	$11,44($6)
	li	$2,0
	sw	$30,48($6)
	sub $6,12*16

.Lekey_done:
	jr	$31
	nop
.end	_mips_AES_set_encrypt_key

/*
 * AES_set_encrypt_key: exported key-setup entry point.
 *   $4  user key, $5  key length in bits, $6  output key schedule
 *       (all passed through untouched to _mips_AES_set_encrypt_key)
 * Builds a 32-byte frame to preserve $31 and $30, points $7 at AES_Te4
 * and runs the expansion core.  The core leaves its status in $2
 * (0, -1 or -2); it is additionally copied into $4 before returning.
 */
.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$29,32,$31
	.mask	0xc0000000,-4
	.set	noreorder
	.cpload	$25
	sub $29,32
	sw	$31,32-1*4($29)
	sw	$30,32-2*4($29)
	.set	reorder
	la	$7,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$4,$2
	lw	$31,32-1*4($29)
	lw	$30,32-2*4($29)
	jr	$31
	add $29,32			# delay slot: release the frame
.end	AES_set_encrypt_key
/*
 * AES_set_decrypt_key: exported decryption key-setup entry point.
 *   $4  user key, $5  key length in bits, $6  output key schedule
 *
 * Steps:
 *   1. Run _mips_AES_set_encrypt_key.  A negative status in $2 skips
 *      straight to the exit and is returned as is.
 *   2. .Lswap: reverse the order of the 16-byte round keys in place,
 *      walking $4 up from the first key and $5 down from the last
 *      ($6 + 16*rounds, rounds being left in $30 by the core) until the
 *      two pointers meet.
 *   3. .Lmix: transform each of the (rounds-1)*4 words that follow the
 *      first round key.  $9, $10 and $11 are the word doubled once, twice
 *      and three times, each doubling done on all four bytes at once with
 *      the masks 0x80808080 ($2), 0x7f7f7f7f ($24) and 0x1b1b1b1b ($25);
 *      the stored result combines those with rotated copies.  (This
 *      appears to be the inverse MixColumns step applied to the round
 *      keys -- TODO confirm.)
 * Returns 0 in $2 on success; the status is additionally copied into $4.
 * Preserves $31 and $30 via a 32-byte frame.
 */
.align	5
.globl	AES_set_decrypt_key
.ent	AES_set_decrypt_key
AES_set_decrypt_key:
	.frame	$29,32,$31
	.mask	0xc0000000,-4
	.set	noreorder
	.cpload	$25
	sub $29,32
	sw	$31,32-1*4($29)
	sw	$30,32-2*4($29)
	.set	reorder
	la	$7,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$2,.Ldkey_done

	sll	$1,$30,4
	add $4,$6,0
	add $5,$6,$1
.align	4
.Lswap:
	lw	$8,0($4)
	lw	$9,4($4)
	lw	$10,8($4)
	lw	$11,12($4)
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,8($5)
	lw	$15,12($5)
	sw	$8,0($5)
	sw	$9,4($5)
	sw	$10,8($5)
	sw	$11,12($5)
	add $4,16
	sub $5,16
	sw	$12,-16($4)
	sw	$13,-12($4)
	sw	$14,-8($4)
	sw	$15,-4($4)
	bne	$4,$5,.Lswap

	/* set up the word count ((rounds-1)*4), the start pointer and the masks */
	lw	$8,16($6)		# modulo-scheduled
	lui	$2,0x8080
	sub	$30,1
	or	$2,0x8080
	sll	$30,2
	add $6,16
	lui	$25,0x1b1b
	nor	$24,$0,$2
	or	$25,0x1b1b
.align	4
.Lmix:
	and	$1,$8,$2
	and	$9,$8,$24
	srl	$10,$1,7
	addu	$9,$9		# tp2<<1
	subu	$1,$10
	and	$1,$25
	xor	$9,$1

	and	$1,$9,$2
	and	$10,$9,$24
	srl	$11,$1,7
	addu	$10,$10		# tp4<<1
	subu	$1,$11
	and	$1,$25
	xor	$10,$1

	and	$1,$10,$2
	and	$11,$10,$24
	srl	$12,$1,7
	addu	$11,$11		# tp8<<1
	subu	$1,$12
	and	$1,$25
	xor	$11,$1

	xor	$12,$11,$8
	xor	$15,$11,$10
	xor	$13,$12,$9
	xor	$14,$12,$10

	/* accumulate into $15; the next input word is loaded into $8 early */
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$8,$14,16
	 xor	$15,$9
	rotr	$9,$12,24
	xor	$15,$8
	rotr	$10,$13,8
	xor	$15,$9
	lw	$8,4($6)		# modulo-scheduled
	xor	$15,$10
#else
	sll	$8,$14,16
	 xor	$15,$9
	srl	$9,$14,16
	xor	$15,$8
	sll	$8,$12,8
	xor	$15,$9
	srl	$9,$12,24
	xor	$15,$8
	sll	$8,$13,24
	xor	$15,$9
	srl	$9,$13,8
	xor	$15,$8
	lw	$8,4($6)		# modulo-scheduled
	xor	$15,$9
#endif
	sub	$30,1
	sw	$15,0($6)
	add $6,4
	bnez	$30,.Lmix

	li	$2,0
.Ldkey_done:
	.set	noreorder
	move	$4,$2
	lw	$31,32-1*4($29)
	lw	$30,32-2*4($29)
	jr	$31
	add $29,32			# delay slot: release the frame
.end	AES_set_decrypt_key
# Read-only lookup tables.
# The .align 10 operand requests 2^10 = 1024-byte alignment (GNU as for MIPS
# takes the operand as a power of two).
# AES_Te: 256 four-byte entries, two per row below, 1024 bytes in total.
# Each entry has the byte pattern {2*S, S, S, 3*S} for one byte value S,
# with the products taken in GF(2^8); for example c6 63 63 a5 for S = 0x63.
.rdata
.align	10
AES_Te:
.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a

# AES_Td: 256 four-byte decryption table entries, two per row below,
# 1024 bytes in total (marked Td0).  The unlabelled 256-byte table that
# follows these rows (marked Td4) therefore starts at AES_Td + 1024.
AES_Td:
.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42

# Td4: 256 single-byte entries, eight per row.  It has no label of its own
# and sits directly behind the 1024 bytes of AES_Td.
# NOTE(review): the values look like the AES inverse S-box - confirm
# against FIPS 197.
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

# AES_Te4: 256 single-byte entries, eight per row.  AES_set_encrypt_key and
# AES_set_decrypt_key pass this address to the key-schedule helper in $7.
# The values match the S byte embedded in the AES_Te entries (0x63, 0x7c,
# 0x77, ...).  Ten 4-byte rcon records follow directly after these 256 bytes.
AES_Te4:
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

# rcon: ten 4-byte records placed right behind the 256 bytes of AES_Te4.
# Only the first byte of each record is non-zero:
# 01 02 04 08 10 20 40 80 1B 36.
# NOTE(review): presumably the round constants read by the key-schedule
# helper; the code that reads them is outside this view - confirm.
.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00


================================================
FILE: lib/aes_acc/asm/mips_be.S
================================================
# File preamble: section selection, feature macros and assembler modes.
.text
# NOTE(review): fipssyms.h is presumably a symbol-renaming header for the
# FIPS canister build; it is only included when OPENSSL_FIPSCANISTER is set.
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

# SmartMIPS builds also get _MIPS_ARCH_MIPS32R2 defined, so every block
# below that is guarded by that macro is assembled for them as well.
#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif

# Request the pic2 option except for EABI builds and non-PIC VxWorks builds.
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option	pic2
#endif
# The routines below use $1 explicitly as a scratch register, so the
# assembler is told not to treat it as its own temporary.
.set	noat
# _mips_AES_encrypt: internal block-encryption core, called with bal from
# AES_encrypt.  It declares no stack frame (.frame size 0).
# Register usage, as read from the code below:
#   $8-$11          in/out: the four 32-bit state words s0..s3
#   $6              in: key schedule; the round count is read from 240($6)
#   $7              in: base address of the Te table (AES_encrypt loads
#                   AES_Te); indices are byte * 4, i.e. 0..0x3fc
#   $3              cursor over the round keys, starting at $6 + 16
#   $30             loop counter, starts at round count - 1
#   $1,$2,$24,$25   scratch table indices / addresses
#   $12-$15         per-round accumulators for the new state words
#   $16-$23         temporaries (saved and restored by AES_encrypt)
# Three variants of the main loop are selected by the preprocessor:
# SmartMIPS (ext + lwxs), MIPS32R2/MIPS64R2 (lw + rotr, ins) and a generic
# one (lwl/lwr pairs).  The last round is handled after the loop with
# byte loads.
.align	5
.ent	_mips_AES_encrypt
_mips_AES_encrypt:
	.frame	$29,0,$31
	.set	reorder
	# Initial round key: xor the first 16 key bytes into the state and
	# fetch the round count.
	lw	$12,0($6)
	lw	$13,4($6)
	lw	$14,8($6)
	lw	$15,12($6)
	lw	$30,240($6)
	add $3,$6,16

	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	# The main loop runs round count - 1 times.
	sub	$30,1
#if defined(__mips_smartmips)
	# SmartMIPS variant: ext pulls out one state byte and lwxs loads the
	# table word at that index.  The existing Te1/Te2/Te3 labels refer to
	# the loaded Te entry after the rotr by 8/16/24 applied further down.
	ext	$1,$9,16,8
.Loop_enc:
	ext	$2,$10,16,8
	ext	$24,$11,16,8
	ext	$25,$8,16,8
	lwxs	$12,$1($7)		# Te1[s1>>16]
	ext	$1,$10,8,8
	lwxs	$13,$2($7)		# Te1[s2>>16]
	ext	$2,$11,8,8
	lwxs	$14,$24($7)		# Te1[s3>>16]
	ext	$24,$8,8,8
	lwxs	$15,$25($7)		# Te1[s0>>16]
	ext	$25,$9,8,8

	lwxs	$16,$1($7)		# Te2[s2>>8]
	ext	$1,$11,0,8
	lwxs	$17,$2($7)		# Te2[s3>>8]
	ext	$2,$8,0,8
	lwxs	$18,$24($7)		# Te2[s0>>8]
	ext	$24,$9,0,8
	lwxs	$19,$25($7)		# Te2[s1>>8]
	ext	$25,$10,0,8

	lwxs	$20,$1($7)		# Te3[s3]
	ext	$1,$8,24,8
	lwxs	$21,$2($7)		# Te3[s0]
	ext	$2,$9,24,8
	lwxs	$22,$24($7)		# Te3[s1]
	ext	$24,$10,24,8
	lwxs	$23,$25($7)		# Te3[s2]
	ext	$25,$11,24,8

	rotr	$12,$12,8
	rotr	$13,$13,8
	rotr	$14,$14,8
	rotr	$15,$15,8

	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	xor	$12,$16
	lwxs	$16,$1($7)		# Te0[s0>>24]
	xor	$13,$17
	lwxs	$17,$2($7)		# Te0[s1>>24]
	xor	$14,$18
	lwxs	$18,$24($7)		# Te0[s2>>24]
	xor	$15,$19
	lwxs	$19,$25($7)		# Te0[s3>>24]

	# The state registers are reused for the next round key while the
	# remaining table words are rotated.
	rotr	$20,$20,24
	lw	$8,0($3)
	rotr	$21,$21,24
	lw	$9,4($3)
	rotr	$22,$22,24
	lw	$10,8($3)
	rotr	$23,$23,24
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	# The instruction after bnez sits in the branch delay slot: it already
	# extracts the first index of the next iteration.
	bnez	$30,.Loop_enc
	ext	$1,$9,16,8

	# Loop finished: recompute the first index in the byte * 4 form that
	# the last-round code below expects.
	srl	$1,$9,14
#else
	# Generic and R2 variants: srl by 14 followed by the 0x3fc mask turns
	# state bits 16..23 into a byte * 4 offset, which is added to $7.
	srl	$1,$9,14
.Loop_enc:
	srl	$2,$10,14
	srl	$24,$11,14
	srl	$25,$8,14
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$12,0($1)		# Te1[s1>>16]
	srl	$1,$10,6
	lw	$13,0($2)		# Te1[s2>>16]
	srl	$2,$11,6
	lw	$14,0($24)		# Te1[s3>>16]
	srl	$24,$8,6
	lw	$15,0($25)		# Te1[s0>>16]
	srl	$25,$9,6
#else
	# Without rotr: the lwl/lwr pair at byte offsets 3 and 2 assembles the
	# table word already rotated (assuming big-endian byte order this
	# equals the rotr by 8 of the R2 path - confirm).
	lwl	$12,3($1)		# Te1[s1>>16]
	lwl	$13,3($2)		# Te1[s2>>16]
	lwl	$14,3($24)		# Te1[s3>>16]
	lwl	$15,3($25)		# Te1[s0>>16]
	lwr	$12,2($1)		# Te1[s1>>16]
	srl	$1,$10,6
	lwr	$13,2($2)		# Te1[s2>>16]
	srl	$2,$11,6
	lwr	$14,2($24)		# Te1[s3>>16]
	srl	$24,$8,6
	lwr	$15,2($25)		# Te1[s0>>16]
	srl	$25,$9,6
#endif
	# Addresses for state bits 8..15 (srl by 6 above, then mask and base).
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$12,$12,8
	rotr	$13,$13,8
	rotr	$14,$14,8
	rotr	$15,$15,8
	# In this path ins overwrites bits 2..9 of an existing table address
	# with a new byte index instead of masking and adding again.
	# NOTE(review): this needs the table base in $7 to have bits 2..9
	# clear, i.e. a 1024-byte aligned table - confirm at the definition.
# if defined(_MIPSEL)
	lw	$16,0($1)		# Te2[s2>>8]
	sll	$1,$11,2
	lw	$17,0($2)		# Te2[s3>>8]
	sll	$2,$8,2
	lw	$18,0($24)		# Te2[s0>>8]
	sll	$24,$9,2
	lw	$19,0($25)		# Te2[s1>>8]
	sll	$25,$10,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lw	$20,0($1)		# Te3[s3]
	ins $1,$8,2,8
	lw	$21,0($2)		# Te3[s0]
	ins $2,$9,2,8
	lw	$22,0($24)		# Te3[s1]
	ins $24,$10,2,8
	lw	$23,0($25)		# Te3[s2]
	ins $25,$11,2,8
# else
	lw	$16,0($1)		# Te2[s2>>8]
	ins $1,$11,2,8
	lw	$17,0($2)		# Te2[s3>>8]
	ins $2,$8,2,8
	lw	$18,0($24)		# Te2[s0>>8]
	ins $24,$9,2,8
	lw	$19,0($25)		# Te2[s1>>8]
	ins $25,$10,2,8

	lw	$20,0($1)		# Te3[s3]
	srl	$1,$8,22
	lw	$21,0($2)		# Te3[s0]
	srl	$2,$9,22
	lw	$22,0($24)		# Te3[s1]
	srl	$24,$10,22
	lw	$23,0($25)		# Te3[s2]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# endif
	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	rotr	$20,$20,24
	rotr	$21,$21,24
	rotr	$22,$22,24
	rotr	$23,$23,24
#else
	# Generic path: lwl/lwr pairs at offsets 2/1 and 1/0 take the place of
	# the rotr by 16 and by 24 used in the R2 path.
	lwl	$16,2($1)		# Te2[s2>>8]
	lwl	$17,2($2)		# Te2[s3>>8]
	lwl	$18,2($24)		# Te2[s0>>8]
	lwl	$19,2($25)		# Te2[s1>>8]
	lwr	$16,1($1)		# Te2[s2>>8]
	sll	$1,$11,2
	lwr	$17,1($2)		# Te2[s3>>8]
	sll	$2,$8,2
	lwr	$18,1($24)		# Te2[s0>>8]
	sll	$24,$9,2
	lwr	$19,1($25)		# Te2[s1>>8]
	sll	$25,$10,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lwl	$20,1($1)		# Te3[s3]
	lwl	$21,1($2)		# Te3[s0]
	lwl	$22,1($24)		# Te3[s1]
	lwl	$23,1($25)		# Te3[s2]
	lwr	$20,0($1)		# Te3[s3]
	srl	$1,$8,22
	lwr	$21,0($2)		# Te3[s0]
	srl	$2,$9,22
	lwr	$22,0($24)		# Te3[s1]
	srl	$24,$10,22
	lwr	$23,0($25)		# Te3[s2]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif
	# Fold the four table words of each column together; the Te0 loads and
	# the next round key loads are interleaved with the xors.
	xor	$12,$16
	lw	$16,0($1)		# Te0[s0>>24]
	xor	$13,$17
	lw	$17,0($2)		# Te0[s1>>24]
	xor	$14,$18
	lw	$18,0($24)		# Te0[s2>>24]
	xor	$15,$19
	lw	$19,0($25)		# Te0[s3>>24]

	xor	$12,$20
	lw	$8,0($3)
	xor	$13,$21
	lw	$9,4($3)
	xor	$14,$22
	lw	$10,8($3)
	xor	$15,$23
	lw	$11,12($3)

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	# New state = round key xor accumulated table words.
	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	# The srl after bnez is the branch delay slot; it prepares the first
	# index for the next iteration or for the last round below.
	bnez	$30,.Loop_enc
	srl	$1,$9,14
#endif

	# Last round: only single substituted bytes are needed, so lbu reads
	# the byte at offset 2 of each table entry (labelled Te4 in the
	# existing comments) and the bytes are merged into words by shifts or
	# by ins.  No further table words are mixed in.
	.set	reorder
	srl	$2,$10,14
	srl	$24,$11,14
	srl	$25,$8,14
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$12,2($1)		# Te4[s1>>16]
	srl	$1,$10,6
	lbu	$13,2($2)		# Te4[s2>>16]
	srl	$2,$11,6
	lbu	$14,2($24)		# Te4[s3>>16]
	srl	$24,$8,6
	lbu	$15,2($25)		# Te4[s0>>16]
	srl	$25,$9,6

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$16,2($1)		# Te4[s2>>8]
	ins $1,$8,2,8
	lbu	$17,2($2)		# Te4[s3>>8]
	ins $2,$9,2,8
	lbu	$18,2($24)		# Te4[s0>>8]
	ins $24,$10,2,8
	lbu	$19,2($25)		# Te4[s1>>8]
	ins $25,$11,2,8

	lbu	$20,2($1)		# Te4[s0>>24]
	sll	$1,$11,2
	lbu	$21,2($2)		# Te4[s1>>24]
	sll	$2,$8,2
	lbu	$22,2($24)		# Te4[s2>>24]
	sll	$24,$9,2
	lbu	$23,2($25)		# Te4[s3>>24]
	sll	$25,$10,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# else
	lbu	$16,2($1)		# Te4[s2>>8]
	srl	$1,$8,22
	lbu	$17,2($2)		# Te4[s3>>8]
	srl	$2,$9,22
	lbu	$18,2($24)		# Te4[s0>>8]
	srl	$24,$10,22
	lbu	$19,2($25)		# Te4[s1>>8]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,2($1)		# Te4[s0>>24]
	ins $1,$11,2,8
	lbu	$21,2($2)		# Te4[s1>>24]
	ins $2,$8,2,8
	lbu	$22,2($24)		# Te4[s2>>24]
	ins $24,$9,2,8
	lbu	$23,2($25)		# Te4[s3>>24]
	ins $25,$10,2,8
# endif
	# R2 merge: place the four substituted bytes of every output word at
	# bit positions 16, 8, 24 and 0 with sll and ins.
	sll	$12,$12,16
	sll	$13,$13,16
	sll	$14,$14,16
	sll	$15,$15,16

	ins	$12,$16,8,8
	lbu	$16,2($1)		# Te4[s3]
	ins	$13,$17,8,8
	lbu	$17,2($2)		# Te4[s0]
	ins	$14,$18,8,8
	lbu	$18,2($24)		# Te4[s1]
	ins	$15,$19,8,8
	lbu	$19,2($25)		# Te4[s2]

	ins	$12,$20,24,8
	lw	$8,0($3)
	ins	$13,$21,24,8
	lw	$9,4($3)
	ins	$14,$22,24,8
	lw	$10,8($3)
	ins	$15,$23,24,8
	lw	$11,12($3)

	ins	$12,$16,0,8
	ins	$13,$17,0,8
	ins	$14,$18,0,8
	ins	$15,$19,0,8
#else
	lbu	$16,2($1)		# Te4[s2>>8]
	srl	$1,$8,22
	lbu	$17,2($2)		# Te4[s3>>8]
	srl	$2,$9,22
	lbu	$18,2($24)		# Te4[s0>>8]
	srl	$24,$10,22
	lbu	$19,2($25)		# Te4[s1>>8]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,2($1)		# Te4[s0>>24]
	sll	$1,$11,2
	lbu	$21,2($2)		# Te4[s1>>24]
	sll	$2,$8,2
	lbu	$22,2($24)		# Te4[s2>>24]
	sll	$24,$9,2
	lbu	$23,2($25)		# Te4[s3>>24]
	sll	$25,$10,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7

	# Generic merge: shift each byte to its position (16, 8, 24, 0) and
	# xor the pieces together.
	sll	$12,$12,16
	sll	$13,$13,16
	sll	$14,$14,16
	sll	$15,$15,16

	sll	$16,$16,8
	sll	$17,$17,8
	sll	$18,$18,8
	sll	$19,$19,8

	xor	$12,$16
	lbu	$16,2($1)		# Te4[s3]
	xor	$13,$17
	lbu	$17,2($2)		# Te4[s0]
	xor	$14,$18
	lbu	$18,2($24)		# Te4[s1]
	xor	$15,$19
	lbu	$19,2($25)		# Te4[s2]

	sll	$20,$20,24
	lw	$8,0($3)
	sll	$21,$21,24
	lw	$9,4($3)
	sll	$22,$22,24
	lw	$10,8($3)
	sll	$23,$23,24
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	#sll	$16,$16,0
	#sll	$17,$17,0
	#sll	$18,$18,0
	#sll	$19,$19,0

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19
#endif
	# Final round key: $8-$11 hold the last 16 key bytes loaded above; xor
	# in the merged bytes to produce the output state.
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	jr	$31
.end	_mips_AES_encrypt

# AES_encrypt: exported single-block entry point.
#   $4  source block, 16 bytes, read with lwl/lwr pairs
#   $5  destination block, 16 bytes, written with swr/swl pairs
#   $6  key schedule, handed unchanged to _mips_AES_encrypt
# The lwl/lwr and swr/swl pairs are the MIPS unaligned word accesses, so
# presumably neither block has to be word aligned.
# Builds a 64-byte frame and saves $31, $30 and $16-$23 (matching .mask),
# because the core uses $30 as its round counter and $16-$23 as temporaries.
.align	5
.globl	AES_encrypt
.ent	AES_encrypt
AES_encrypt:
	.frame	$29,64,$31
	.mask	0xc0ff0000,-4
	.set	noreorder
	.cpload	$25
	sub $29,64
	sw	$31,64-1*4($29)
	sw	$30,64-2*4($29)
	sw	$23,64-3*4($29)
	sw	$22,64-4*4($29)
	sw	$21,64-5*4($29)
	sw	$20,64-6*4($29)
	sw	$19,64-7*4($29)
	sw	$18,64-8*4($29)
	sw	$17,64-9*4($29)
	sw	$16,64-10*4($29)
	.set	reorder
	la	$7,AES_Te		# PIC-ified 'load address'

	# Load the four state words from the source block into $8-$11.
	lwl	$8,0+0($4)
	lwl	$9,4+0($4)
	lwl	$10,8+0($4)
	lwl	$11,12+0($4)
	lwr	$8,0+3($4)
	lwr	$9,4+3($4)
	lwr	$10,8+3($4)
	lwr	$11,12+3($4)

	bal	_mips_AES_encrypt

	# Store the resulting state words to the destination block.
	swr	$8,0+3($5)
	swr	$9,4+3($5)
	swr	$10,8+3($5)
	swr	$11,12+3($5)
	swl	$8,0+0($5)
	swl	$9,4+0($5)
	swl	$10,8+0($5)
	swl	$11,12+0($5)

	.set	noreorder
	# Restore the saved registers and return.
	lw	$31,64-1*4($29)
	lw	$30,64-2*4($29)
	lw	$23,64-3*4($29)
	lw	$22,64-4*4($29)
	lw	$21,64-5*4($29)
	lw	$20,64-6*4($29)
	lw	$19,64-7*4($29)
	lw	$18,64-8*4($29)
	lw	$17,64-9*4($29)
	lw	$16,64-10*4($29)
	jr	$31
	# Branch delay slot: release the frame while returning.
	add $29,64
.end	AES_encrypt
.align	5
.ent	_mips_AES_decrypt
_mips_AES_decrypt:
	.frame	$29,0,$31
	.set	reorder
	lw	$12,0($6)
	lw	$13,4($6)
	lw	$14,8($6)
	lw	$15,12($6)
	lw	$30,240($6)
	add $3,$6,16

	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	sub	$30,1
#if defined(__mips_smartmips)
	ext	$1,$11,16,8
.Loop_dec:
	ext	$2,$8,16,8
	ext	$24,$9,16,8
	ext	$25,$10,16,8
	lwxs	$12,$1($7)		# Td1[s3>>16]
	ext	$1,$10,8,8
	lwxs	$13,$2($7)		# Td1[s0>>16]
	ext	$2,$11,8,8
	lwxs	$14,$24($7)		# Td1[s1>>16]
	ext	$24,$8,8,8
	lwxs	$15,$25($7)		# Td1[s2>>16]
	ext	$25,$9,8,8

	lwxs	$16,$1($7)		# Td2[s2>>8]
	ext	$1,$9,0,8
	lwxs	$17,$2($7)		# Td2[s3>>8]
	ext	$2,$10,0,8
	lwxs	$18,$24($7)		# Td2[s0>>8]
	ext	$24,$11,0,8
	lwxs	$19,$25($7)		# Td2[s1>>8]
	ext	$25,$8,0,8

	lwxs	$20,$1($7)		# Td3[s1]
	ext	$1,$8,24,8
	lwxs	$21,$2($7)		# Td3[s2]
	ext	$2,$9,24,8
	lwxs	$22,$24($7)		# Td3[s3]
	ext	$24,$10,24,8
	lwxs	$23,$25($7)		# Td3[s0]
	ext	$25,$11,24,8

	rotr	$12,$12,8
	rotr	$13,$13,8
	rotr	$14,$14,8
	rotr	$15,$15,8

	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	xor	$12,$16
	lwxs	$16,$1($7)		# Td0[s0>>24]
	xor	$13,$17
	lwxs	$17,$2($7)		# Td0[s1>>24]
	xor	$14,$18
	lwxs	$18,$24($7)		# Td0[s2>>24]
	xor	$15,$19
	lwxs	$19,$25($7)		# Td0[s3>>24]

	rotr	$20,$20,24
	lw	$8,0($3)
	rotr	$21,$21,24
	lw	$9,4($3)
	rotr	$22,$22,24
	lw	$10,8($3)
	rotr	$23,$23,24
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_dec
	ext	$1,$11,16,8

	srl	$1,$11,14
#else
	srl	$1,$11,14
.Loop_dec:
	srl	$2,$8,14
	srl	$24,$9,14
	srl	$25,$10,14
	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$12,0($1)		# Td1[s3>>16]
	srl	$1,$10,6
	lw	$13,0($2)		# Td1[s0>>16]
	srl	$2,$11,6
	lw	$14,0($24)		# Td1[s1>>16]
	srl	$24,$8,6
	lw	$15,0($25)		# Td1[s2>>16]
	srl	$25,$9,6
#else
	lwl	$12,3($1)		# Td1[s3>>16]
	lwl	$13,3($2)		# Td1[s0>>16]
	lwl	$14,3($24)		# Td1[s1>>16]
	lwl	$15,3($25)		# Td1[s2>>16]
	lwr	$12,2($1)		# Td1[s3>>16]
	srl	$1,$10,6
	lwr	$13,2($2)		# Td1[s0>>16]
	srl	$2,$11,6
	lwr	$14,2($24)		# Td1[s1>>16]
	srl	$24,$8,6
	lwr	$15,2($25)		# Td1[s2>>16]
	srl	$25,$9,6
#endif

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$12,$12,8
	rotr	$13,$13,8
	rotr	$14,$14,8
	rotr	$15,$15,8
# if defined(_MIPSEL)
	lw	$16,0($1)		# Td2[s2>>8]
	sll	$1,$9,2
	lw	$17,0($2)		# Td2[s3>>8]
	sll	$2,$10,2
	lw	$18,0($24)		# Td2[s0>>8]
	sll	$24,$11,2
	lw	$19,0($25)		# Td2[s1>>8]
	sll	$25,$8,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lw	$20,0($1)		# Td3[s1]
	ins $1,$8,2,8
	lw	$21,0($2)		# Td3[s2]
	ins $2,$9,2,8
	lw	$22,0($24)		# Td3[s3]
	ins $24,$10,2,8
	lw	$23,0($25)		# Td3[s0]
	ins $25,$11,2,8
#else
	lw	$16,0($1)		# Td2[s2>>8]
	ins $1,$9,2,8
	lw	$17,0($2)		# Td2[s3>>8]
	ins $2,$10,2,8
	lw	$18,0($24)		# Td2[s0>>8]
	ins $24,$11,2,8
	lw	$19,0($25)		# Td2[s1>>8]
	ins $25,$8,2,8

	lw	$20,0($1)		# Td3[s1]
	srl	$1,$8,22
	lw	$21,0($2)		# Td3[s2]
	srl	$2,$9,22
	lw	$22,0($24)		# Td3[s3]
	srl	$24,$10,22
	lw	$23,0($25)		# Td3[s0]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif
	rotr	$16,$16,16
	rotr	$17,$17,16
	rotr	$18,$18,16
	rotr	$19,$19,16

	rotr	$20,$20,24
	rotr	$21,$21,24
	rotr	$22,$22,24
	rotr	$23,$23,24
#else
	lwl	$16,2($1)		# Td2[s2>>8]
	lwl	$17,2($2)		# Td2[s3>>8]
	lwl	$18,2($24)		# Td2[s0>>8]
	lwl	$19,2($25)		# Td2[s1>>8]
	lwr	$16,1($1)		# Td2[s2>>8]
	sll	$1,$9,2
	lwr	$17,1($2)		# Td2[s3>>8]
	sll	$2,$10,2
	lwr	$18,1($24)		# Td2[s0>>8]
	sll	$24,$11,2
	lwr	$19,1($25)		# Td2[s1>>8]
	sll	$25,$8,2

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lwl	$20,1($1)		# Td3[s1]
	lwl	$21,1($2)		# Td3[s2]
	lwl	$22,1($24)		# Td3[s3]
	lwl	$23,1($25)		# Td3[s0]
	lwr	$20,0($1)		# Td3[s1]
	srl	$1,$8,22
	lwr	$21,0($2)		# Td3[s2]
	srl	$2,$9,22
	lwr	$22,0($24)		# Td3[s3]
	srl	$24,$10,22
	lwr	$23,0($25)		# Td3[s0]
	srl	$25,$11,22

	and	$1,0x3fc
	and	$2,0x3fc
	and	$24,0x3fc
	and	$25,0x3fc
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#endif

	xor	$12,$16
	lw	$16,0($1)		# Td0[s0>>24]
	xor	$13,$17
	lw	$17,0($2)		# Td0[s1>>24]
	xor	$14,$18
	lw	$18,0($24)		# Td0[s2>>24]
	xor	$15,$19
	lw	$19,0($25)		# Td0[s3>>24]

	xor	$12,$20
	lw	$8,0($3)
	xor	$13,$21
	lw	$9,4($3)
	xor	$14,$22
	lw	$10,8($3)
	xor	$15,$23
	lw	$11,12($3)

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19

	sub	$30,1
	add $3,16
	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15
	.set	noreorder
	bnez	$30,.Loop_dec
	srl	$1,$11,14
#endif

	.set	reorder
	lw	$16,1024($7)		# prefetch Td4
	srl	$1,$11,16
	lw	$17,1024+32($7)
	srl	$2,$8,16
	lw	$18,1024+64($7)
	srl	$24,$9,16
	lw	$19,1024+96($7)
	srl	$25,$10,16
	lw	$20,1024+128($7)
	and	$1,0xff
	lw	$21,1024+160($7)
	and	$2,0xff
	lw	$22,1024+192($7)
	and	$24,0xff
	lw	$23,1024+224($7)
	and	$25,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$12,1024($1)		# Td4[s3>>16]
	srl	$1,$10,8
	lbu	$13,1024($2)		# Td4[s0>>16]
	srl	$2,$11,8
	lbu	$14,1024($24)		# Td4[s1>>16]
	srl	$24,$8,8
	lbu	$15,1024($25)		# Td4[s2>>16]
	srl	$25,$9,8

	and	$1,0xff
	and	$2,0xff
	and	$24,0xff
	and	$25,0xff
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$16,1024($1)		# Td4[s2>>8]
	ins $1,$8,0,8
	lbu	$17,1024($2)		# Td4[s3>>8]
	ins $2,$9,0,8
	lbu	$18,1024($24)		# Td4[s0>>8]
	ins $24,$10,0,8
	lbu	$19,1024($25)		# Td4[s1>>8]
	ins $25,$11,0,8

	lbu	$20,1024($1)		# Td4[s0>>24]
	and	$1,$9,0xff
	lbu	$21,1024($2)		# Td4[s1>>24]
	and	$2,$10,0xff
	lbu	$22,1024($24)		# Td4[s2>>24]
	and	$24,$11,0xff
	lbu	$23,1024($25)		# Td4[s3>>24]
	and	$25,$8,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
# else
	lbu	$16,1024($1)		# Td4[s2>>8]
	srl	$1,$8,24
	lbu	$17,1024($2)		# Td4[s3>>8]
	srl	$2,$9,24
	lbu	$18,1024($24)		# Td4[s0>>8]
	srl	$24,$10,24
	lbu	$19,1024($25)		# Td4[s1>>8]
	srl	$25,$11,24

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,1024($1)		# Td4[s0>>24]
	ins $1,$9,0,8
	lbu	$21,1024($2)		# Td4[s1>>24]
	ins $2,$10,0,8
	lbu	$22,1024($24)		# Td4[s2>>24]
	ins $24,$11,0,8
	lbu	$23,1024($25)		# Td4[s3>>24]
	ins $25,$8,0,8
# endif
	sll	$12,$12,16
	sll	$13,$13,16
	sll	$14,$14,16
	sll	$15,$15,16

	ins	$12,$16,8,8
	lbu	$16,1024($1)		# Td4[s1]
	ins	$13,$17,8,8
	lbu	$17,1024($2)		# Td4[s2]
	ins	$14,$18,8,8
	lbu	$18,1024($24)		# Td4[s3]
	ins	$15,$19,8,8
	lbu	$19,1024($25)		# Td4[s0]

	ins	$12,$20,24,8
	lw	$8,0($3)
	ins	$13,$21,24,8
	lw	$9,4($3)
	ins	$14,$22,24,8
	lw	$10,8($3)
	ins	$15,$23,24,8
	lw	$11,12($3)

	ins	$12,$16,0,8
	ins	$13,$17,0,8
	ins	$14,$18,0,8
	ins	$15,$19,0,8
#else
	lbu	$16,1024($1)		# Td4[s2>>8]
	srl	$1,$8,24
	lbu	$17,1024($2)		# Td4[s3>>8]
	srl	$2,$9,24
	lbu	$18,1024($24)		# Td4[s0>>8]
	srl	$24,$10,24
	lbu	$19,1024($25)		# Td4[s1>>8]
	srl	$25,$11,24

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$20,1024($1)		# Td4[s0>>24]
	and	$1,$9,0xff
	lbu	$21,1024($2)		# Td4[s1>>24]
	and	$2,$10,0xff
	lbu	$22,1024($24)		# Td4[s2>>24]
	and	$24,$11,0xff
	lbu	$23,1024($25)		# Td4[s3>>24]
	and	$25,$8,0xff

	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7

	sll	$12,$12,16
	sll	$13,$13,16
	sll	$14,$14,16
	sll	$15,$15,16

	sll	$16,$16,8
	sll	$17,$17,8
	sll	$18,$18,8
	sll	$19,$19,8

	xor	$12,$16
	lbu	$16,1024($1)		# Td4[s1]
	xor	$13,$17
	lbu	$17,1024($2)		# Td4[s2]
	xor	$14,$18
	lbu	$18,1024($24)		# Td4[s3]
	xor	$15,$19
	lbu	$19,1024($25)		# Td4[s0]

	sll	$20,$20,24
	lw	$8,0($3)
	sll	$21,$21,24
	lw	$9,4($3)
	sll	$22,$22,24
	lw	$10,8($3)
	sll	$23,$23,24
	lw	$11,12($3)

	xor	$12,$20
	xor	$13,$21
	xor	$14,$22
	xor	$15,$23

	#sll	$16,$16,0
	#sll	$17,$17,0
	#sll	$18,$18,0
	#sll	$19,$19,0

	xor	$12,$16
	xor	$13,$17
	xor	$14,$18
	xor	$15,$19
#endif

	xor	$8,$12
	xor	$9,$13
	xor	$10,$14
	xor	$11,$15

	jr	$31
.end	_mips_AES_decrypt

# AES_decrypt: public single-block decryption entry point.
#   $4 = source block pointer (read with lwl/lwr pairs, so it may be unaligned)
#   $5 = destination block pointer (written with swr/swl pairs, may be unaligned)
#   $6 is not touched here; presumably it carries the key schedule consumed
#      by _mips_AES_decrypt -- TODO confirm against that routine.
# Builds a 64-byte frame, saves $31, $30 and $16-$23, points $7 at AES_Td and
# calls _mips_AES_decrypt, which leaves the resulting four words in $8-$11.
.align	5
.globl	AES_decrypt
.ent	AES_decrypt
AES_decrypt:
	.frame	$29,64,$31
	.mask	0xc0ff0000,-4		# saved registers: $31,$30 and $23..$16
	.set	noreorder
	.cpload	$25
	sub $29,64
	sw	$31,64-1*4($29)
	sw	$30,64-2*4($29)
	sw	$23,64-3*4($29)
	sw	$22,64-4*4($29)
	sw	$21,64-5*4($29)
	sw	$20,64-6*4($29)
	sw	$19,64-7*4($29)
	sw	$18,64-8*4($29)
	sw	$17,64-9*4($29)
	sw	$16,64-10*4($29)
	.set	reorder
	la	$7,AES_Td		# PIC-ified 'load address'

	# Unaligned load of the 16-byte input block into $8-$11.
	lwl	$8,0+0($4)
	lwl	$9,4+0($4)
	lwl	$10,8+0($4)
	lwl	$11,12+0($4)
	lwr	$8,0+3($4)
	lwr	$9,4+3($4)
	lwr	$10,8+3($4)
	lwr	$11,12+3($4)

	bal	_mips_AES_decrypt

	# Unaligned store of the result words $8-$11 to the output block.
	swr	$8,0+3($5)
	swr	$9,4+3($5)
	swr	$10,8+3($5)
	swr	$11,12+3($5)
	swl	$8,0+0($5)
	swl	$9,4+0($5)
	swl	$10,8+0($5)
	swl	$11,12+0($5)

	.set	noreorder
	lw	$31,64-1*4($29)
	lw	$30,64-2*4($29)
	lw	$23,64-3*4($29)
	lw	$22,64-4*4($29)
	lw	$21,64-5*4($29)
	lw	$20,64-6*4($29)
	lw	$19,64-7*4($29)
	lw	$18,64-8*4($29)
	lw	$17,64-9*4($29)
	lw	$16,64-10*4($29)
	jr	$31
	add $29,64			# branch delay slot: release the frame
.end	AES_decrypt
# _mips_AES_set_encrypt_key: internal key-expansion worker.
#   $4 = user key pointer (read with lwl/lwr, may be unaligned)
#   $5 = key length in bits (128, 192 or 256)
#   $6 = output key schedule
#   $7 = byte S-box base; both visible callers load AES_Te4 here, and the
#        rcon words are read starting at $7+256
# Result in $2: 0 on success, -1 when $4 or $6 is zero, -2 for any other
# bit length.  On success $6 is rewound to the start of the schedule, the
# round count (10/12/14) is stored at byte offset 240 of the schedule and
# is also left in $30 (AES_set_decrypt_key relies on that).
# Scratch: $1,$2,$24,$25 (S-box lookups), $3 (rcon pointer),
# $8-$15 (key words being expanded), $30 (loop counter).
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$29,0,$31
	.set	noreorder
	beqz	$4,.Lekey_done
	li	$2,-1			# delay slot: always executed, preset result -1
	beqz	$6,.Lekey_done
	add $3,$7,256			# delay slot: $3 = rcon pointer

	.set	reorder
	lwl	$8,0+0($4)	# load 128 bits
	lwl	$9,4+0($4)
	lwl	$10,8+0($4)
	lwl	$11,12+0($4)
	li	$1,128
	lwr	$8,0+3($4)
	lwr	$9,4+3($4)
	lwr	$10,8+3($4)
	lwr	$11,12+3($4)
	.set	noreorder
	beq	$5,$1,.L128bits
	li	$30,10			# delay slot: loop count for the 128-bit path

	.set	reorder
	lwl	$12,16+0($4)	# load 192 bits
	lwl	$13,20+0($4)
	li	$1,192
	lwr	$12,16+3($4)
	lwr	$13,20+3($4)
	.set	noreorder
	beq	$5,$1,.L192bits
	li	$30,8			# delay slot: loop count for the 192-bit path

	.set	reorder
	lwl	$14,24+0($4)	# load 256 bits
	lwl	$15,28+0($4)
	li	$1,256
	lwr	$14,24+3($4)
	lwr	$15,28+3($4)
	.set	noreorder
	beq	$5,$1,.L256bits
	li	$30,7			# delay slot: loop count for the 256-bit path

	b	.Lekey_done
	li	$2,-2			# delay slot: unsupported bit length

	# 128-bit expansion: each pass stores the current four words, then
	# derives the next four.  Ten passes advance $6 by 160 bytes.
.align	4
.L128bits:
	.set	reorder
	# Split the last word ($11) into bytes and look each one up in the
	# byte table at $7.
	srl	$1,$11,16
	srl	$2,$11,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$11,0xff
	srl	$25,$11,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sub	$30,1
	add $6,16

	# Reassemble the substituted bytes in rotated positions.
	sll	$1,$1,24
	sll	$2,$2,16
	sll	$24,$24,8
	#sll	$25,$25,0

	xor	$8,$1
	lw	$1,0($3)		# current rcon word
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10

	.set	noreorder
	bnez	$30,.L128bits
	add $3,4			# delay slot: advance rcon pointer

	# Store the final round key; $6 is now start+160, so 80($6) is
	# byte offset 240 of the schedule.
	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	li	$30,10
	sw	$11,12($6)
	li	$2,0
	sw	$30,80($6)
	b	.Lekey_done
	sub $6,10*16			# delay slot: rewind $6 to schedule start

	# 192-bit expansion: six words per pass, eight passes advance $6 by
	# 192 bytes.
.align	4
.L192bits:
	.set	reorder
	srl	$1,$13,16
	srl	$2,$13,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$13,0xff
	srl	$25,$13,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sw	$12,16($6)
	sw	$13,20($6)
	sub	$30,1
	add $6,24

	sll	$1,$1,24
	sll	$2,$2,16
	sll	$24,$24,8
	#sll	$25,$25,0

	xor	$8,$1
	lw	$1,0($3)		# current rcon word
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10
	xor	$12,$11
	xor	$13,$12

	.set	noreorder
	bnez	$30,.L192bits
	add $3,4			# delay slot: advance rcon pointer

	# Only four more words are stored; $6 is start+192, so 48($6) is
	# byte offset 240 of the schedule.
	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	li	$30,12
	sw	$11,12($6)
	li	$2,0
	sw	$30,48($6)
	b	.Lekey_done
	sub $6,12*16			# delay slot: rewind $6 to schedule start

	# 256-bit expansion: eight words per pass.  The first half of each
	# pass uses rotated S-box bytes plus rcon, the second half uses the
	# S-box bytes in place with no rcon.
.align	4
.L256bits:
	.set	reorder
	srl	$1,$15,16
	srl	$2,$15,8
	and	$1,0xff
	and	$2,0xff
	and	$24,$15,0xff
	srl	$25,$15,24
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)

	sw	$8,0($6)
	sw	$9,4($6)
	sw	$10,8($6)
	sw	$11,12($6)
	sw	$12,16($6)
	sw	$13,20($6)
	sw	$14,24($6)
	sw	$15,28($6)
	sub	$30,1

	sll	$1,$1,24
	sll	$2,$2,16
	sll	$24,$24,8
	#sll	$25,$25,0

	xor	$8,$1
	lw	$1,0($3)		# current rcon word
	xor	$8,$2
	xor	$8,$24
	xor	$8,$25
	xor	$8,$1

	xor	$9,$8
	xor	$10,$9
	xor	$11,$10
	beqz	$30,.L256bits_done	# last pass: only the first four words are needed

	# Second half: substitute the bytes of $11 without rotation.
	srl	$1,$11,24
	srl	$2,$11,16
	srl	$24,$11,8
	and	$25,$11,0xff
	and	$2,0xff
	and	$24,0xff
	add $1,$7
	add $2,$7
	add $24,$7
	add $25,$7
	lbu	$1,0($1)
	lbu	$2,0($2)
	lbu	$24,0($24)
	lbu	$25,0($25)
	sll	$1,24
	sll	$2,16
	sll	$24,8

	xor	$12,$1
	xor	$12,$2
	xor	$12,$24
	xor	$12,$25

	xor	$13,$12
	xor	$14,$13
	xor	$15,$14

	add $6,32
	.set	noreorder
	b	.L256bits
	add $3,4			# delay slot: advance rcon pointer

	# $6 is start+192 here (six full passes of 32 bytes), so the stores
	# below land at offsets 224..236 and the round count at offset 240.
.L256bits_done:
	sw	$8,32($6)
	sw	$9,36($6)
	sw	$10,40($6)
	li	$30,14
	sw	$11,44($6)
	li	$2,0
	sw	$30,48($6)
	sub $6,12*16			# rewind $6 to schedule start

.Lekey_done:
	jr	$31
	nop
.end	_mips_AES_set_encrypt_key

# AES_set_encrypt_key: public wrapper around _mips_AES_set_encrypt_key.
# $4, $5 and $6 are passed through untouched (the worker reads them as
# user key pointer, bit length and output schedule).  This wrapper only
# saves $31/$30 in a 32-byte frame, points $7 at AES_Te4 and calls the
# worker.  The worker leaves its status in $2 (0, -1 or -2); it is also
# copied to $4 before returning -- presumably for an ABI variant that
# returns values in $4, TODO confirm.
.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$29,32,$31
	.mask	0xc0000000,-4		# saved registers: $31 and $30
	.set	noreorder
	.cpload	$25
	sub $29,32
	sw	$31,32-1*4($29)
	sw	$30,32-2*4($29)
	.set	reorder
	la	$7,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$4,$2
	lw	$31,32-1*4($29)
	lw	$30,32-2*4($29)
	jr	$31
	add $29,32			# branch delay slot: release the frame
.end	AES_set_encrypt_key
# AES_set_decrypt_key: build a decryption key schedule.
# $4, $5 and $6 are passed through to _mips_AES_set_encrypt_key (user key
# pointer, bit length, output schedule).  Steps:
#   1. run the encryption key expansion; a negative status in $2 is
#      returned as-is;
#   2. reverse the order of the 16-byte round keys in place (.Lswap);
#   3. run every word of round keys 1..rounds-1 through the GF(2^8)
#      multiply-and-rotate mix in .Lmix (the first and last round keys are
#      left alone).
# After step 1 the code relies on $30 holding the round count and $6
# pointing at the start of the schedule, as left by the worker.
# Status is returned in $2 and copied to $4.
.align	5
.globl	AES_set_decrypt_key
.ent	AES_set_decrypt_key
AES_set_decrypt_key:
	.frame	$29,32,$31
	.mask	0xc0000000,-4		# saved registers: $31 and $30
	.set	noreorder
	.cpload	$25
	sub $29,32
	sw	$31,32-1*4($29)
	sw	$30,32-2*4($29)
	.set	reorder
	la	$7,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$2,.Ldkey_done

	sll	$1,$30,4		# $1 = rounds*16 = byte offset of last round key
	add $4,$6,0			# $4 walks forward from the first round key
	add $5,$6,$1			# $5 walks backward from the last round key
	# Swap 16 bytes at $4 with 16 bytes at $5 until the pointers meet on
	# the middle round key (the round count is even).
.align	4
.Lswap:
	lw	$8,0($4)
	lw	$9,4($4)
	lw	$10,8($4)
	lw	$11,12($4)
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,8($5)
	lw	$15,12($5)
	sw	$8,0($5)
	sw	$9,4($5)
	sw	$10,8($5)
	sw	$11,12($5)
	add $4,16
	sub $5,16
	sw	$12,-16($4)
	sw	$13,-12($4)
	sw	$14,-8($4)
	sw	$15,-4($4)
	bne	$4,$5,.Lswap

	# Set up the mix loop: $30 = (rounds-1)*4 words, $6 = second round key,
	# $2 = 0x80808080, $24 = 0x7f7f7f7f, $25 = 0x1b1b1b1b.
	lw	$8,16($6)		# modulo-scheduled
	lui	$2,0x8080
	sub	$30,1
	or	$2,0x8080
	sll	$30,2
	add $6,16
	lui	$25,0x1b1b
	nor	$24,$0,$2
	or	$25,0x1b1b
	# Per word: $8 = tp1 (input), $9 = tp2, $10 = tp4, $11 = tp8, each
	# obtained by doubling all four bytes of the previous value in GF(2^8):
	# shift the low 7 bits left and fold 0x1b into every byte whose top
	# bit was set.
.align	4
.Lmix:
	and	$1,$8,$2
	and	$9,$8,$24
	srl	$10,$1,7
	addu	$9,$9		# tp2<<1
	subu	$1,$10			# 0x80 -> 0x7f in each byte that had its top bit set
	and	$1,$25
	xor	$9,$1

	and	$1,$9,$2
	and	$10,$9,$24
	srl	$11,$1,7
	addu	$10,$10		# tp4<<1
	subu	$1,$11
	and	$1,$25
	xor	$10,$1

	and	$1,$10,$2
	and	$11,$10,$24
	srl	$12,$1,7
	addu	$11,$11		# tp8<<1
	subu	$1,$12
	and	$1,$25
	xor	$11,$1

	xor	$12,$11,$8		# $12 = tp8^tp1
	xor	$15,$11,$10		# $15 = tp8^tp4 (accumulator for the result)
	xor	$13,$12,$9		# $13 = tp8^tp1^tp2
	xor	$14,$12,$10		# $14 = tp8^tp1^tp4

	# Result = tp8^tp4^tp2 ^ rotr($14,16) ^ rotr($12,8) ^ rotr($13,24).
	# Without the R2 rotr instruction each rotate is built from an
	# srl/sll pair whose halves are xored in separately.
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$8,$14,16
	 xor	$15,$9
	rotr	$9,$12,8
	xor	$15,$8
	rotr	$10,$13,24
	xor	$15,$9
	lw	$8,4($6)		# modulo-scheduled
	xor	$15,$10
#else
	srl	$8,$14,16
	 xor	$15,$9
	sll	$9,$14,16
	xor	$15,$8
	srl	$8,$12,8
	xor	$15,$9
	sll	$9,$12,24
	xor	$15,$8
	srl	$8,$13,24
	xor	$15,$9
	sll	$9,$13,8
	xor	$15,$8
	lw	$8,4($6)		# modulo-scheduled
	xor	$15,$9
#endif
	sub	$30,1
	sw	$15,0($6)
	add $6,4
	bnez	$30,.Lmix

	li	$2,0
.Ldkey_done:
	.set	noreorder
	move	$4,$2
	lw	$31,32-1*4($29)
	lw	$30,32-2*4($29)
	jr	$31
	add $29,32			# branch delay slot: release the frame
.end	AES_set_decrypt_key
# Read-only lookup tables.
# AES_Te: 256 four-byte entries (1024 bytes, marked Te0 below), emitted as
# raw bytes, eight per row.
.rdata
.align	10
AES_Te:
.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a

# AES_Td: 256 four-byte entries (1024 bytes, marked Td0 below) followed
# directly by the 256-byte Td4 table; the decrypt code addresses Td4 as
# 1024($7) with $7 = AES_Td.
AES_Td:
.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42

.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

# AES_Te4: 256-byte table used for the byte lookups in the key expansion,
# followed directly by ten 4-byte rcon words; _mips_AES_set_encrypt_key
# reads rcon starting at AES_Te4+256.
AES_Te4:
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00


================================================
FILE: lib/aes_acc/asm/x64.S
================================================
.text	


# _vpaes_encrypt_core: encrypt the single block held in %xmm0.
#   in:  %xmm0 = input block, %rdx = key schedule (round keys read with
#        movdqu from (%rdx) upward; loop counter read from 240(%rdx))
#   out: %xmm0 = result
#   %xmm9-%xmm15 are read but never written here, so they must already hold
#   constants set up by the caller (presumably by _vpaes_preheat, which is
#   outside this view -- TODO confirm).
#   Clobbers %xmm1-%xmm5, %rax, %r9, %r10, %r11 and the flags.
# Each ".byte 102,15,56,0,NN" is a hand-encoded SSSE3 pshufb; the decoded
# form is given in the trailing comment.  ".byte 0xf3,0xc3" is "rep ret".
.type	_vpaes_encrypt_core,@function
.align	16
_vpaes_encrypt_core:
	movq	%rdx,%r9		# %r9 walks the round keys
	movq	$16,%r11		# %r11 cycles through 0x00..0x30 as a table offset
	movl	240(%rdx),%eax		# loop counter
	movdqa	%xmm9,%xmm1
	movdqa	.Lk_ipt(%rip),%xmm2
	pandn	%xmm0,%xmm1		# %xmm1 = input bits not selected by %xmm9
	movdqu	(%r9),%xmm5		# first round key
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0		# %xmm0 = input bits selected by %xmm9
.byte	102,15,56,0,208		# pshufb %xmm0,%xmm2
	movdqa	.Lk_ipt+16(%rip),%xmm0
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	pxor	%xmm5,%xmm2
	addq	$16,%r9			# last flag-setting instruction before the jump
	pxor	%xmm2,%xmm0
	leaq	.Lk_mc_backward(%rip),%r10
	# ZF is clear here (nonzero result of the addq above), so the jnz at
	# the bottom of .Lenc_entry falls into the loop on the first pass.
	jmp	.Lenc_entry

.align	16
.Lenc_loop:

	movdqa	%xmm13,%xmm4
	movdqa	%xmm12,%xmm0
.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
.byte	102,15,56,0,195		# pshufb %xmm3,%xmm0
	pxor	%xmm5,%xmm4		# add the round key loaded at the end of .Lenc_entry
	movdqa	%xmm15,%xmm5
	pxor	%xmm4,%xmm0
	movdqa	-64(%r11,%r10,1),%xmm1	# permutation 64 bytes below .Lk_mc_backward[%r11]
.byte	102,15,56,0,234		# pshufb %xmm2,%xmm5
	movdqa	(%r11,%r10,1),%xmm4	# permutation .Lk_mc_backward[%r11]
	movdqa	%xmm14,%xmm2
.byte	102,15,56,0,211		# pshufb %xmm3,%xmm2
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	addq	$16,%r9			# next round key
	pxor	%xmm2,%xmm0
.byte	102,15,56,0,220		# pshufb %xmm4,%xmm3
	addq	$16,%r11
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	andq	$0x30,%r11		# keep the table offset within 0x00..0x30
	subq	$1,%rax			# sets ZF for the jnz below; nothing in between touches flags
	pxor	%xmm3,%xmm0

.Lenc_entry:

	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
.byte	102,15,56,0,232		# pshufb %xmm0,%xmm5
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3
.byte	102,15,56,0,224		# pshufb %xmm0,%xmm4
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,211		# pshufb %xmm3,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
.byte	102,15,56,0,220		# pshufb %xmm4,%xmm3
	movdqu	(%r9),%xmm5		# round key for the next step
	pxor	%xmm1,%xmm3
	jnz	.Lenc_loop


	# Final step: two table lookups at fixed offsets below .Lk_mc_backward,
	# round-key xor, then one last byte permutation.
	movdqa	-96(%r10),%xmm4
	movdqa	-80(%r10),%xmm0
.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,195		# pshufb %xmm3,%xmm0
	movdqa	64(%r11,%r10,1),%xmm1
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	.byte	0xf3,0xc3
.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core


# _vpaes_decrypt_core: decrypt the single block held in %xmm0.
#   in:  %xmm0 = input block, %rdx = key schedule (round keys read with
#        movdqu from (%rdx) upward; loop counter read from 240(%rdx))
#   out: %xmm0 = result
#   %xmm9-%xmm11 are read but never written here, so they must already hold
#   constants set up by the caller (presumably by _vpaes_preheat, which is
#   outside this view -- TODO confirm).
#   Clobbers %xmm1-%xmm5, %rax, %r9, %r10, %r11 and the flags.
# ".byte 102,15,56,0,NN" is a hand-encoded pshufb and
# ".byte 102,15,58,15,NN,imm" a hand-encoded palignr; the decoded form is
# given in the trailing comment.  ".byte 0xf3,0xc3" is "rep ret".
.type	_vpaes_decrypt_core,@function
.align	16
_vpaes_decrypt_core:
	movq	%rdx,%r9		# %r9 walks the round keys
	movl	240(%rdx),%eax		# loop counter
	movdqa	%xmm9,%xmm1
	movdqa	.Lk_dipt(%rip),%xmm2
	pandn	%xmm0,%xmm1
	movq	%rax,%r11
	psrld	$4,%xmm1
	movdqu	(%r9),%xmm5		# first round key
	shlq	$4,%r11
	pand	%xmm9,%xmm0
.byte	102,15,56,0,208		# pshufb %xmm0,%xmm2
	movdqa	.Lk_dipt+16(%rip),%xmm0
	xorq	$0x30,%r11
	leaq	.Lk_dsbd(%rip),%r10
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	andq	$0x30,%r11		# %r11 = ((counter*16) ^ 0x30) & 0x30
	pxor	%xmm5,%xmm2
	movdqa	.Lk_mc_forward+48(%rip),%xmm5
	pxor	%xmm2,%xmm0
	addq	$16,%r9
	addq	%r10,%r11		# %r11 = .Lk_dsbd + offset; used only for the final permutation
	# ZF is clear here (nonzero address from the addq above), so the jnz
	# at the bottom of .Ldec_entry falls into the loop on the first pass.
	jmp	.Ldec_entry

.align	16
.Ldec_loop:


	# Four table pairs at -32/-16, 0/16, 32/48 and 64/80 from .Lk_dsbd are
	# applied in turn; between pairs the running value is permuted by %xmm5.
	movdqa	-32(%r10),%xmm4
	movdqa	-16(%r10),%xmm1
.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
.byte	102,15,56,0,203		# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	0(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	16(%r10),%xmm1

.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
.byte	102,15,56,0,203		# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	32(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	48(%r10),%xmm1

.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
.byte	102,15,56,0,203		# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	64(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	80(%r10),%xmm1

.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
.byte	102,15,56,0,203		# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	addq	$16,%r9			# next round key
.byte	102,15,58,15,237,12	# palignr $12,%xmm5,%xmm5 (rotate the permutation in %xmm5)
	pxor	%xmm1,%xmm0
	subq	$1,%rax			# sets ZF for the jnz below; nothing in between touches flags

.Ldec_entry:

	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	movdqa	%xmm11,%xmm2
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
.byte	102,15,56,0,208		# pshufb %xmm0,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	movdqa	%xmm10,%xmm4
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,224		# pshufb %xmm0,%xmm4
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211		# pshufb %xmm3,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
.byte	102,15,56,0,220		# pshufb %xmm4,%xmm3
	movdqu	(%r9),%xmm0		# round key for the next step (the state now lives in %xmm2/%xmm3)
	pxor	%xmm1,%xmm3
	jnz	.Ldec_loop


	# Final step: last table pair at 96/112 from .Lk_dsbd, round-key xor,
	# then the closing permutation selected through %r11.
	movdqa	96(%r10),%xmm4
.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
	pxor	%xmm0,%xmm4
	movdqa	112(%r10),%xmm0
	movdqa	-352(%r11),%xmm2
.byte	102,15,56,0,195		# pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,194		# pshufb %xmm2,%xmm0
	.byte	0xf3,0xc3
.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core


# _vpaes_schedule_core: shared key-schedule driver for both directions.
#   %rdi = user key, %esi = key length in bits
#   %rdx = where the first round key is written; helpers move it by 16
#          per round key (upward when %rcx is zero, downward otherwise)
#   %rcx = zero for an encryption schedule, nonzero for decryption
#   %r8  = byte offset (0x00..0x30) into the .Lk_sr permutation table
# Any bit length below 192 takes the 128-bit path, exactly 192 the
# 192-bit path and anything above the 256-bit path.
# Calls _vpaes_preheat first (outside this view; presumably it loads the
# %xmm9-%xmm15 constants the helpers read -- TODO confirm).
# %xmm0-%xmm7 are zeroed before returning.
# ".byte 102,15,56,0,NN" is a hand-encoded pshufb and
# ".byte 102,15,58,15,NN,imm" a hand-encoded palignr.
.type	_vpaes_schedule_core,@function
.align	16
_vpaes_schedule_core:


	call	_vpaes_preheat
	movdqa	.Lk_rcon(%rip),%xmm8	# %xmm8 = round-constant vector consumed by _vpaes_schedule_round
	movdqu	(%rdi),%xmm0		# first 16 key bytes


	movdqa	%xmm0,%xmm3		# keep an untransformed copy for the decrypt path
	leaq	.Lk_ipt(%rip),%r11
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm7		# %xmm7 = running key state

	leaq	.Lk_sr(%rip),%r10
	testq	%rcx,%rcx
	jnz	.Lschedule_am_decrypting


	movdqu	%xmm0,(%rdx)		# encrypt: store the transformed key as-is
	jmp	.Lschedule_go

.Lschedule_am_decrypting:

	# decrypt: store the untransformed key permuted by .Lk_sr[%r8]
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	movdqu	%xmm3,(%rdx)
	xorq	$0x30,%r8

.Lschedule_go:
	cmpl	$192,%esi
	ja	.Lschedule_256
	je	.Lschedule_192


.Lschedule_128:
	movl	$10,%esi		# 10 rounds; %esi is reused as the counter

.Loop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	.Lschedule_mangle_last
	call	_vpaes_schedule_mangle
	jmp	.Loop_schedule_128


.align	16
.Lschedule_192:
	movdqu	8(%rdi),%xmm0		# key bytes 8..23
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm6
	pxor	%xmm4,%xmm4
	movhlps	%xmm4,%xmm6		# zero the low 64 bits of %xmm6
	movl	$4,%esi			# 4 passes, each writing three round keys (the last pass leaves via mangle_last)

.Loop_schedule_192:
	call	_vpaes_schedule_round
.byte	102,15,58,15,198,8	# palignr $8,%xmm6,%xmm0
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_round
	decq	%rsi
	jz	.Lschedule_mangle_last
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	jmp	.Loop_schedule_192


.align	16
.Lschedule_256:
	movdqu	16(%rdi),%xmm0		# key bytes 16..31
	call	_vpaes_schedule_transform
	movl	$7,%esi			# 7 passes, each writing two round keys (the last pass leaves via mangle_last)

.Loop_schedule_256:
	call	_vpaes_schedule_mangle
	movdqa	%xmm0,%xmm6		# remember the high half for the low round below


	call	_vpaes_schedule_round
	decq	%rsi
	jz	.Lschedule_mangle_last
	call	_vpaes_schedule_mangle


	# Low round: same core as _vpaes_schedule_round but entered past the
	# rcon/rotate prologue, run on the saved high half, with %xmm7
	# preserved around the call through %xmm5.
	pshufd	$0xFF,%xmm0,%xmm0
	movdqa	%xmm7,%xmm5
	movdqa	%xmm6,%xmm7
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7

	jmp	.Loop_schedule_256


.align	16
.Lschedule_mangle_last:

	# Last round key: encrypt applies the .Lk_sr[%r8] permutation and the
	# .Lk_opt transform and moves %rdx up by a net 16; decrypt uses the
	# .Lk_deskew transform and moves %rdx down by 16.
	leaq	.Lk_deskew(%rip),%r11
	testq	%rcx,%rcx
	jnz	.Lschedule_mangle_last_dec


	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	leaq	.Lk_opt(%rip),%r11
	addq	$32,%rdx

.Lschedule_mangle_last_dec:
	addq	$-16,%rdx
	pxor	.Lk_s63(%rip),%xmm0
	call	_vpaes_schedule_transform
	movdqu	%xmm0,(%rdx)


	# Wipe the registers that held key material.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	.byte	0xf3,0xc3
.size	_vpaes_schedule_core,.-_vpaes_schedule_core


# _vpaes_schedule_192_smear: helper for the 192-bit key schedule.
#   in:  %xmm6 (partial key state), %xmm7 (current key state)
#   out: %xmm0 = %xmm6 ^ shuffled(%xmm6) ^ shuffled(%xmm7);
#        %xmm6 = the same value with its low 64 bits cleared
#   Clobbers %xmm1 (left zero).
# With dwords numbered 0 (lowest) to 3:
#   pshufd $0x80 yields (s0,s0,s0,s2) from %xmm6,
#   pshufd $0xFE yields (s2,s3,s3,s3) from %xmm7.
.type	_vpaes_schedule_192_smear,@function
.align	16
_vpaes_schedule_192_smear:
	pshufd	$0x80,%xmm6,%xmm1
	pshufd	$0xFE,%xmm7,%xmm0
	pxor	%xmm1,%xmm6
	pxor	%xmm1,%xmm1
	pxor	%xmm0,%xmm6
	movdqa	%xmm6,%xmm0
	movhlps	%xmm1,%xmm6		# %xmm1 is zero: clears the low 64 bits of %xmm6
	.byte	0xf3,0xc3
.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear


# _vpaes_schedule_round: one key-schedule round, with a second entry point
# _vpaes_schedule_low_round that skips the rcon/rotate prologue.
#   in:  %xmm0 = word source for this round, %xmm7 = previous key state,
#        %xmm8 = round-constant vector (full round only)
#   out: %xmm0 = %xmm7 = new key state; %xmm8 rotated by one byte
#        (full round only)
#   %xmm9-%xmm13 are read but never written here, so they must already
#   hold constants set up by the caller (presumably by _vpaes_preheat,
#   outside this view -- TODO confirm).
#   Clobbers %xmm1-%xmm4.
# ".byte 102,[REX,]15,58,15,NN,imm" is a hand-encoded palignr and
# ".byte 102,15,56,0,NN" a hand-encoded pshufb.
.type	_vpaes_schedule_round,@function
.align	16
_vpaes_schedule_round:

	# Pull the top byte of %xmm8 into byte 0 of a zeroed %xmm1, rotate
	# %xmm8 so the next byte is on top, and xor the extracted byte into
	# the key state.
	pxor	%xmm1,%xmm1
.byte	102,65,15,58,15,200,15	# palignr $15,%xmm8,%xmm1
.byte	102,69,15,58,15,192,15	# palignr $15,%xmm8,%xmm8
	pxor	%xmm1,%xmm7


	# Broadcast the top dword of %xmm0 and rotate it by one byte.
	pshufd	$0xFF,%xmm0,%xmm0
.byte	102,15,58,15,192,1	# palignr $1,%xmm0,%xmm0


_vpaes_schedule_low_round:

	# Smear %xmm7: x ^= x<<32; x ^= x<<64 (byte shifts of 4 and 8),
	# then xor in the .Lk_s63 constant.
	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	.Lk_s63(%rip),%xmm7


	# Nibble-split table lookup on %xmm0 using %xmm9-%xmm13 (the same
	# instruction pattern as .Lenc_entry in _vpaes_encrypt_core).
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	movdqa	%xmm11,%xmm2
.byte	102,15,56,0,208		# pshufb %xmm0,%xmm2
	pxor	%xmm1,%xmm0
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
	movdqa	%xmm10,%xmm4
.byte	102,15,56,0,224		# pshufb %xmm0,%xmm4
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211		# pshufb %xmm3,%xmm2
	pxor	%xmm0,%xmm2
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,220		# pshufb %xmm4,%xmm3
	pxor	%xmm1,%xmm3
	movdqa	%xmm13,%xmm4
.byte	102,15,56,0,226		# pshufb %xmm2,%xmm4
	movdqa	%xmm12,%xmm0
.byte	102,15,56,0,195		# pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0


	# Combine with the smeared key state and make it the new state.
	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7
	.byte	0xf3,0xc3
.size	_vpaes_schedule_round,.-_vpaes_schedule_round


# _vpaes_schedule_transform: apply a two-table byte transform to %xmm0.
#   in:  %xmm0 = value, %r11 = address of a 32-byte table pair,
#        %xmm9 = bit mask (presumably 0x0F in every byte, loaded outside
#        this view -- TODO confirm)
#   out: %xmm0 = table0[value & mask] ^ table1[(value & ~mask) >> 4],
#        looked up per byte with pshufb
#   Clobbers %xmm1 and %xmm2.
.type	_vpaes_schedule_transform,@function
.align	16
_vpaes_schedule_transform:
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1		# bits of %xmm0 outside the mask
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0		# bits of %xmm0 inside the mask
	movdqa	(%r11),%xmm2
.byte	102,15,56,0,208		# pshufb %xmm0,%xmm2
	movdqa	16(%r11),%xmm0
.byte	102,15,56,0,193		# pshufb %xmm1,%xmm0
	pxor	%xmm2,%xmm0
	.byte	0xf3,0xc3
.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform


# _vpaes_schedule_mangle: convert the key state in %xmm0 into its stored
# form and write it to the schedule.
#   in:  %xmm0 = key state (left unchanged; the work is done on a copy)
#        %rcx  = zero for encryption, nonzero for decryption
#        %rdx  = schedule cursor, %r8 = offset into the table at %r10
#        %r10  = permutation table base (the visible caller loads .Lk_sr)
#   out: 16 bytes stored at the updated %rdx; %rdx moved by +16 (encrypt)
#        or -16 (decrypt); %r8 = (%r8 - 16) & 0x30
#   Clobbers %xmm1-%xmm5, plus %r11 on the decrypt path.
# ".byte 102,15,56,0,NN" is a hand-encoded pshufb.
.type	_vpaes_schedule_mangle,@function
.align	16
_vpaes_schedule_mangle:
	movdqa	%xmm0,%xmm4
	movdqa	.Lk_mc_forward(%rip),%xmm5
	testq	%rcx,%rcx
	jnz	.Lschedule_mangle_dec


	# Encrypt: xor with .Lk_s63, then xor together the value permuted
	# once, twice and three times by .Lk_mc_forward.
	addq	$16,%rdx
	pxor	.Lk_s63(%rip),%xmm4
.byte	102,15,56,0,229		# pshufb %xmm5,%xmm4
	movdqa	%xmm4,%xmm3
.byte	102,15,56,0,229		# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3
.byte	102,15,56,0,229		# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3

	jmp	.Lschedule_mangle_both
.align	16
.Lschedule_mangle_dec:

	# Decrypt: split into the two mask-selected halves, then run four
	# table pairs from .Lk_dksd, permuting the accumulator by
	# .Lk_mc_forward between pairs.
	leaq	.Lk_dksd(%rip),%r11
	movdqa	%xmm9,%xmm1
	pandn	%xmm4,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm4

	movdqa	0(%r11),%xmm2
.byte	102,15,56,0,212		# pshufb %xmm4,%xmm2
	movdqa	16(%r11),%xmm3
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221		# pshufb %xmm5,%xmm3

	movdqa	32(%r11),%xmm2
.byte	102,15,56,0,212		# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	48(%r11),%xmm3
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221		# pshufb %xmm5,%xmm3

	movdqa	64(%r11),%xmm2
.byte	102,15,56,0,212		# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	80(%r11),%xmm3
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221		# pshufb %xmm5,%xmm3

	movdqa	96(%r11),%xmm2
.byte	102,15,56,0,212		# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	112(%r11),%xmm3
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3

	addq	$-16,%rdx

.Lschedule_mangle_both:
	# Final permutation selected by %r8, step %r8 for the next call, store.
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217		# pshufb %xmm1,%xmm3
	addq	$-16,%r8
	andq	$0x30,%r8
	movdqu	%xmm3,(%rdx)
	.byte	0xf3,0xc3
.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle


/*
 * vpaes_set_encrypt_key -- exported entry point.
 *
 * Registers used as inputs (System V AMD64 argument registers):
 *   %rdi  first argument; not touched here, passed through unchanged to
 *         _vpaes_schedule_core (presumably the user key pointer -- the core is
 *         only partly visible in this chunk)
 *   %esi  key size in bits
 *   %rdx  key-schedule structure; the dword at offset 240 receives bits/32 + 5
 *         (9, 11 or 13 for 128-, 192- or 256-bit keys)
 * Calls _vpaes_schedule_core with %ecx = 0 and %r8d = 0x30.
 * Returns 0 in %eax unconditionally; %esi is not validated.
 */
.globl	vpaes_set_encrypt_key
.type	vpaes_set_encrypt_key,@function
.align	16
vpaes_set_encrypt_key:
	/* 240(%rdx) = (bits >> 5) + 5 */
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax
	movl	%eax,240(%rdx)

	/* %ecx = 0 makes _vpaes_schedule_mangle take its forward path */
	movl	$0,%ecx
	movl	$0x30,%r8d
	call	_vpaes_schedule_core
	xorl	%eax,%eax
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key

/*
 * vpaes_set_decrypt_key -- exported entry point.
 *
 * Registers used as inputs (System V AMD64 argument registers):
 *   %rdi  first argument; passed through unchanged to _vpaes_schedule_core
 *         (presumably the user key pointer -- TODO confirm in the core)
 *   %esi  key size in bits
 *   %rdx  key-schedule structure; the dword at offset 240 receives bits/32 + 5
 * Before the call %rdx is moved to key + 16*(bits/32 + 5) + 16, i.e. the
 * schedule is filled from its far end (the dec path of _vpaes_schedule_mangle
 * steps %rdx downwards by 16 per store).
 * Calls _vpaes_schedule_core with %ecx = 1 and
 * %r8d = ((bits >> 1) & 32) ^ 32, which is 32 for 128/256-bit keys and 0 for
 * 192-bit keys.
 * Returns 0 in %eax unconditionally; %esi is not validated.
 */
.globl	vpaes_set_decrypt_key
.type	vpaes_set_decrypt_key,@function
.align	16
vpaes_set_decrypt_key:
	/* 240(%rdx) = (bits >> 5) + 5 */
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax
	movl	%eax,240(%rdx)
	/* %rdx = key + 16 * stored value + 16 */
	shll	$4,%eax
	leaq	16(%rdx,%rax,1),%rdx

	/* %ecx = 1 selects the dec path of _vpaes_schedule_mangle */
	movl	$1,%ecx
	movl	%esi,%r8d
	shrl	$1,%r8d
	andl	$32,%r8d
	xorl	$32,%r8d
	call	_vpaes_schedule_core
	xorl	%eax,%eax
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	vpaes_set_decrypt_key,.-vpaes_set_decrypt_key

/*
 * vpaes_encrypt -- exported entry point, processes one 16-byte block.
 *
 *   %rdi  source block (16 bytes, read with an unaligned load)
 *   %rsi  destination block (16 bytes, unaligned store)
 *   %rdx  not touched here; presumably the key schedule consumed by
 *         _vpaes_encrypt_core, which is defined outside this chunk
 * The block travels through %xmm0; _vpaes_preheat loads the constant
 * registers %xmm9..%xmm15 first.  No value is returned.
 */
.globl	vpaes_encrypt
.type	vpaes_encrypt,@function
.align	16
vpaes_encrypt:
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	vpaes_encrypt,.-vpaes_encrypt

/*
 * vpaes_decrypt -- exported entry point, processes one 16-byte block.
 *
 *   %rdi  source block (16 bytes, read with an unaligned load)
 *   %rsi  destination block (16 bytes, unaligned store)
 *   %rdx  not touched here; presumably the key schedule consumed by
 *         _vpaes_decrypt_core, which is defined outside this chunk
 * The block travels through %xmm0; _vpaes_preheat loads the constant
 * registers %xmm9..%xmm15 first.  No value is returned.
 */
.globl	vpaes_decrypt
.type	vpaes_decrypt,@function
.align	16
vpaes_decrypt:
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_decrypt_core
	movdqu	%xmm0,(%rsi)
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	vpaes_decrypt,.-vpaes_decrypt
/*
 * vpaes_cbc_encrypt -- exported entry point, CBC chaining over whole blocks.
 *
 *   %rdi  input buffer
 *   %rsi  output buffer
 *   %rdx  length in bytes
 *   %rcx  fourth argument; swapped into %rdx on entry (presumably the key
 *         schedule read by the cores, which are defined outside this chunk)
 *   %r8   16-byte chaining value (IV): read on entry, updated on exit
 *   %r9d  0 selects decryption, anything else encryption
 *
 * Behaviour visible in the code:
 *   - If length < 16 the function returns immediately and the IV is untouched.
 *   - Only whole 16-byte blocks are processed; a trailing partial block is
 *     ignored.
 *   - %rsi is turned into (out - in) so that a single cursor (%rdi) addresses
 *     both buffers; each input block is fully loaded before its output is
 *     stored.
 * The loops rely on the cores preserving %xmm6, %xmm7, %rcx, %rdi, %rsi and
 * %r8 (cannot be checked from this chunk).
 */
.globl	vpaes_cbc_encrypt
.type	vpaes_cbc_encrypt,@function
.align	16
vpaes_cbc_encrypt:
	/* %rcx = length, %rdx = original fourth argument */
	xchgq	%rcx,%rdx
	/* bias the counter by one block; a borrow means fewer than 16 bytes */
	subq	$16,%rcx
	jc	.Lcbc_abort
	/* xmm6 carries the chaining value throughout */
	movdqu	(%r8),%xmm6
	subq	%rdi,%rsi
	call	_vpaes_preheat
	cmpl	$0,%r9d
	je	.Lcbc_dec_loop
	jmp	.Lcbc_enc_loop
.align	16
.Lcbc_enc_loop:
	/* C[i] = E(P[i] ^ C[i-1]); the new ciphertext becomes the chaining value */
	movdqu	(%rdi),%xmm0
	pxor	%xmm6,%xmm0
	call	_vpaes_encrypt_core
	movdqa	%xmm0,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	.Lcbc_enc_loop
	jmp	.Lcbc_done
.align	16
.Lcbc_dec_loop:
	/* P[i] = D(C[i]) ^ C[i-1]; the ciphertext is kept in xmm7 across the call */
	movdqu	(%rdi),%xmm0
	movdqa	%xmm0,%xmm7
	call	_vpaes_decrypt_core
	pxor	%xmm6,%xmm0
	movdqa	%xmm7,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	.Lcbc_dec_loop
.Lcbc_done:
	/* write the final chaining value back for the next call */
	movdqu	%xmm6,(%r8)
.Lcbc_abort:
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	vpaes_cbc_encrypt,.-vpaes_cbc_encrypt


/*
 * _vpaes_preheat
 *
 * Loads the constants shared by the vpaes routines into registers.  All
 * offsets are relative to .Lk_s0F and depend on the layout of _vpaes_consts:
 *   %r10   = address of .Lk_s0F
 *   %xmm10 = .Lk_inv  row 0   (-32)
 *   %xmm11 = .Lk_inv  row 1   (-16)
 *   %xmm9  = .Lk_s0F          (  0)  0x0F in every byte
 *   %xmm13 = .Lk_sb1  row 0   ( 48)
 *   %xmm12 = .Lk_sb1  row 1   ( 64)
 *   %xmm15 = .Lk_sb2  row 0   ( 80)
 *   %xmm14 = .Lk_sb2  row 1   ( 96)
 * Takes no inputs and touches nothing else.
 */
.type	_vpaes_preheat,@function
.align	16
_vpaes_preheat:
	leaq	.Lk_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10
	movdqa	-16(%r10),%xmm11
	movdqa	0(%r10),%xmm9
	movdqa	48(%r10),%xmm13
	movdqa	64(%r10),%xmm12
	movdqa	80(%r10),%xmm15
	movdqa	96(%r10),%xmm14
	/* 0xf3,0xc3 = "rep ret" */
	.byte	0xf3,0xc3
.size	_vpaes_preheat,.-_vpaes_preheat


/*
 * _vpaes_consts -- constant pool for the vpaes routines.
 *
 * Every table is a sequence of 16-byte rows (two .quad values per row, low
 * quadword first) that are loaded with movdqa and mostly used as pshufb
 * tables or masks.
 *
 * Layout constraints visible in this file -- do NOT reorder or insert data:
 *   - _vpaes_preheat addresses .Lk_inv, .Lk_sb1 and .Lk_sb2 by fixed offsets
 *     (-32 .. +96) from .Lk_s0F.
 *   - _vpaes_schedule_mangle reads 8 consecutive rows starting at .Lk_dksd,
 *     i.e. .Lk_dksd, .Lk_dksb, .Lk_dkse and .Lk_dks9 must stay contiguous.
 *   - movdqa requires every row to stay 16-byte aligned.
 * The per-table descriptions below are limited to what the visible code does
 * with them; the encrypt/decrypt cores that use the remaining tables are
 * outside this chunk.
 */
.type	_vpaes_consts,@object
.align	64
_vpaes_consts:
/* loaded into xmm10/xmm11 by _vpaes_preheat */
.Lk_inv:
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809

/* low-nibble mask, loaded into xmm9 by _vpaes_preheat */
.Lk_s0F:
.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

.Lk_ipt:
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

/* .Lk_sb1 -> xmm13/xmm12, .Lk_sb2 -> xmm15/xmm14 in _vpaes_preheat */
.Lk_sb1:
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

/* four byte-permutation masks; row 0 is the xmm5 mask in _vpaes_schedule_mangle */
.Lk_mc_forward:
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605

.Lk_mc_backward:
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F

/* four byte-permutation masks; row 0 is the identity permutation */
.Lk_sr:
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

.Lk_rcon:
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

/* xored into the key-schedule value in the schedule round and mangle routines */
.Lk_s63:
.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

.Lk_opt:
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

.Lk_deskew:
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77


/* start of the 8-row block walked by .Lschedule_mangle_dec (offsets 0..112) */
.Lk_dksd:
.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:
.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:
.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:
.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE


.Lk_dipt:
.quad	0x0F505B040B545F00, 0x154A411E114E451A
.quad	0x86E383E660056500, 0x12771772F491F194

.Lk_dsb9:
.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:
.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:
.quad	0xD022649296B44200, 0x602646F6B0F2D404
.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:
.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.Lk_dsbo:
.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
/*
 * NUL-terminated ASCII identification string:
 * "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
 */
.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align	64
.size	_vpaes_consts,.-_vpaes_consts


================================================
FILE: lib/aes_acc/asm/x86.S
================================================
.file	"aes-586.s"
.text
/*
 * _x86_AES_encrypt_compact
 *
 * Integer-only AES block encryption that uses a 256-byte S-box instead of
 * the large .LAES_Te word table.  Internal routine with a register/stack
 * contract set up by AES_encrypt:
 *
 *   In:   %eax,%ebx,%ecx,%edx = the four 32-bit state words
 *         %edi = key schedule; the dword at 240(%edi) is the round count
 *         %ebp = S-box base + 128 (every lookup is -128(%ebp,index,1))
 *         28(%esp) = copy of %ebp (AES_encrypt stores it at 24(%esp) before
 *                    the call pushes the return address); %ebp is reloaded
 *                    from there because it doubles as scratch below
 *   Out:  %eax,%ebx,%ecx,%edx = encrypted state words
 *   Scratch stack slots (relative to %esp inside this routine):
 *         4, 8   temporaries for the first two new state words
 *         20     current round-key pointer
 *         24     end pointer = key + 16*(rounds-1)
 *   Clobbers: %esi, %edi, %ebp, flags
 *
 * Magic numbers: 2155905152 = 0x80808080, 4278124286 = 0xFEFEFEFE,
 * 454761243 = 0x1B1B1B1B.
 */
.type	_x86_AES_encrypt_compact,@function
.align	16
_x86_AES_encrypt_compact:
	movl	%edi,20(%esp)
	/* initial AddRoundKey with the first 16 bytes of the schedule */
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* 24(%esp) = key + (2*rounds - 2)*8 */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
	/*
	 * Touch the 256-byte table every 32 bytes.  The loaded values are dead
	 * (both registers are overwritten at the top of the loop); presumably this
	 * pulls the whole S-box into cache before the data-dependent lookups.
	 */
	movl	-128(%ebp),%edi
	movl	-96(%ebp),%esi
	movl	-64(%ebp),%edi
	movl	-32(%ebp),%esi
	movl	(%ebp),%edi
	movl	32(%ebp),%esi
	movl	64(%ebp),%edi
	movl	96(%ebp),%esi
.align	16
.L000loop:
	/* t0 = S[s0.b0] | S[s1.b1]<<8 | S[s2.b2]<<16 | S[s3.b3]<<24  -> 4(%esp) */
	movl	%eax,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)

	/* t1 = S[s1.b0] | S[s2.b1]<<8 | S[s3.b2]<<16 | S[s0.b3]<<24  -> 8(%esp) */
	/* (%ebx is shifted down by 16 here; its upper bytes are consumed later) */
	movl	%ebx,%esi
	andl	$255,%esi
	shrl	$16,%ebx
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%ch,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)

	/* t2 = S[s2.b0] | S[s3.b1]<<8 | S[s0.b2]<<16 | S[s1.b3]<<24  -> %esi */
	/* (%bh now holds the original top byte of s1 because of the shift above) */
	movl	%ecx,%esi
	andl	$255,%esi
	shrl	$24,%ecx
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%dh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edx
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi

	/* t3 = S[s3.b0] | S[s0.b1]<<8 | S[s1.b2]<<16 | S[s2.b3]<<24  -> %edx */
	/* then reload t0/t1 so that eax,ebx,ecx,edx = t0,t1,t2,t3 */
	andl	$255,%edx
	movzbl	-128(%ebp,%edx,1),%edx
	movzbl	%ah,%eax
	movzbl	-128(%ebp,%eax,1),%eax
	shll	$8,%eax
	xorl	%eax,%edx
	movl	4(%esp),%eax
	andl	$255,%ebx
	movzbl	-128(%ebp,%ebx,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%edx
	movl	8(%esp),%ebx
	movzbl	-128(%ebp,%ecx,1),%ecx
	shll	$24,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx

	/*
	 * Column mixing, one 32-bit word at a time (order: ecx, edx, eax, ebx).
	 * For a word x the packed GF(2^8) doubling is
	 *   x2 = ((x + x) & 0xFEFEFEFE) ^ ((h - (h >> 7)) & 0x1B1B1B1B),
	 *   h  = x & 0x80808080
	 * and the result is assembled from x, x2 and rotations by 8/16/24 bits.
	 * %ebp is used as scratch here, hence the reload from 28(%esp) below.
	 */
	movl	$2155905152,%ebp
	andl	%ecx,%ebp
	leal	(%ecx,%ecx,1),%edi
	movl	%ebp,%esi
	shrl	$7,%ebp
	andl	$4278124286,%edi
	subl	%ebp,%esi
	movl	%ecx,%ebp
	andl	$454761243,%esi
	rorl	$16,%ebp
	xorl	%edi,%esi
	movl	%ecx,%edi
	xorl	%esi,%ecx
	rorl	$24,%edi
	xorl	%ebp,%esi
	roll	$24,%ecx
	xorl	%edi,%esi
	movl	$2155905152,%ebp
	xorl	%esi,%ecx
	andl	%edx,%ebp
	leal	(%edx,%edx,1),%edi
	movl	%ebp,%esi
	shrl	$7,%ebp
	andl	$4278124286,%edi
	subl	%ebp,%esi
	movl	%edx,%ebp
	andl	$454761243,%esi
	rorl	$16,%ebp
	xorl	%edi,%esi
	movl	%edx,%edi
	xorl	%esi,%edx
	rorl	$24,%edi
	xorl	%ebp,%esi
	roll	$24,%edx
	xorl	%edi,%esi
	movl	$2155905152,%ebp
	xorl	%esi,%edx
	andl	%eax,%ebp
	leal	(%eax,%eax,1),%edi
	movl	%ebp,%esi
	shrl	$7,%ebp
	andl	$4278124286,%edi
	subl	%ebp,%esi
	movl	%eax,%ebp
	andl	$454761243,%esi
	rorl	$16,%ebp
	xorl	%edi,%esi
	movl	%eax,%edi
	xorl	%esi,%eax
	rorl	$24,%edi
	xorl	%ebp,%esi
	roll	$24,%eax
	xorl	%edi,%esi
	movl	$2155905152,%ebp
	xorl	%esi,%eax
	andl	%ebx,%ebp
	leal	(%ebx,%ebx,1),%edi
	movl	%ebp,%esi
	shrl	$7,%ebp
	andl	$4278124286,%edi
	subl	%ebp,%esi
	movl	%ebx,%ebp
	andl	$454761243,%esi
	rorl	$16,%ebp
	xorl	%edi,%esi
	movl	%ebx,%edi
	xorl	%esi,%ebx
	rorl	$24,%edi
	xorl	%ebp,%esi
	roll	$24,%ebx
	xorl	%edi,%esi
	xorl	%esi,%ebx
	/* restore key and table pointers, advance to the next round key, AddRoundKey */
	movl	20(%esp),%edi
	movl	28(%esp),%ebp
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* loop while the key pointer is below the end pointer (movl leaves flags alone) */
	cmpl	24(%esp),%edi
	movl	%edi,20(%esp)
	jb	.L000loop
	/* final round: same byte substitution/rotation as above, no column mixing */
	movl	%eax,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)

	movl	%ebx,%esi
	andl	$255,%esi
	shrl	$16,%ebx
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%ch,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)

	movl	%ecx,%esi
	andl	$255,%esi
	shrl	$24,%ecx
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%dh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edx
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi

	/* %edi = current round-key pointer again; the last key sits 16 bytes further */
	movl	20(%esp),%edi
	andl	$255,%edx
	movzbl	-128(%ebp,%edx,1),%edx
	movzbl	%ah,%eax
	movzbl	-128(%ebp,%eax,1),%eax
	shll	$8,%eax
	xorl	%eax,%edx
	movl	4(%esp),%eax
	andl	$255,%ebx
	movzbl	-128(%ebp,%ebx,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%edx
	movl	8(%esp),%ebx
	movzbl	-128(%ebp,%ecx,1),%ecx
	shll	$24,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx

	/* final AddRoundKey */
	xorl	16(%edi),%eax
	xorl	20(%edi),%ebx
	xorl	24(%edi),%ecx
	xorl	28(%edi),%edx
	ret
.size	_x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact
/*
 * _sse_AES_encrypt_compact
 *
 * Variant of the compact (256-byte S-box) encryption that keeps the state in
 * MMX registers and uses pshufw, which needs the SSE integer extensions.
 * Internal routine; contract set up by AES_encrypt:
 *
 *   In:   %mm0 = state bytes 0..7, %mm4 = state bytes 8..15
 *         %edi = key schedule; the dword at 240(%edi) is the round count
 *         %ebp = S-box base + 128 (lookups are -128(%ebp,index,1));
 *                %ebp is not modified here
 *   Out:  %mm0, %mm4 = encrypted state.  The MMX state is left dirty; the
 *         caller issues emms.
 *   Scratch stack slots (relative to %esp inside this routine):
 *         8..15  the qword 0x1B1B1B1B1B1B1B1B (454761243 = 0x1B1B1B1B)
 *         20     saved key pointer (%edi is used as scratch in the lookups)
 *         24     end pointer = key + 16*(rounds-1)
 *   Clobbers: %eax,%ebx,%ecx,%edx,%esi,%edi, %mm1..%mm3, %mm5..%mm7, flags
 */
.type	_sse_AES_encrypt_compact,@function
.align	16
_sse_AES_encrypt_compact:
	/* initial AddRoundKey */
	pxor	(%edi),%mm0
	pxor	8(%edi),%mm4
	/* 24(%esp) = key + (2*rounds - 2)*8 */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
	movl	$454761243,%eax
	movl	%eax,8(%esp)
	movl	%eax,12(%esp)
	/*
	 * Touch the 256-byte table every 32 bytes; the loaded values are dead
	 * (overwritten at the top of the loop).  Presumably a cache warm-up.
	 */
	movl	-128(%ebp),%eax
	movl	-96(%ebp),%ebx
	movl	-64(%ebp),%ecx
	movl	-32(%ebp),%edx
	movl	(%ebp),%eax
	movl	32(%ebp),%ebx
	movl	64(%ebp),%ecx
	movl	96(%ebp),%edx
.align	16
.L001loop:
	/*
	 * Byte substitution and row rotation: pshufw gathers the 16-bit lanes
	 * needed for each output word, movd moves them to integer registers, and
	 * every byte goes through the S-box.  The new state is rebuilt into
	 * %mm0 (low half) and %mm4 (high half) with movd/punpckldq.
	 */
	pshufw	$8,%mm0,%mm1
	pshufw	$13,%mm4,%mm5
	movd	%mm1,%eax
	movd	%mm5,%ebx
	movl	%edi,20(%esp)
	movzbl	%al,%esi
	movzbl	%ah,%edx
	pshufw	$13,%mm0,%mm2
	movzbl	-128(%ebp,%esi,1),%ecx
	movzbl	%bl,%edi
	movzbl	-128(%ebp,%edx,1),%edx
	shrl	$16,%eax
	shll	$8,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$16,%esi
	pshufw	$8,%mm4,%mm6
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%ah,%edi
	shll	$24,%esi
	shrl	$16,%ebx
	orl	%esi,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$8,%esi
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%al,%edi
	shll	$24,%esi
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bl,%edi
	movd	%mm2,%eax
	movd	%ecx,%mm0
	movzbl	-128(%ebp,%edi,1),%ecx
	movzbl	%ah,%edi
	shll	$16,%ecx
	movd	%mm6,%ebx
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$24,%esi
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bl,%edi
	shll	$8,%esi
	shrl	$16,%ebx
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%al,%edi
	shrl	$16,%eax
	movd	%ecx,%mm1
	movzbl	-128(%ebp,%edi,1),%ecx
	movzbl	%ah,%edi
	shll	$16,%ecx
	andl	$255,%eax
	orl	%esi,%ecx
	punpckldq	%mm1,%mm0
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$24,%esi
	andl	$255,%ebx
	movzbl	-128(%ebp,%eax,1),%eax
	orl	%esi,%ecx
	shll	$16,%eax
	movzbl	-128(%ebp,%edi,1),%esi
	orl	%eax,%edx
	shll	$8,%esi
	movzbl	-128(%ebp,%ebx,1),%ebx
	orl	%esi,%ecx
	orl	%ebx,%edx
	movl	20(%esp),%edi
	movd	%ecx,%mm4
	movd	%edx,%mm5
	punpckldq	%mm5,%mm4
	/* advance to the next round key; past the end pointer means last round */
	addl	$16,%edi
	cmpl	24(%esp),%edi
	ja	.L002out
	/*
	 * Column mixing on both halves in parallel.  pcmpgtb against zero yields
	 * 0xFF for every byte with its top bit set; and-ing with the 0x1B qword
	 * gives the reduction term, and paddb x,x doubles each byte, so
	 * (x + x) ^ term is the packed GF(2^8) doubling.  The pshufw $177 and
	 * 8/24-bit shift pairs provide the rotated copies that are xored in.
	 */
	movq	8(%esp),%mm2
	pxor	%mm3,%mm3
	pxor	%mm7,%mm7
	movq	%mm0,%mm1
	movq	%mm4,%mm5
	pcmpgtb	%mm0,%mm3
	pcmpgtb	%mm4,%mm7
	pand	%mm2,%mm3
	pand	%mm2,%mm7
	pshufw	$177,%mm0,%mm2
	pshufw	$177,%mm4,%mm6
	paddb	%mm0,%mm0
	paddb	%mm4,%mm4
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	pshufw	$177,%mm2,%mm3
	pshufw	$177,%mm6,%mm7
	pxor	%mm0,%mm1
	pxor	%mm4,%mm5
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	movq	%mm3,%mm2
	movq	%mm7,%mm6
	pslld	$8,%mm3
	pslld	$8,%mm7
	psrld	$24,%mm2
	psrld	$24,%mm6
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	movq	%mm1,%mm3
	movq	%mm5,%mm7
	/* round key for this round */
	movq	(%edi),%mm2
	movq	8(%edi),%mm6
	psrld	$8,%mm1
	psrld	$8,%mm5
	/* the interleaved movl loads below touch the table again; their results are dead */
	movl	-128(%ebp),%eax
	pslld	$24,%mm3
	pslld	$24,%mm7
	movl	-64(%ebp),%ebx
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	movl	(%ebp),%ecx
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	movl	64(%ebp),%edx
	/* AddRoundKey */
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	jmp	.L001loop
.align	16
.L002out:
	/* final AddRoundKey (the last round has no column mixing) */
	pxor	(%edi),%mm0
	pxor	8(%edi),%mm4
	ret
.size	_sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact
/*
 * _x86_AES_encrypt
 *
 * Table-driven AES block encryption using the 2 KB .LAES_Te table (256
 * entries of 8 bytes).  Internal routine; it is not called from the part of
 * the file visible here (AES_encrypt uses the compact variants), so the
 * contract below is read off the code itself:
 *
 *   In:   %eax,%ebx,%ecx,%edx = the four 32-bit state words
 *         %edi = key schedule; the dword at 240(%edi) is the round count
 *         %ebp = base of the 8-byte-per-entry table (lookups use
 *                disp(%ebp,index,8) with disp 0..3).  %ebp is not modified.
 *   Out:  %eax,%ebx,%ecx,%edx = encrypted state words
 *   Scratch stack slots (relative to %esp inside this routine):
 *         4, 8   temporaries for the first two new state words
 *         20     current round-key pointer
 *         24     end pointer = key + 16*(rounds-1)
 *   Clobbers: %esi, %edi, flags
 *
 * Each table entry stores the same 32-bit word twice, so an unaligned load
 * at byte offset 1, 2 or 3 returns that word rotated by 8, 16 or 24 bits;
 * one table therefore serves all four byte positions.
 * Masks in the last round: 65280 = 0xFF00, 16711680 = 0xFF0000,
 * 4278190080 = 0xFF000000.
 */
.type	_x86_AES_encrypt,@function
.align	16
_x86_AES_encrypt:
	movl	%edi,20(%esp)
	/* initial AddRoundKey */
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* 24(%esp) = key + (2*rounds - 2)*8 */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
.align	16
.L003loop:
	/* t0 = T[s0.b0] ^ T+3[s1.b1] ^ T+2[s2.b2] ^ T+1[s3.b3]  -> 4(%esp) */
	movl	%eax,%esi
	andl	$255,%esi
	movl	(%ebp,%esi,8),%esi
	movzbl	%bh,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movl	%edx,%edi
	shrl	$24,%edi
	xorl	1(%ebp,%edi,8),%esi
	movl	%esi,4(%esp)

	/* t1 from s1,s2,s3,s0  -> 8(%esp); %ebx is shifted down by 16 for later use */
	movl	%ebx,%esi
	andl	$255,%esi
	shrl	$16,%ebx
	movl	(%ebp,%esi,8),%esi
	movzbl	%ch,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movl	%eax,%edi
	shrl	$24,%edi
	xorl	1(%ebp,%edi,8),%esi
	movl	%esi,8(%esp)

	/* t2 from s2,s3,s0,s1  -> %esi */
	movl	%ecx,%esi
	andl	$255,%esi
	shrl	$24,%ecx
	movl	(%ebp,%esi,8),%esi
	movzbl	%dh,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edx
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movzbl	%bh,%edi
	xorl	1(%ebp,%edi,8),%esi

	/* t3 from s3,s0,s1,s2  -> %edx; then eax,ebx,ecx = t0,t1,t2 */
	movl	20(%esp),%edi
	movl	(%ebp,%edx,8),%edx
	movzbl	%ah,%eax
	xorl	3(%ebp,%eax,8),%edx
	movl	4(%esp),%eax
	andl	$255,%ebx
	xorl	2(%ebp,%ebx,8),%edx
	movl	8(%esp),%ebx
	xorl	1(%ebp,%ecx,8),%edx
	movl	%esi,%ecx

	/* next round key, AddRoundKey, loop while below the end pointer */
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	cmpl	24(%esp),%edi
	movl	%edi,20(%esp)
	jb	.L003loop
	/*
	 * Last round: every lookup is masked down to a single byte lane, so only
	 * one byte of each table word contributes (offsets 2 and 0 are chosen so
	 * that the same byte of the entry lands in the required lane).
	 */
	movl	%eax,%esi
	andl	$255,%esi
	movl	2(%ebp,%esi,8),%esi
	andl	$255,%esi
	movzbl	%bh,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$65280,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$16711680,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movl	2(%ebp,%edi,8),%edi
	andl	$4278190080,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)
	movl	%ebx,%esi
	andl	$255,%esi
	shrl	$16,%ebx
	movl	2(%ebp,%esi,8),%esi
	andl	$255,%esi
	movzbl	%ch,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$65280,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$16711680,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$24,%edi
	movl	2(%ebp,%edi,8),%edi
	andl	$4278190080,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)
	movl	%ecx,%esi
	andl	$255,%esi
	shrl	$24,%ecx
	movl	2(%ebp,%esi,8),%esi
	andl	$255,%esi
	movzbl	%dh,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$65280,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edx
	andl	$255,%edi
	movl	(%ebp,%edi,8),%edi
	andl	$16711680,%edi
	xorl	%edi,%esi
	movzbl	%bh,%edi
	movl	2(%ebp,%edi,8),%edi
	andl	$4278190080,%edi
	xorl	%edi,%esi
	movl	20(%esp),%edi
	andl	$255,%edx
	movl	2(%ebp,%edx,8),%edx
	andl	$255,%edx
	movzbl	%ah,%eax
	movl	(%ebp,%eax,8),%eax
	andl	$65280,%eax
	xorl	%eax,%edx
	movl	4(%esp),%eax
	andl	$255,%ebx
	movl	(%ebp,%ebx,8),%ebx
	andl	$16711680,%ebx
	xorl	%ebx,%edx
	movl	8(%esp),%ebx
	movl	2(%ebp,%ecx,8),%ecx
	andl	$4278190080,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx
	/* final AddRoundKey with the last 16 bytes of the schedule */
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	ret
/*
 * .LAES_Te -- encryption lookup data, placed inside the _x86_AES_encrypt
 * symbol (the .size directive at the end covers code and tables).
 *
 * Byte layout relative to .LAES_Te (do not reorder; code uses fixed offsets):
 *      0 .. 2047  256 entries of 8 bytes; each entry is one 32-bit word stored
 *                 twice.  _x86_AES_encrypt loads at byte offsets 0..3 inside
 *                 an entry to obtain the word rotated by 0/8/16/24 bits.
 *   2048 .. 3071  four identical copies of the 256-byte AES S-box
 *                 (0x63, 0x7c, 0x77, 0x7b, ...).  AES_encrypt points %ebp at
 *                 2176 + k*256 (k = 0..3), i.e. 128 bytes into one copy, and
 *                 the compact routines index it with -128(%ebp,index,1).
 *   3072 .. 3135  round constants 1,2,4,...,128,27,54 as 32-bit words, padded
 *                 with zeros.  Not referenced by the code visible here;
 *                 presumably used by the key-setup code further down the file.
 */
.align	64
.LAES_Te:
.long	2774754246,2774754246
.long	2222750968,2222750968
.long	2574743534,2574743534
.long	2373680118,2373680118
.long	234025727,234025727
.long	3177933782,3177933782
.long	2976870366,2976870366
.long	1422247313,1422247313
.long	1345335392,1345335392
.long	50397442,50397442
.long	2842126286,2842126286
.long	2099981142,2099981142
.long	436141799,436141799
.long	1658312629,1658312629
.long	3870010189,3870010189
.long	2591454956,2591454956
.long	1170918031,1170918031
.long	2642575903,2642575903
.long	1086966153,1086966153
.long	2273148410,2273148410
.long	368769775,368769775
.long	3948501426,3948501426
.long	3376891790,3376891790
.long	200339707,200339707
.long	3970805057,3970805057
.long	1742001331,1742001331
.long	4255294047,4255294047
.long	3937382213,3937382213
.long	3214711843,3214711843
.long	4154762323,4154762323
.long	2524082916,2524082916
.long	1539358875,1539358875
.long	3266819957,3266819957
.long	486407649,486407649
.long	2928907069,2928907069
.long	1780885068,1780885068
.long	1513502316,1513502316
.long	1094664062,1094664062
.long	49805301,49805301
.long	1338821763,1338821763
.long	1546925160,1546925160
.long	4104496465,4104496465
.long	887481809,887481809
.long	150073849,150073849
.long	2473685474,2473685474
.long	1943591083,1943591083
.long	1395732834,1395732834
.long	1058346282,1058346282
.long	201589768,201589768
.long	1388824469,1388824469
.long	1696801606,1696801606
.long	1589887901,1589887901
.long	672667696,672667696
.long	2711000631,2711000631
.long	251987210,251987210
.long	3046808111,3046808111
.long	151455502,151455502
.long	907153956,907153956
.long	2608889883,2608889883
.long	1038279391,1038279391
.long	652995533,652995533
.long	1764173646,1764173646
.long	3451040383,3451040383
.long	2675275242,2675275242
.long	453576978,453576978
.long	2659418909,2659418909
.long	1949051992,1949051992
.long	773462580,773462580
.long	756751158,756751158
.long	2993581788,2993581788
.long	3998898868,3998898868
.long	4221608027,4221608027
.long	4132590244,4132590244
.long	1295727478,1295727478
.long	1641469623,1641469623
.long	3467883389,3467883389
.long	2066295122,2066295122
.long	1055122397,1055122397
.long	1898917726,1898917726
.long	2542044179,2542044179
.long	4115878822,4115878822
.long	1758581177,1758581177
.long	0,0
.long	753790401,753790401
.long	1612718144,1612718144
.long	536673507,536673507
.long	3367088505,3367088505
.long	3982187446,3982187446
.long	3194645204,3194645204
.long	1187761037,1187761037
.long	3653156455,3653156455
.long	1262041458,1262041458
.long	3729410708,3729410708
.long	3561770136,3561770136
.long	3898103984,3898103984
.long	1255133061,1255133061
.long	1808847035,1808847035
.long	720367557,720367557
.long	3853167183,3853167183
.long	385612781,385612781
.long	3309519750,3309519750
.long	3612167578,3612167578
.long	1429418854,1429418854
.long	2491778321,2491778321
.long	3477423498,3477423498
.long	284817897,284817897
.long	100794884,100794884
.long	2172616702,2172616702
.long	4031795360,4031795360
.long	1144798328,1144798328
.long	3131023141,3131023141
.long	3819481163,3819481163
.long	4082192802,4082192802
.long	4272137053,4272137053
.long	3225436288,3225436288
.long	2324664069,2324664069
.long	2912064063,2912064063
.long	3164445985,3164445985
.long	1211644016,1211644016
.long	83228145,83228145
.long	3753688163,3753688163
.long	3249976951,3249976951
.long	1977277103,1977277103
.long	1663115586,1663115586
.long	806359072,806359072
.long	452984805,452984805
.long	250868733,250868733
.long	1842533055,1842533055
.long	1288555905,1288555905
.long	336333848,336333848
.long	890442534,890442534
.long	804056259,804056259
.long	3781124030,3781124030
.long	2727843637,2727843637
.long	3427026056,3427026056
.long	957814574,957814574
.long	1472513171,1472513171
.long	4071073621,4071073621
.long	2189328124,2189328124
.long	1195195770,1195195770
.long	2892260552,2892260552
.long	3881655738,3881655738
.long	723065138,723065138
.long	2507371494,2507371494
.long	2690670784,2690670784
.long	2558624025,2558624025
.long	3511635870,3511635870
.long	2145180835,2145180835
.long	1713513028,1713513028
.long	2116692564,2116692564
.long	2878378043,2878378043
.long	2206763019,2206763019
.long	3393603212,3393603212
.long	703524551,703524551
.long	3552098411,3552098411
.long	1007948840,1007948840
.long	2044649127,2044649127
.long	3797835452,3797835452
.long	487262998,487262998
.long	1994120109,1994120109
.long	1004593371,1004593371
.long	1446130276,1446130276
.long	1312438900,1312438900
.long	503974420,503974420
.long	3679013266,3679013266
.long	168166924,168166924
.long	1814307912,1814307912
.long	3831258296,3831258296
.long	1573044895,1573044895
.long	1859376061,1859376061
.long	4021070915,4021070915
.long	2791465668,2791465668
.long	2828112185,2828112185
.long	2761266481,2761266481
.long	937747667,937747667
.long	2339994098,2339994098
.long	854058965,854058965
.long	1137232011,1137232011
.long	1496790894,1496790894
.long	3077402074,3077402074
.long	2358086913,2358086913
.long	1691735473,1691735473
.long	3528347292,3528347292
.long	3769215305,3769215305
.long	3027004632,3027004632
.long	4199962284,4199962284
.long	133494003,133494003
.long	636152527,636152527
.long	2942657994,2942657994
.long	2390391540,2390391540
.long	3920539207,3920539207
.long	403179536,403179536
.long	3585784431,3585784431
.long	2289596656,2289596656
.long	1864705354,1864705354
.long	1915629148,1915629148
.long	605822008,605822008
.long	4054230615,4054230615
.long	3350508659,3350508659
.long	1371981463,1371981463
.long	602466507,602466507
.long	2094914977,2094914977
.long	2624877800,2624877800
.long	555687742,555687742
.long	3712699286,3712699286
.long	3703422305,3703422305
.long	2257292045,2257292045
.long	2240449039,2240449039
.long	2423288032,2423288032
.long	1111375484,1111375484
.long	3300242801,3300242801
.long	2858837708,2858837708
.long	3628615824,3628615824
.long	84083462,84083462
.long	32962295,32962295
.long	302911004,302911004
.long	2741068226,2741068226
.long	1597322602,1597322602
.long	4183250862,4183250862
.long	3501832553,3501832553
.long	2441512471,2441512471
.long	1489093017,1489093017
.long	656219450,656219450
.long	3114180135,3114180135
.long	954327513,954327513
.long	335083755,335083755
.long	3013122091,3013122091
.long	856756514,856756514
.long	3144247762,3144247762
.long	1893325225,1893325225
.long	2307821063,2307821063
.long	2811532339,2811532339
.long	3063651117,3063651117
.long	572399164,572399164
.long	2458355477,2458355477
.long	552200649,552200649
.long	1238290055,1238290055
.long	4283782570,4283782570
.long	2015897680,2015897680
.long	2061492133,2061492133
.long	2408352771,2408352771
.long	4171342169,4171342169
.long	2156497161,2156497161
.long	386731290,386731290
.long	3669999461,3669999461
.long	837215959,837215959
.long	3326231172,3326231172
.long	3093850320,3093850320
.long	3275833730,3275833730
.long	2962856233,2962856233
.long	1999449434,1999449434
.long	286199582,286199582
.long	3417354363,3417354363
.long	4233385128,4233385128
.long	3602627437,3602627437
.long	974525996,974525996
/* S-box, copy 1 of 4 (offset 2048) */
.byte	99,124,119,123,242,107,111,197
.byte	48,1,103,43,254,215,171,118
.byte	202,130,201,125,250,89,71,240
.byte	173,212,162,175,156,164,114,192
.byte	183,253,147,38,54,63,247,204
.byte	52,165,229,241,113,216,49,21
.byte	4,199,35,195,24,150,5,154
.byte	7,18,128,226,235,39,178,117
.byte	9,131,44,26,27,110,90,160
.byte	82,59,214,179,41,227,47,132
.byte	83,209,0,237,32,252,177,91
.byte	106,203,190,57,74,76,88,207
.byte	208,239,170,251,67,77,51,133
.byte	69,249,2,127,80,60,159,168
.byte	81,163,64,143,146,157,56,245
.byte	188,182,218,33,16,255,243,210
.byte	205,12,19,236,95,151,68,23
.byte	196,167,126,61,100,93,25,115
.byte	96,129,79,220,34,42,144,136
.byte	70,238,184,20,222,94,11,219
.byte	224,50,58,10,73,6,36,92
.byte	194,211,172,98,145,149,228,121
.byte	231,200,55,109,141,213,78,169
.byte	108,86,244,234,101,122,174,8
.byte	186,120,37,46,28,166,180,198
.byte	232,221,116,31,75,189,139,138
.byte	112,62,181,102,72,3,246,14
.byte	97,53,87,185,134,193,29,158
.byte	225,248,152,17,105,217,142,148
.byte	155,30,135,233,206,85,40,223
.byte	140,161,137,13,191,230,66,104
.byte	65,153,45,15,176,84,187,22
/* S-box, copy 2 of 4 (offset 2304) */
.byte	99,124,119,123,242,107,111,197
.byte	48,1,103,43,254,215,171,118
.byte	202,130,201,125,250,89,71,240
.byte	173,212,162,175,156,164,114,192
.byte	183,253,147,38,54,63,247,204
.byte	52,165,229,241,113,216,49,21
.byte	4,199,35,195,24,150,5,154
.byte	7,18,128,226,235,39,178,117
.byte	9,131,44,26,27,110,90,160
.byte	82,59,214,179,41,227,47,132
.byte	83,209,0,237,32,252,177,91
.byte	106,203,190,57,74,76,88,207
.byte	208,239,170,251,67,77,51,133
.byte	69,249,2,127,80,60,159,168
.byte	81,163,64,143,146,157,56,245
.byte	188,182,218,33,16,255,243,210
.byte	205,12,19,236,95,151,68,23
.byte	196,167,126,61,100,93,25,115
.byte	96,129,79,220,34,42,144,136
.byte	70,238,184,20,222,94,11,219
.byte	224,50,58,10,73,6,36,92
.byte	194,211,172,98,145,149,228,121
.byte	231,200,55,109,141,213,78,169
.byte	108,86,244,234,101,122,174,8
.byte	186,120,37,46,28,166,180,198
.byte	232,221,116,31,75,189,139,138
.byte	112,62,181,102,72,3,246,14
.byte	97,53,87,185,134,193,29,158
.byte	225,248,152,17,105,217,142,148
.byte	155,30,135,233,206,85,40,223
.byte	140,161,137,13,191,230,66,104
.byte	65,153,45,15,176,84,187,22
/* S-box, copy 3 of 4 (offset 2560) */
.byte	99,124,119,123,242,107,111,197
.byte	48,1,103,43,254,215,171,118
.byte	202,130,201,125,250,89,71,240
.byte	173,212,162,175,156,164,114,192
.byte	183,253,147,38,54,63,247,204
.byte	52,165,229,241,113,216,49,21
.byte	4,199,35,195,24,150,5,154
.byte	7,18,128,226,235,39,178,117
.byte	9,131,44,26,27,110,90,160
.byte	82,59,214,179,41,227,47,132
.byte	83,209,0,237,32,252,177,91
.byte	106,203,190,57,74,76,88,207
.byte	208,239,170,251,67,77,51,133
.byte	69,249,2,127,80,60,159,168
.byte	81,163,64,143,146,157,56,245
.byte	188,182,218,33,16,255,243,210
.byte	205,12,19,236,95,151,68,23
.byte	196,167,126,61,100,93,25,115
.byte	96,129,79,220,34,42,144,136
.byte	70,238,184,20,222,94,11,219
.byte	224,50,58,10,73,6,36,92
.byte	194,211,172,98,145,149,228,121
.byte	231,200,55,109,141,213,78,169
.byte	108,86,244,234,101,122,174,8
.byte	186,120,37,46,28,166,180,198
.byte	232,221,116,31,75,189,139,138
.byte	112,62,181,102,72,3,246,14
.byte	97,53,87,185,134,193,29,158
.byte	225,248,152,17,105,217,142,148
.byte	155,30,135,233,206,85,40,223
.byte	140,161,137,13,191,230,66,104
.byte	65,153,45,15,176,84,187,22
/* S-box, copy 4 of 4 (offset 2816) */
.byte	99,124,119,123,242,107,111,197
.byte	48,1,103,43,254,215,171,118
.byte	202,130,201,125,250,89,71,240
.byte	173,212,162,175,156,164,114,192
.byte	183,253,147,38,54,63,247,204
.byte	52,165,229,241,113,216,49,21
.byte	4,199,35,195,24,150,5,154
.byte	7,18,128,226,235,39,178,117
.byte	9,131,44,26,27,110,90,160
.byte	82,59,214,179,41,227,47,132
.byte	83,209,0,237,32,252,177,91
.byte	106,203,190,57,74,76,88,207
.byte	208,239,170,251,67,77,51,133
.byte	69,249,2,127,80,60,159,168
.byte	81,163,64,143,146,157,56,245
.byte	188,182,218,33,16,255,243,210
.byte	205,12,19,236,95,151,68,23
.byte	196,167,126,61,100,93,25,115
.byte	96,129,79,220,34,42,144,136
.byte	70,238,184,20,222,94,11,219
.byte	224,50,58,10,73,6,36,92
.byte	194,211,172,98,145,149,228,121
.byte	231,200,55,109,141,213,78,169
.byte	108,86,244,234,101,122,174,8
.byte	186,120,37,46,28,166,180,198
.byte	232,221,116,31,75,189,139,138
.byte	112,62,181,102,72,3,246,14
.byte	97,53,87,185,134,193,29,158
.byte	225,248,152,17,105,217,142,148
.byte	155,30,135,233,206,85,40,223
.byte	140,161,137,13,191,230,66,104
.byte	65,153,45,15,176,84,187,22
/* round constants (offset 3072), zero-padded to 16 words */
.long	1,2,4,8
.long	16,32,64,128
.long	27,54,0,0
.long	0,0,0,0
.size	_x86_AES_encrypt,.-_x86_AES_encrypt
/*
 * AES_encrypt -- exported entry point (32-bit, arguments on the stack).
 *
 * After the four register pushes the arguments are found at
 *   20(%esp)  input block  (16 bytes are read)
 *   24(%esp)  output block (16 bytes are written)
 *   28(%esp)  key schedule (the workers read the round count at offset 240)
 * No value is returned; %ebp/%ebx/%esi/%edi are preserved.
 *
 * Steps:
 *   1. Build a private, 64-byte-aligned scratch frame whose position is
 *      additionally shifted by ((esp - (key - 127)) & 960) bytes, i.e. it
 *      depends on the distance between stack and key schedule.
 *      NOTE(review): presumably this keeps the frame from aliasing the key
 *      schedule in the cache -- confirm against the upstream generator.
 *      The caller's %esp is saved at 28(%esp) of the new frame.
 *   2. Locate .LAES_Te position-independently (call/pop), then point %ebp at
 *      .LAES_Te + 2176 + k*256 with k*256 = ((esp + 764 - .LAES_Te) & 768):
 *      128 bytes into one of the four S-box copies that follow the 2 KB table.
 *   3. Test bit 25 of the first dword of OPENSSL_ia32cap_P.  If set, run
 *      _sse_AES_encrypt_compact (state in %mm0/%mm4, followed by emms),
 *      otherwise _x86_AES_encrypt_compact (state in %eax..%edx).
 *      NOTE(review): bit 25 is the SSE flag if that word mirrors
 *      CPUID.1:EDX -- confirm where OPENSSL_ia32cap_P is filled in.
 *   4. Restore %esp, store the result through the second argument.
 *
 * NOTE(review): OPENSSL_ia32cap_P is referenced by absolute address (leal
 * symbol,%eax), unlike .LAES_Te, so this object is not fully
 * position-independent.
 */
.globl	AES_encrypt
.type	AES_encrypt,@function
.align	16
AES_encrypt:
.L_AES_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* %esi = input block, %edi = key schedule */
	movl	20(%esp),%esi
	movl	28(%esp),%edi
	/* %eax remembers the caller-side %esp while the scratch frame is built */
	movl	%esp,%eax
	subl	$36,%esp
	andl	$-64,%esp
	/* %ebx = (esp - (key - 127)) & 960; move the frame down by that amount */
	leal	-127(%edi),%ebx
	subl	%esp,%ebx
	negl	%ebx
	andl	$960,%ebx
	subl	%ebx,%esp
	addl	$4,%esp
	movl	%eax,28(%esp)
	/* call/pop idiom: %ebp = run-time address of .L004pic_point */
	call	.L004pic_point
.L004pic_point:
	popl	%ebp
	leal	OPENSSL_ia32cap_P,%eax
	leal	.LAES_Te-.L004pic_point(%ebp),%ebp
	/* pick one of the four S-box copies; 2176 = 2048 (word table) + 128 (bias) */
	leal	764(%esp),%ebx
	subl	%ebp,%ebx
	andl	$768,%ebx
	leal	2176(%ebp,%ebx,1),%ebp
	/* CF = capability bit 25; clear -> integer-only path */
	btl	$25,(%eax)
	jnc	.L005x86
	/* MMX/SSE path: the state lives in %mm0 (bytes 0..7) and %mm4 (bytes 8..15) */
	movq	(%esi),%mm0
	movq	8(%esi),%mm4
	call	_sse_AES_encrypt_compact
	/* back on the caller-side stack: 24(%esp) is the output pointer argument */
	movl	28(%esp),%esp
	movl	24(%esp),%esi
	movq	%mm0,(%esi)
	movq	%mm4,8(%esi)
	/* leave MMX state clean for x87 users */
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.align	16
.L005x86:
	/* the worker clobbers %ebp and reloads it from this slot (28(%esp) on its side) */
	movl	%ebp,24(%esp)
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	call	_x86_AES_encrypt_compact
	/* back on the caller-side stack: 24(%esp) is the output pointer argument */
	movl	28(%esp),%esp
	movl	24(%esp),%esi
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	AES_encrypt,.-.L_AES_encrypt_begin
.type	_x86_AES_decrypt_compact,@function
.align	16
/*
 * _x86_AES_decrypt_compact
 *
 * Integer-only decryption of one 16-byte block that uses nothing but a
 * 256-byte byte lookup table ("compact" variant).
 *
 * In:      eax, ebx, ecx, edx = the four 32-bit state words
 *          edi      = key schedule pointer; the word at 240(%edi) is
 *                     read as the round count n
 *                     (presumably AES_KEY.rounds -- TODO confirm)
 *          ebp      = byte table pointer biased by +128 (every lookup
 *                     below is -128(%ebp,index,1))
 *          28(%esp) = copy of ebp stored by the caller; ebp is used as
 *                     a scratch register in the loop and reloaded from
 *                     this slot
 * Out:     eax, ebx, ecx, edx = resulting state words
 * Scratch: esi, edi and stack slots 4..24(%esp)
 *          (4, 8, 12, 16 = temporaries, 20 = current round-key
 *          pointer, 24 = loop end pointer)
 */
_x86_AES_decrypt_compact:
	movl	%edi,20(%esp)
	/* XOR the state with the first 16 bytes of the key schedule. */
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* Loop bound: esi = edi + 16*(n-1), saved in 24(%esp). */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
	/*
	 * Eight loads at 32-byte strides across the 256-byte table. The
	 * loaded values are overwritten without being used (presumably a
	 * cache warm-up -- TODO confirm).
	 */
	movl	-128(%ebp),%edi
	movl	-96(%ebp),%esi
	movl	-64(%ebp),%edi
	movl	-32(%ebp),%esi
	movl	(%ebp),%edi
	movl	32(%ebp),%esi
	movl	64(%ebp),%edi
	movl	96(%ebp),%esi
.align	16
.L006loop:
	/*
	 * New word 0, parked in 4(%esp): table bytes selected by
	 * eax[7:0], edx[15:8], ecx[23:16], ebx[31:24], placed at bit
	 * positions 0, 8, 16 and 24.
	 */
	movl	%eax,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%dh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ebx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)
	/*
	 * New word 1, parked in 8(%esp): ebx[7:0], eax[15:8],
	 * edx[23:16], ecx[31:24].
	 */
	movl	%ebx,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%ah,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)
	/*
	 * New word 2, built in esi and moved to ecx further down:
	 * ecx[7:0], ebx[15:8], eax[23:16], edx[31:24].
	 */
	movl	%ecx,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	/*
	 * New word 3, built in place in edx: edx[7:0], ecx[15:8],
	 * ebx[23:16], eax[31:24]. The old state registers are consumed.
	 */
	andl	$255,%edx
	movzbl	-128(%ebp,%edx,1),%edx
	movzbl	%ch,%ecx
	movzbl	-128(%ebp,%ecx,1),%ecx
	shll	$8,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx
	shrl	$16,%ebx
	andl	$255,%ebx
	movzbl	-128(%ebp,%ebx,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%edx
	shrl	$24,%eax
	movzbl	-128(%ebp,%eax,1),%eax
	shll	$24,%eax
	xorl	%eax,%edx
	/*
	 * Column transform of ecx (word 2); result goes to 12(%esp).
	 * Each nine-instruction group doubles all four bytes of a word in
	 * GF(2^8): (x << 1) & 0xfefefefe (4278124286), then XOR 0x1b into
	 * every byte whose top bit was set (mask 0x80808080 = 2155905152,
	 * reduction 0x1b1b1b1b = 454761243). Three doublings give 2x, 4x
	 * and 8x in eax, ebx and ebp; they are combined with x through
	 * XORs and byte rotations. This looks like the AES InvMixColumns
	 * step -- TODO confirm. ebp is clobbered here and restored from
	 * 28(%esp) at the bottom of the loop.
	 */
	movl	$2155905152,%edi
	andl	%ecx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ecx,%ecx,1),%eax
	subl	%edi,%esi
	andl	$4278124286,%eax
	andl	$454761243,%esi
	xorl	%esi,%eax
	movl	$2155905152,%edi
	andl	%eax,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%eax,%eax,1),%ebx
	subl	%edi,%esi
	andl	$4278124286,%ebx
	andl	$454761243,%esi
	xorl	%ecx,%eax
	xorl	%esi,%ebx
	movl	$2155905152,%edi
	andl	%ebx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ebx,%ebx,1),%ebp
	subl	%edi,%esi
	andl	$4278124286,%ebp
	andl	$454761243,%esi
	xorl	%ecx,%ebx
	roll	$8,%ecx
	xorl	%esi,%ebp
	xorl	%eax,%ecx
	xorl	%ebp,%eax
	xorl	%ebx,%ecx
	xorl	%ebp,%ebx
	roll	$24,%eax
	xorl	%ebp,%ecx
	roll	$16,%ebx
	xorl	%eax,%ecx
	roll	$8,%ebp
	xorl	%ebx,%ecx
	movl	4(%esp),%eax
	xorl	%ebp,%ecx
	movl	%ecx,12(%esp)
	/*
	 * Same column transform for edx (word 3); result goes to
	 * 16(%esp). Word 1 is reloaded into ebx from 8(%esp) on the way.
	 */
	movl	$2155905152,%edi
	andl	%edx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%edx,%edx,1),%ebx
	subl	%edi,%esi
	andl	$4278124286,%ebx
	andl	$454761243,%esi
	xorl	%esi,%ebx
	movl	$2155905152,%edi
	andl	%ebx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ebx,%ebx,1),%ecx
	subl	%edi,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	xorl	%edx,%ebx
	xorl	%esi,%ecx
	movl	$2155905152,%edi
	andl	%ecx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ecx,%ecx,1),%ebp
	subl	%edi,%esi
	andl	$4278124286,%ebp
	andl	$454761243,%esi
	xorl	%edx,%ecx
	roll	$8,%edx
	xorl	%esi,%ebp
	xorl	%ebx,%edx
	xorl	%ebp,%ebx
	xorl	%ecx,%edx
	xorl	%ebp,%ecx
	roll	$24,%ebx
	xorl	%ebp,%edx
	roll	$16,%ecx
	xorl	%ebx,%edx
	roll	$8,%ebp
	xorl	%ecx,%edx
	movl	8(%esp),%ebx
	xorl	%ebp,%edx
	movl	%edx,16(%esp)
	/* Same column transform for eax (word 0); result stays in eax. */
	movl	$2155905152,%edi
	andl	%eax,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%eax,%eax,1),%ecx
	subl	%edi,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	xorl	%esi,%ecx
	movl	$2155905152,%edi
	andl	%ecx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ecx,%ecx,1),%edx
	subl	%edi,%esi
	andl	$4278124286,%edx
	andl	$454761243,%esi
	xorl	%eax,%ecx
	xorl	%esi,%edx
	movl	$2155905152,%edi
	andl	%edx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%edx,%edx,1),%ebp
	subl	%edi,%esi
	andl	$4278124286,%ebp
	andl	$454761243,%esi
	xorl	%eax,%edx
	roll	$8,%eax
	xorl	%esi,%ebp
	xorl	%ecx,%eax
	xorl	%ebp,%ecx
	xorl	%edx,%eax
	xorl	%ebp,%edx
	roll	$24,%ecx
	xorl	%ebp,%eax
	roll	$16,%edx
	xorl	%ecx,%eax
	roll	$8,%ebp
	xorl	%edx,%eax
	xorl	%ebp,%eax
	/*
	 * Same column transform for ebx (word 1); result stays in ebx.
	 * Words 2 and 3 are then reloaded from 12(%esp) and 16(%esp).
	 */
	movl	$2155905152,%edi
	andl	%ebx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ebx,%ebx,1),%ecx
	subl	%edi,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	xorl	%esi,%ecx
	movl	$2155905152,%edi
	andl	%ecx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%ecx,%ecx,1),%edx
	subl	%edi,%esi
	andl	$4278124286,%edx
	andl	$454761243,%esi
	xorl	%ebx,%ecx
	xorl	%esi,%edx
	movl	$2155905152,%edi
	andl	%edx,%edi
	movl	%edi,%esi
	shrl	$7,%edi
	leal	(%edx,%edx,1),%ebp
	subl	%edi,%esi
	andl	$4278124286,%ebp
	andl	$454761243,%esi
	xorl	%ebx,%edx
	roll	$8,%ebx
	xorl	%esi,%ebp
	xorl	%ecx,%ebx
	xorl	%ebp,%ecx
	xorl	%edx,%ebx
	xorl	%ebp,%edx
	roll	$24,%ecx
	xorl	%ebp,%ebx
	roll	$16,%edx
	xorl	%ecx,%ebx
	roll	$8,%ebp
	xorl	%edx,%ebx
	movl	12(%esp),%ecx
	xorl	%ebp,%ebx
	movl	16(%esp),%edx
	/*
	 * Restore the key pointer and the table pointer, step to the next
	 * 16-byte round key, XOR it in, and repeat while the pointer is
	 * below the bound in 24(%esp) (unsigned compare).
	 */
	movl	20(%esp),%edi
	movl	28(%esp),%ebp
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	cmpl	24(%esp),%edi
	movl	%edi,20(%esp)
	jb	.L006loop
	/*
	 * Last round: the same four byte gathers as in the loop, with no
	 * column transform afterwards.
	 */
	movl	%eax,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%dh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ebx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)
	movl	%ebx,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%ah,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)
	movl	%ecx,%esi
	andl	$255,%esi
	movzbl	-128(%ebp,%esi,1),%esi
	movzbl	%bh,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movzbl	-128(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	/* edi = round-key pointer saved by the last loop iteration. */
	movl	20(%esp),%edi
	andl	$255,%edx
	movzbl	-128(%ebp,%edx,1),%edx
	movzbl	%ch,%ecx
	movzbl	-128(%ebp,%ecx,1),%ecx
	shll	$8,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx
	shrl	$16,%ebx
	andl	$255,%ebx
	movzbl	-128(%ebp,%ebx,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%edx
	movl	8(%esp),%ebx
	shrl	$24,%eax
	movzbl	-128(%ebp,%eax,1),%eax
	shll	$24,%eax
	xorl	%eax,%edx
	movl	4(%esp),%eax
	/* Final round key: the 16 bytes that follow the last one used. */
	xorl	16(%edi),%eax
	xorl	20(%edi),%ebx
	xorl	24(%edi),%ecx
	xorl	28(%edi),%edx
	ret
.size	_x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact
/*
 * _sse_AES_decrypt_compact
 *
 * Compact (byte-table only) decryption of one 16-byte block that keeps
 * the state in MMX registers and uses pshufw.
 *
 * In:      mm0 = state bytes 0-7, mm4 = state bytes 8-15
 *          edi = key schedule pointer; the word at 240(%edi) is read
 *                as the round count n (presumably AES_KEY.rounds --
 *                TODO confirm)
 *          ebp = byte table pointer biased by +128; not modified here
 * Out:     mm0, mm4 = resulting state
 * Scratch: eax, ebx, ecx, edx, esi, edi, mm1-mm3, mm5-mm7 and stack
 *          slots 8..24(%esp) (8/12 = 0x1b1b1b1b constant pair,
 *          20 = current round-key pointer, 24 = loop end pointer)
 *
 * This routine does not execute emms; the callers visible in this file
 * do so after they have stored mm0/mm4.
 */
.type	_sse_AES_decrypt_compact,@function
.align	16
_sse_AES_decrypt_compact:
	/* XOR the state with the first 16 bytes of the key schedule. */
	pxor	(%edi),%mm0
	pxor	8(%edi),%mm4
	/* Loop bound: esi = edi + 16*(n-1), saved in 24(%esp). */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
	/*
	 * 454761243 = 0x1b1b1b1b, stored twice so that movq 8(%esp) yields
	 * it in all eight byte lanes.
	 */
	movl	$454761243,%eax
	movl	%eax,8(%esp)
	movl	%eax,12(%esp)
	/*
	 * Eight loads at 32-byte strides across the 256-byte table. The
	 * values are overwritten unused at the top of the loop (presumably
	 * a cache warm-up -- TODO confirm).
	 */
	movl	-128(%ebp),%eax
	movl	-96(%ebp),%ebx
	movl	-64(%ebp),%ecx
	movl	-32(%ebp),%edx
	movl	(%ebp),%eax
	movl	32(%ebp),%ebx
	movl	64(%ebp),%ecx
	movl	96(%ebp),%edx
.align	16
.L007loop:
	/*
	 * Byte substitution: pshufw-shuffled copies of mm0/mm4 are moved
	 * into eax/ebx sixteen bits at a time, each byte indexes the table
	 * at -128(%ebp), and the looked-up bytes are reassembled with
	 * shifts and ORs in ecx/edx. The results are repacked into mm0
	 * and mm4. edi is used as an index register here, so the key
	 * pointer is parked in 20(%esp).
	 */
	pshufw	$12,%mm0,%mm1
	pshufw	$9,%mm4,%mm5
	movd	%mm1,%eax
	movd	%mm5,%ebx
	movl	%edi,20(%esp)
	movzbl	%al,%esi
	movzbl	%ah,%edx
	pshufw	$6,%mm0,%mm2
	movzbl	-128(%ebp,%esi,1),%ecx
	movzbl	%bl,%edi
	movzbl	-128(%ebp,%edx,1),%edx
	shrl	$16,%eax
	shll	$8,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$16,%esi
	pshufw	$3,%mm4,%mm6
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%ah,%edi
	shll	$24,%esi
	shrl	$16,%ebx
	orl	%esi,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shll	$24,%esi
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%al,%edi
	shll	$8,%esi
	movd	%mm2,%eax
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bl,%edi
	shll	$16,%esi
	movd	%mm6,%ebx
	movd	%ecx,%mm0
	movzbl	-128(%ebp,%edi,1),%ecx
	movzbl	%al,%edi
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bl,%edi
	orl	%esi,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%ah,%edi
	shll	$16,%esi
	shrl	$16,%eax
	orl	%esi,%edx
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%bh,%edi
	shrl	$16,%ebx
	shll	$8,%esi
	movd	%edx,%mm1
	movzbl	-128(%ebp,%edi,1),%edx
	movzbl	%bh,%edi
	shll	$24,%edx
	andl	$255,%ebx
	orl	%esi,%edx
	punpckldq	%mm1,%mm0
	movzbl	-128(%ebp,%edi,1),%esi
	movzbl	%al,%edi
	shll	$8,%esi
	movzbl	%ah,%eax
	movzbl	-128(%ebp,%ebx,1),%ebx
	orl	%esi,%ecx
	movzbl	-128(%ebp,%edi,1),%esi
	orl	%ebx,%edx
	shll	$16,%esi
	movzbl	-128(%ebp,%eax,1),%eax
	orl	%esi,%edx
	shll	$24,%eax
	orl	%eax,%ecx
	movl	20(%esp),%edi
	movd	%edx,%mm4
	movd	%ecx,%mm5
	punpckldq	%mm5,%mm4
	/*
	 * Step to the next round key. Once the pointer is above the bound
	 * in 24(%esp) (unsigned), this was the last round: skip the column
	 * transform and go apply the final key.
	 */
	addl	$16,%edi
	cmpl	24(%esp),%edi
	ja	.L008out
	/*
	 * Column transform on both halves in parallel (mm0/mm4 accumulate
	 * the result, mm1/mm5 carry the value being doubled). Looks like
	 * AES InvMixColumns -- TODO confirm.
	 */
	movq	%mm0,%mm3
	movq	%mm4,%mm7
	pshufw	$228,%mm0,%mm2
	pshufw	$228,%mm4,%mm6
	movq	%mm0,%mm1
	movq	%mm4,%mm5
	pshufw	$177,%mm0,%mm0
	pshufw	$177,%mm4,%mm4
	pslld	$8,%mm2
	pslld	$8,%mm6
	psrld	$8,%mm3
	psrld	$8,%mm7
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	pslld	$16,%mm2
	pslld	$16,%mm6
	psrld	$16,%mm3
	psrld	$16,%mm7
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	/*
	 * Per-byte doubling in GF(2^8): pcmpgtb against zero yields 0xff
	 * in every lane whose top bit is set, pand keeps 0x1b there,
	 * paddb doubles each byte, pxor applies the reduction.
	 */
	movq	8(%esp),%mm3
	pxor	%mm2,%mm2
	pxor	%mm6,%mm6
	pcmpgtb	%mm1,%mm2
	pcmpgtb	%mm5,%mm6
	pand	%mm3,%mm2
	pand	%mm3,%mm6
	paddb	%mm1,%mm1
	paddb	%mm5,%mm5
	pxor	%mm2,%mm1
	pxor	%mm6,%mm5
	movq	%mm1,%mm3
	movq	%mm5,%mm7
	movq	%mm1,%mm2
	movq	%mm5,%mm6
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	pslld	$24,%mm3
	pslld	$24,%mm7
	psrld	$8,%mm2
	psrld	$8,%mm6
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	/* Second doubling (same pattern as above). */
	movq	8(%esp),%mm2
	pxor	%mm3,%mm3
	pxor	%mm7,%mm7
	pcmpgtb	%mm1,%mm3
	pcmpgtb	%mm5,%mm7
	pand	%mm2,%mm3
	pand	%mm2,%mm7
	paddb	%mm1,%mm1
	paddb	%mm5,%mm5
	pxor	%mm3,%mm1
	pxor	%mm7,%mm5
	pshufw	$177,%mm1,%mm3
	pshufw	$177,%mm5,%mm7
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	/* Third doubling; mm2 still holds the 0x1b constant. */
	pxor	%mm3,%mm3
	pxor	%mm7,%mm7
	pcmpgtb	%mm1,%mm3
	pcmpgtb	%mm5,%mm7
	pand	%mm2,%mm3
	pand	%mm2,%mm7
	paddb	%mm1,%mm1
	paddb	%mm5,%mm5
	pxor	%mm3,%mm1
	pxor	%mm7,%mm5
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	movq	%mm1,%mm3
	movq	%mm5,%mm7
	pshufw	$177,%mm1,%mm2
	pshufw	$177,%mm5,%mm6
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	pslld	$8,%mm1
	pslld	$8,%mm5
	psrld	$8,%mm3
	psrld	$8,%mm7
	/* mm2/mm6 = next round key, XORed in at the bottom of the loop. */
	movq	(%edi),%mm2
	movq	8(%edi),%mm6
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	/*
	 * The four integer loads interleaved below touch the table at
	 * 64-byte strides; their results are overwritten at the top of the
	 * loop (presumably cache warm-up again -- TODO confirm).
	 */
	movl	-128(%ebp),%eax
	pslld	$16,%mm1
	pslld	$16,%mm5
	movl	-64(%ebp),%ebx
	psrld	$16,%mm3
	psrld	$16,%mm7
	movl	(%ebp),%ecx
	pxor	%mm1,%mm0
	pxor	%mm5,%mm4
	movl	64(%ebp),%edx
	pxor	%mm3,%mm0
	pxor	%mm7,%mm4
	pxor	%mm2,%mm0
	pxor	%mm6,%mm4
	jmp	.L007loop
.align	16
.L008out:
	/* Final round key (edi was already advanced by 16 above). */
	pxor	(%edi),%mm0
	pxor	8(%edi),%mm4
	ret
.size	_sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact
/*
 * _x86_AES_decrypt
 *
 * Integer-only decryption of one 16-byte block using the full lookup
 * table: 256 entries of 8 bytes (each 32-bit word stored twice)
 * followed by a 256-byte byte table at offset 2048.
 *
 * In:      eax, ebx, ecx, edx = the four 32-bit state words
 *          edi = key schedule pointer; the word at 240(%edi) is read
 *                as the round count n (presumably AES_KEY.rounds --
 *                TODO confirm)
 *          ebp = pointer to the start of the table (unbiased)
 * Out:     eax, ebx, ecx, edx = resulting state words
 *          edi = pointer to the last round key used
 *          ebp = restored to its value on entry
 * Scratch: esi and stack slots 4, 8, 20, 24(%esp)
 *          (4/8 = temporaries, 20 = current round-key pointer,
 *          24 = loop end pointer)
 */
.type	_x86_AES_decrypt,@function
.align	16
_x86_AES_decrypt:
	movl	%edi,20(%esp)
	/* XOR the state with the first 16 bytes of the key schedule. */
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* Loop bound: esi = edi + 16*(n-1), saved in 24(%esp). */
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,24(%esp)
.align	16
.L009loop:
	/*
	 * Each new word is the XOR of four table words. Because every
	 * entry holds its word twice, a 4-byte load at byte offset k
	 * (k = 0, 3, 2, 1) inside the 8-byte entry returns that word
	 * rotated right by 8*k bits (little-endian), so one table serves
	 * all four byte positions.
	 *
	 * New word 0 -> 4(%esp): indexed by eax[7:0], edx[15:8],
	 * ecx[23:16], ebx[31:24].
	 */
	movl	%eax,%esi
	andl	$255,%esi
	movl	(%ebp,%esi,8),%esi
	movzbl	%dh,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movl	%ebx,%edi
	shrl	$24,%edi
	xorl	1(%ebp,%edi,8),%esi
	movl	%esi,4(%esp)

	/* New word 1 -> 8(%esp): ebx[7:0], eax[15:8], edx[23:16], ecx[31:24]. */
	movl	%ebx,%esi
	andl	$255,%esi
	movl	(%ebp,%esi,8),%esi
	movzbl	%ah,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movl	%ecx,%edi
	shrl	$24,%edi
	xorl	1(%ebp,%edi,8),%esi
	movl	%esi,8(%esp)

	/* New word 2 -> esi: ecx[7:0], ebx[15:8], eax[23:16], edx[31:24]. */
	movl	%ecx,%esi
	andl	$255,%esi
	movl	(%ebp,%esi,8),%esi
	movzbl	%bh,%edi
	xorl	3(%ebp,%edi,8),%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edi
	xorl	2(%ebp,%edi,8),%esi
	movl	%edx,%edi
	shrl	$24,%edi
	xorl	1(%ebp,%edi,8),%esi

	/*
	 * New word 3, built in place in edx: edx[7:0], ecx[15:8],
	 * ebx[23:16], eax[31:24]. Words 2, 1 and 0 are then moved back
	 * into ecx, ebx and eax.
	 */
	movl	20(%esp),%edi
	andl	$255,%edx
	movl	(%ebp,%edx,8),%edx
	movzbl	%ch,%ecx
	xorl	3(%ebp,%ecx,8),%edx
	movl	%esi,%ecx
	shrl	$16,%ebx
	andl	$255,%ebx
	xorl	2(%ebp,%ebx,8),%edx
	movl	8(%esp),%ebx
	shrl	$24,%eax
	xorl	1(%ebp,%eax,8),%edx
	movl	4(%esp),%eax

	/*
	 * Step to the next round key, XOR it in, and repeat while the
	 * pointer is below the bound in 24(%esp) (unsigned compare).
	 */
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	cmpl	24(%esp),%edi
	movl	%edi,20(%esp)
	jb	.L009loop
	/*
	 * Last round uses the byte table at table+2048. ebp is first moved
	 * to table+2176 so that eight loads at 32-byte strides touch all
	 * 256 bytes (values are discarded; presumably a cache warm-up --
	 * TODO confirm), then pulled back by 128 to the start of the byte
	 * table.
	 */
	leal	2176(%ebp),%ebp
	movl	-128(%ebp),%edi
	movl	-96(%ebp),%esi
	movl	-64(%ebp),%edi
	movl	-32(%ebp),%esi
	movl	(%ebp),%edi
	movl	32(%ebp),%esi
	movl	64(%ebp),%edi
	movl	96(%ebp),%esi
	leal	-128(%ebp),%ebp
	/*
	 * Same byte selection as in the loop, but each lookup yields one
	 * byte that is shifted into position 0, 8, 16 or 24.
	 */
	movl	%eax,%esi
	andl	$255,%esi
	movzbl	(%ebp,%esi,1),%esi
	movzbl	%dh,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ebx,%edi
	shrl	$24,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,4(%esp)
	movl	%ebx,%esi
	andl	$255,%esi
	movzbl	(%ebp,%esi,1),%esi
	movzbl	%ah,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%ecx,%edi
	shrl	$24,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	%esi,8(%esp)
	movl	%ecx,%esi
	andl	$255,%esi
	movzbl	(%ebp,%esi,1),%esi
	movzbl	%bh,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$8,%edi
	xorl	%edi,%esi
	movl	%eax,%edi
	shrl	$16,%edi
	andl	$255,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$16,%edi
	xorl	%edi,%esi
	movl	%edx,%edi
	shrl	$24,%edi
	movzbl	(%ebp,%edi,1),%edi
	shll	$24,%edi
	xorl	%edi,%esi
	movl	20(%esp),%edi
	andl	$255,%edx
	movzbl	(%ebp,%edx,1),%edx
	movzbl	%ch,%ecx
	movzbl	(%ebp,%ecx,1),%ecx
	shll	$8,%ecx
	xorl	%ecx,%edx
	movl	%esi,%ecx
	shrl	$16,%ebx
	andl	$255,%ebx
	movzbl	(%ebp,%ebx,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%edx
	movl	8(%esp),%ebx
	shrl	$24,%eax
	movzbl	(%ebp,%eax,1),%eax
	shll	$24,%eax
	xorl	%eax,%edx
	movl	4(%esp),%eax
	/* Undo the +2048 so ebp again points at the start of the table. */
	leal	-2048(%ebp),%ebp
	/* Final round key. */
	addl	$16,%edi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	ret
/*
 * .LAES_Td -- decryption lookup tables, 64-byte aligned.
 *
 * Layout as used by the code in this file:
 *   offset    0..2047  256 entries of 8 bytes; every entry is one
 *                      32-bit word emitted twice (".long w,w") so that
 *                      a 4-byte load at byte offset 0..3 inside an
 *                      entry returns the word byte-rotated
 *   offset 2048..3071  a 256-byte byte table repeated four times
 *                      (the four copies appear identical -- TODO
 *                      confirm); the compact routines are handed a
 *                      pointer into one of the copies, chosen with an
 *                      "andl $768" on a stack-relative address
 * The byte values start 0x52, 0x09, 0x6a, 0xd5, which is how the AES
 * inverse S-box begins (presumably that is what it is -- TODO confirm).
 */
.align	64
.LAES_Td:
.long	1353184337,1353184337
.long	1399144830,1399144830
.long	3282310938,3282310938
.long	2522752826,2522752826
.long	3412831035,3412831035
.long	4047871263,4047871263
.long	2874735276,2874735276
.long	2466505547,2466505547
.long	1442459680,1442459680
.long	4134368941,4134368941
.long	2440481928,2440481928
.long	625738485,625738485
.long	4242007375,4242007375
.long	3620416197,3620416197
.long	2151953702,2151953702
.long	2409849525,2409849525
.long	1230680542,1230680542
.long	1729870373,1729870373
.long	2551114309,2551114309
.long	3787521629,3787521629
.long	41234371,41234371
.long	317738113,317738113
.long	2744600205,2744600205
.long	3338261355,3338261355
.long	3881799427,3881799427
.long	2510066197,2510066197
.long	3950669247,3950669247
.long	3663286933,3663286933
.long	763608788,763608788
.long	3542185048,3542185048
.long	694804553,694804553
.long	1154009486,1154009486
.long	1787413109,1787413109
.long	2021232372,2021232372
.long	1799248025,1799248025
.long	3715217703,3715217703
.long	3058688446,3058688446
.long	397248752,397248752
.long	1722556617,1722556617
.long	3023752829,3023752829
.long	407560035,407560035
.long	2184256229,2184256229
.long	1613975959,1613975959
.long	1165972322,1165972322
.long	3765920945,3765920945
.long	2226023355,2226023355
.long	480281086,480281086
.long	2485848313,2485848313
.long	1483229296,1483229296
.long	436028815,436028815
.long	2272059028,2272059028
.long	3086515026,3086515026
.long	601060267,601060267
.long	3791801202,3791801202
.long	1468997603,1468997603
.long	715871590,715871590
.long	120122290,120122290
.long	63092015,63092015
.long	2591802758,2591802758
.long	2768779219,2768779219
.long	4068943920,4068943920
.long	2997206819,2997206819
.long	3127509762,3127509762
.long	1552029421,1552029421
.long	723308426,723308426
.long	2461301159,2461301159
.long	4042393587,4042393587
.long	2715969870,2715969870
.long	3455375973,3455375973
.long	3586000134,3586000134
.long	526529745,526529745
.long	2331944644,2331944644
.long	2639474228,2639474228
.long	2689987490,2689987490
.long	853641733,853641733
.long	1978398372,1978398372
.long	971801355,971801355
.long	2867814464,2867814464
.long	111112542,111112542
.long	1360031421,1360031421
.long	4186579262,4186579262
.long	1023860118,1023860118
.long	2919579357,2919579357
.long	1186850381,1186850381
.long	3045938321,3045938321
.long	90031217,90031217
.long	1876166148,1876166148
.long	4279586912,4279586912
.long	620468249,620468249
.long	2548678102,2548678102
.long	3426959497,3426959497
.long	2006899047,2006899047
.long	3175278768,3175278768
.long	2290845959,2290845959
.long	945494503,945494503
.long	3689859193,3689859193
.long	1191869601,1191869601
.long	3910091388,3910091388
.long	3374220536,3374220536
.long	0,0
.long	2206629897,2206629897
.long	1223502642,1223502642
.long	2893025566,2893025566
.long	1316117100,1316117100
.long	4227796733,4227796733
.long	1446544655,1446544655
.long	517320253,517320253
.long	658058550,658058550
.long	1691946762,1691946762
.long	564550760,564550760
.long	3511966619,3511966619
.long	976107044,976107044
.long	2976320012,2976320012
.long	266819475,266819475
.long	3533106868,3533106868
.long	2660342555,2660342555
.long	1338359936,1338359936
.long	2720062561,2720062561
.long	1766553434,1766553434
.long	370807324,370807324
.long	179999714,179999714
.long	3844776128,3844776128
.long	1138762300,1138762300
.long	488053522,488053522
.long	185403662,185403662
.long	2915535858,2915535858
.long	3114841645,3114841645
.long	3366526484,3366526484
.long	2233069911,2233069911
.long	1275557295,1275557295
.long	3151862254,3151862254
.long	4250959779,4250959779
.long	2670068215,2670068215
.long	3170202204,3170202204
.long	3309004356,3309004356
.long	880737115,880737115
.long	1982415755,1982415755
.long	3703972811,3703972811
.long	1761406390,1761406390
.long	1676797112,1676797112
.long	3403428311,3403428311
.long	277177154,277177154
.long	1076008723,1076008723
.long	538035844,538035844
.long	2099530373,2099530373
.long	4164795346,4164795346
.long	288553390,288553390
.long	1839278535,1839278535
.long	1261411869,1261411869
.long	4080055004,4080055004
.long	3964831245,3964831245
.long	3504587127,3504587127
.long	1813426987,1813426987
.long	2579067049,2579067049
.long	4199060497,4199060497
.long	577038663,577038663
.long	3297574056,3297574056
.long	440397984,440397984
.long	3626794326,3626794326
.long	4019204898,4019204898
.long	3343796615,3343796615
.long	3251714265,3251714265
.long	4272081548,4272081548
.long	906744984,906744984
.long	3481400742,3481400742
.long	685669029,685669029
.long	646887386,646887386
.long	2764025151,2764025151
.long	3835509292,3835509292
.long	227702864,227702864
.long	2613862250,2613862250
.long	1648787028,1648787028
.long	3256061430,3256061430
.long	3904428176,3904428176
.long	1593260334,1593260334
.long	4121936770,4121936770
.long	3196083615,3196083615
.long	2090061929,2090061929
.long	2838353263,2838353263
.long	3004310991,3004310991
.long	999926984,999926984
.long	2809993232,2809993232
.long	1852021992,1852021992
.long	2075868123,2075868123
.long	158869197,158869197
.long	4095236462,4095236462
.long	28809964,28809964
.long	2828685187,2828685187
.long	1701746150,1701746150
.long	2129067946,2129067946
.long	147831841,147831841
.long	3873969647,3873969647
.long	3650873274,3650873274
.long	3459673930,3459673930
.long	3557400554,3557400554
.long	3598495785,3598495785
.long	2947720241,2947720241
.long	824393514,824393514
.long	815048134,815048134
.long	3227951669,3227951669
.long	935087732,935087732
.long	2798289660,2798289660
.long	2966458592,2966458592
.long	366520115,366520115
.long	1251476721,1251476721
.long	4158319681,4158319681
.long	240176511,240176511
.long	804688151,804688151
.long	2379631990,2379631990
.long	1303441219,1303441219
.long	1414376140,1414376140
.long	3741619940,3741619940
.long	3820343710,3820343710
.long	461924940,461924940
.long	3089050817,3089050817
.long	2136040774,2136040774
.long	82468509,82468509
.long	1563790337,1563790337
.long	1937016826,1937016826
.long	776014843,776014843
.long	1511876531,1511876531
.long	1389550482,1389550482
.long	861278441,861278441
.long	323475053,323475053
.long	2355222426,2355222426
.long	2047648055,2047648055
.long	2383738969,2383738969
.long	2302415851,2302415851
.long	3995576782,3995576782
.long	902390199,902390199
.long	3991215329,3991215329
.long	1018251130,1018251130
.long	1507840668,1507840668
.long	1064563285,1064563285
.long	2043548696,2043548696
.long	3208103795,3208103795
.long	3939366739,3939366739
.long	1537932639,1537932639
.long	342834655,342834655
.long	2262516856,2262516856
.long	2180231114,2180231114
.long	1053059257,1053059257
.long	741614648,741614648
.long	1598071746,1598071746
.long	1925389590,1925389590
.long	203809468,203809468
.long	2336832552,2336832552
.long	1100287487,1100287487
.long	1895934009,1895934009
.long	3736275976,3736275976
.long	2632234200,2632234200
.long	2428589668,2428589668
.long	1636092795,1636092795
.long	1890988757,1890988757
.long	1952214088,1952214088
.long	1113045200,1113045200
.byte	82,9,106,213,48,54,165,56
.byte	191,64,163,158,129,243,215,251
.byte	124,227,57,130,155,47,255,135
.byte	52,142,67,68,196,222,233,203
.byte	84,123,148,50,166,194,35,61
.byte	238,76,149,11,66,250,195,78
.byte	8,46,161,102,40,217,36,178
.byte	118,91,162,73,109,139,209,37
.byte	114,248,246,100,134,104,152,22
.byte	212,164,92,204,93,101,182,146
.byte	108,112,72,80,253,237,185,218
.byte	94,21,70,87,167,141,157,132
.byte	144,216,171,0,140,188,211,10
.byte	247,228,88,5,184,179,69,6
.byte	208,44,30,143,202,63,15,2
.byte	193,175,189,3,1,19,138,107
.byte	58,145,17,65,79,103,220,234
.byte	151,242,207,206,240,180,230,115
.byte	150,172,116,34,231,173,53,133
.byte	226,249,55,232,28,117,223,110
.byte	71,241,26,113,29,41,197,137
.byte	111,183,98,14,170,24,190,27
.byte	252,86,62,75,198,210,121,32
.byte	154,219,192,254,120,205,90,244
.byte	31,221,168,51,136,7,199,49
.byte	177,18,16,89,39,128,236,95
.byte	96,81,127,169,25,181,74,13
.byte	45,229,122,159,147,201,156,239
.byte	160,224,59,77,174,42,245,176
.byte	200,235,187,60,131,83,153,97
.byte	23,43,4,126,186,119,214,38
.byte	225,105,20,99,85,33,12,125
.byte	82,9,106,213,48,54,165,56
.byte	191,64,163,158,129,243,215,251
.byte	124,227,57,130,155,47,255,135
.byte	52,142,67,68,196,222,233,203
.byte	84,123,148,50,166,194,35,61
.byte	238,76,149,11,66,250,195,78
.byte	8,46,161,102,40,217,36,178
.byte	118,91,162,73,109,139,209,37
.byte	114,248,246,100,134,104,152,22
.byte	212,164,92,204,93,101,182,146
.byte	108,112,72,80,253,237,185,218
.byte	94,21,70,87,167,141,157,132
.byte	144,216,171,0,140,188,211,10
.byte	247,228,88,5,184,179,69,6
.byte	208,44,30,143,202,63,15,2
.byte	193,175,189,3,1,19,138,107
.byte	58,145,17,65,79,103,220,234
.byte	151,242,207,206,240,180,230,115
.byte	150,172,116,34,231,173,53,133
.byte	226,249,55,232,28,117,223,110
.byte	71,241,26,113,29,41,197,137
.byte	111,183,98,14,170,24,190,27
.byte	252,86,62,75,198,210,121,32
.byte	154,219,192,254,120,205,90,244
.byte	31,221,168,51,136,7,199,49
.byte	177,18,16,89,39,128,236,95
.byte	96,81,127,169,25,181,74,13
.byte	45,229,122,159,147,201,156,239
.byte	160,224,59,77,174,42,245,176
.byte	200,235,187,60,131,83,153,97
.byte	23,43,4,126,186,119,214,38
.byte	225,105,20,99,85,33,12,125
.byte	82,9,106,213,48,54,165,56
.byte	191,64,163,158,129,243,215,251
.byte	124,227,57,130,155,47,255,135
.byte	52,142,67,68,196,222,233,203
.byte	84,123,148,50,166,194,35,61
.byte	238,76,149,11,66,250,195,78
.byte	8,46,161,102,40,217,36,178
.byte	118,91,162,73,109,139,209,37
.byte	114,248,246,100,134,104,152,22
.byte	212,164,92,204,93,101,182,146
.byte	108,112,72,80,253,237,185,218
.byte	94,21,70,87,167,141,157,132
.byte	144,216,171,0,140,188,211,10
.byte	247,228,88,5,184,179,69,6
.byte	208,44,30,143,202,63,15,2
.byte	193,175,189,3,1,19,138,107
.byte	58,145,17,65,79,103,220,234
.byte	151,242,207,206,240,180,230,115
.byte	150,172,116,34,231,173,53,133
.byte	226,249,55,232,28,117,223,110
.byte	71,241,26,113,29,41,197,137
.byte	111,183,98,14,170,24,190,27
.byte	252,86,62,75,198,210,121,32
.byte	154,219,192,254,120,205,90,244
.byte	31,221,168,51,136,7,199,49
.byte	177,18,16,89,39,128,236,95
.byte	96,81,127,169,25,181,74,13
.byte	45,229,122,159,147,201,156,239
.byte	160,224,59,77,174,42,245,176
.byte	200,235,187,60,131,83,153,97
.byte	23,43,4,126,186,119,214,38
.byte	225,105,20,99,85,33,12,125
.byte	82,9,106,213,48,54,165,56
.byte	191,64,163,158,129,243,215,251
.byte	124,227,57,130,155,47,255,135
.byte	52,142,67,68,196,222,233,203
.byte	84,123,148,50,166,194,35,61
.byte	238,76,149,11,66,250,195,78
.byte	8,46,161,102,40,217,36,178
.byte	118,91,162,73,109,139,209,37
.byte	114,248,246,100,134,104,152,22
.byte	212,164,92,204,93,101,182,146
.byte	108,112,72,80,253,237,185,218
.byte	94,21,70,87,167,141,157,132
.byte	144,216,171,0,140,188,211,10
.byte	247,228,88,5,184,179,69,6
.byte	208,44,30,143,202,63,15,2
.byte	193,175,189,3,1,19,138,107
.byte	58,145,17,65,79,103,220,234
.byte	151,242,207,206,240,180,230,115
.byte	150,172,116,34,231,173,53,133
.byte	226,249,55,232,28,117,223,110
.byte	71,241,26,113,29,41,197,137
.byte	111,183,98,14,170,24,190,27
.byte	252,86,62,75,198,210,121,32
.byte	154,219,192,254,120,205,90,244
.byte	31,221,168,51,136,7,199,49
.byte	177,18,16,89,39,128,236,95
.byte	96,81,127,169,25,181,74,13
.byte	45,229,122,159,147,201,156,239
.byte	160,224,59,77,174,42,245,176
.byte	200,235,187,60,131,83,153,97
.byte	23,43,4,126,186,119,214,38
.byte	225,105,20,99,85,33,12,125
.size	_x86_AES_decrypt,.-_x86_AES_decrypt
/*
 * AES_decrypt -- decrypt one 16-byte block (global symbol).
 *
 * Stack arguments as read below (offsets relative to esp after the
 * four register pushes): 20(%esp) = input block pointer,
 * 24(%esp) = output block pointer, 28(%esp) = key schedule pointer.
 * ebp, ebx, esi and edi are saved and restored; eax, ecx and edx are
 * clobbered.
 *
 * The routine builds a private, 64-byte aligned stack frame, selects
 * one of the four byte-table copies in .LAES_Td, and dispatches on
 * bit 25 of the first word of OPENSSL_ia32cap_P (presumably the SSE
 * feature bit -- TODO confirm): set -> _sse_AES_decrypt_compact,
 * clear -> _x86_AES_decrypt_compact.
 */
.globl	AES_decrypt
.type	AES_decrypt,@function
.align	16
AES_decrypt:
.L_AES_decrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	28(%esp),%edi
	/*
	 * Frame setup: eax keeps the incoming esp. The new esp is aligned
	 * down to 64 bytes and then lowered by ((esp - key + 127) & 960),
	 * i.e. positioned relative to the key schedule address in 64-byte
	 * steps (presumably to control cache placement -- TODO confirm).
	 * After the addl, the incoming esp is stored at 28(%esp).
	 */
	movl	%esp,%eax
	subl	$36,%esp
	andl	$-64,%esp
	leal	-127(%edi),%ebx
	subl	%esp,%ebx
	negl	%ebx
	andl	$960,%ebx
	subl	%ebx,%esp
	addl	$4,%esp
	movl	%eax,28(%esp)
	/* call/pop obtains the current address for PC-relative table access. */
	call	.L010pic_point
.L010pic_point:
	popl	%ebp
	/* Note: OPENSSL_ia32cap_P is referenced by absolute address here. */
	leal	OPENSSL_ia32cap_P,%eax
	leal	.LAES_Td-.L010pic_point(%ebp),%ebp
	/*
	 * ebp = .LAES_Td + 2176 + ((esp + 764 - .LAES_Td) & 768):
	 * 2176 = 2048 (start of the byte tables) + 128 (bias expected by
	 * the compact routines); the masked term picks one of the four
	 * 256-byte copies.
	 */
	leal	764(%esp),%ebx
	subl	%ebp,%ebx
	andl	$768,%ebx
	leal	2176(%ebp,%ebx,1),%ebp
	btl	$25,(%eax)
	jnc	.L011x86
	/* MMX/SSE path: state travels in mm0/mm4. */
	movq	(%esi),%mm0
	movq	8(%esi),%mm4
	call	_sse_AES_decrypt_compact
	/* Back on the original stack; 24(%esp) is the output pointer. */
	movl	28(%esp),%esp
	movl	24(%esp),%esi
	movq	%mm0,(%esi)
	movq	%mm4,8(%esi)
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.align	16
.L011x86:
	/*
	 * Integer path. The callee uses ebp as scratch and reloads it from
	 * its own 28(%esp), which is this 24(%esp) once the return address
	 * has been pushed.
	 */
	movl	%ebp,24(%esp)
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	call	_x86_AES_decrypt_compact
	/* Back on the original stack; 24(%esp) is the output pointer. */
	movl	28(%esp),%esp
	movl	24(%esp),%esi
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	AES_decrypt,.-.L_AES_decrypt_begin
/*
 * AES_cbc_encrypt -- CBC-mode encryption/decryption driver (global
 * symbol).
 *
 * Stack arguments as read below (offsets relative to esp after the
 * four register pushes):
 *   20 = in, 24 = out, 28 = length in bytes, 32 = key schedule,
 *   36 = ivec (16 bytes, updated on return), 40 = enc flag
 *   (non-zero: .LAES_Te and the encrypt paths; zero: .LAES_Td and the
 *   decrypt paths).
 * Returns immediately when length is 0. ebp, ebx, esi, edi and EFLAGS
 * are saved and restored (cld is executed for the string instructions).
 *
 * Two strategies:
 *   fast path -- taken when length >= 512, length is a multiple of 16
 *                and bit 28 of the first word of OPENSSL_ia32cap_P is
 *                clear (presumably the hyper-threading bit -- TODO
 *                confirm); uses the full-table routines
 *                _x86_AES_encrypt / _x86_AES_decrypt
 *   slow path -- everything else; uses the compact routines and also
 *                handles a trailing partial block
 *
 * Private frame once esp has been switched (both paths):
 *   24(%esp) table pointer (becomes 28(%esp) inside the callees)
 *   28(%esp) previous esp, used to switch back
 *   32(%esp) in         36(%esp) out        40(%esp) remaining length
 *   44(%esp) key schedule pointer (fast path may redirect it to the
 *            stack copy at 76(%esp))
 *   48(%esp) ivec pointer
 *   52(%esp) fast decrypt: pointer to the previous ciphertext block;
 *            slow path: the capability word read from
 *            OPENSSL_ia32cap_P
 *   60(%esp) 16-byte temporary block
 *   76(%esp) fast path only: optional copy of the key schedule
 *            (61 dwords); 316(%esp) = 76+240 is zeroed beforehand and
 *            doubles as the "copy was made" marker
 *
 * The ".long" words are hand-encoded string instructions
 * (little-endian bytes 89 F6 F3 xx = "movl %esi,%esi" followed by a
 * rep-prefixed string op):
 *   2784229001 = 0xA5F3F689  rep movsl      2884892297 = 0xABF3F689  rep stosl
 *   2767451785 = 0xA4F3F689  rep movsb      2868115081 = 0xAAF3F689  rep stosb
 *
 * Every "pushfl" that directly follows a "ret" has no label and is
 * never reached.
 */
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,@function
.align	16
AES_cbc_encrypt:
.L_AES_cbc_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* ecx = length; nothing to do for 0. */
	movl	28(%esp),%ecx
	cmpl	$0,%ecx
	je	.L012drop_out
	call	.L013pic_point
.L013pic_point:
	popl	%ebp
	/* Note: OPENSSL_ia32cap_P is referenced by absolute address here. */
	leal	OPENSSL_ia32cap_P,%eax
	/* ebp = .LAES_Te if enc != 0, otherwise .LAES_Td. */
	cmpl	$0,40(%esp)
	leal	.LAES_Te-.L013pic_point(%ebp),%ebp
	jne	.L014picked_te
	leal	.LAES_Td-.LAES_Te(%ebp),%ebp
.L014picked_te:
	/* From here on the argument offsets are 4 higher because of pushfl. */
	pushfl
	cld
	cmpl	$512,%ecx
	jb	.L015slow_way
	testl	$15,%ecx
	jnz	.L015slow_way
	btl	$28,(%eax)
	jc	.L015slow_way
	/*
	 * Fast path frame: esi = (esp - 324) & -64, then moved further
	 * down so that its offset within a 4 KiB page is either the page
	 * offset of table+2304 or 384 bytes below the page offset of the
	 * table start (presumably to keep frame and table from aliasing
	 * in the cache -- TODO confirm).
	 */
	leal	-324(%esp),%esi
	andl	$-64,%esi
	movl	%ebp,%eax
	leal	2304(%ebp),%ebx
	movl	%esi,%edx
	andl	$4095,%eax
	andl	$4095,%ebx
	andl	$4095,%edx
	cmpl	%ebx,%edx
	jb	.L016tbl_break_out
	subl	%ebx,%edx
	subl	%edx,%esi
	jmp	.L017tbl_ok
.align	4
.L016tbl_break_out:
	subl	%eax,%edx
	andl	$4095,%edx
	addl	$384,%edx
	subl	%edx,%esi
.align	4
.L017tbl_ok:
	/* edx -> first argument on the old stack; switch to the new frame. */
	leal	24(%esp),%edx
	xchgl	%esi,%esp
	addl	$4,%esp
	movl	%ebp,24(%esp)
	movl	%esi,28(%esp)
	/* eax = in, ebx = out, edi = key, esi = ivec, edx = enc. */
	movl	(%edx),%eax
	movl	4(%edx),%ebx
	movl	12(%edx),%edi
	movl	16(%edx),%esi
	movl	20(%edx),%edx
	movl	%eax,32(%esp)
	movl	%ebx,36(%esp)
	movl	%ecx,40(%esp)
	movl	%edi,44(%esp)
	movl	%esi,48(%esp)
	movl	$0,316(%esp)
	/*
	 * Decide whether to copy the key schedule onto the stack:
	 * ebx = (key - table) & 4095; the copy is made when ebx < 2304 or
	 * ebx >= 3852 (= 4096 - 244), i.e. when the key schedule shares
	 * page offsets with the table (presumably cache-aliasing
	 * avoidance -- TODO confirm). ecx = 61 dwords, esi = source,
	 * edi = 76(%esp) destination for the rep movsl.
	 */
	movl	%edi,%ebx
	movl	$61,%ecx
	subl	%ebp,%ebx
	movl	%edi,%esi
	andl	$4095,%ebx
	leal	76(%esp),%edi
	cmpl	$2304,%ebx
	jb	.L018do_copy
	cmpl	$3852,%ebx
	jb	.L019skip_copy
.align	4
.L018do_copy:
	/* Use the stack copy as the key from now on; rep movsl. */
	movl	%edi,44(%esp)
.long	2784229001
.L019skip_copy:
	movl	$16,%edi
.align	4
.L020prefetch_tbl:
	/*
	 * 16 iterations x 4 loads at 32-byte strides walk the first 2048
	 * bytes of the table; the loaded values are not used (presumably
	 * a cache warm-up -- TODO confirm). ebp is restored afterwards.
	 */
	movl	(%ebp),%eax
	movl	32(%ebp),%ebx
	movl	64(%ebp),%ecx
	movl	96(%ebp),%esi
	leal	128(%ebp),%ebp
	subl	$1,%edi
	jnz	.L020prefetch_tbl
	subl	$2048,%ebp
	movl	32(%esp),%esi
	movl	48(%esp),%edi
	/* edx still holds the enc flag. */
	cmpl	$0,%edx
	je	.L021fast_decrypt
	/* Fast encrypt: eax/ebx carry chaining words 0 and 1 in registers. */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
.align	16
.L022fast_enc_loop:
	/*
	 * edi points at the chaining block (ivec on the first pass, the
	 * ciphertext block just written afterwards): load words 2 and 3,
	 * XOR in the plaintext, encrypt, store to out, advance.
	 */
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	xorl	(%esi),%eax
	xorl	4(%esi),%ebx
	xorl	8(%esi),%ecx
	xorl	12(%esi),%edx
	movl	44(%esp),%edi
	call	_x86_AES_encrypt
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	leal	16(%esi),%esi
	movl	40(%esp),%ecx
	movl	%esi,32(%esp)
	leal	16(%edi),%edx
	movl	%edx,36(%esp)
	subl	$16,%ecx
	movl	%ecx,40(%esp)
	jnz	.L022fast_enc_loop
	/* Write the last ciphertext block back to ivec. */
	movl	48(%esp),%esi
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	/*
	 * If the key schedule was copied (marker non-zero), overwrite 60
	 * dwords of the copy with zeros (rep stosl, edi = copy address).
	 */
	cmpl	$0,316(%esp)
	movl	44(%esp),%edi
	je	.L023skip_ezero
	movl	$60,%ecx
	xorl	%eax,%eax
.align	4
.long	2884892297
.L023skip_ezero:
	movl	28(%esp),%esp
	popfl
.L012drop_out:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L021fast_decrypt:
	/* in == out needs the variant that buffers the ciphertext block. */
	cmpl	36(%esp),%esi
	je	.L024fast_dec_in_place
	/* 52(%esp) = pointer to the previous ciphertext block (ivec first). */
	movl	%edi,52(%esp)
.align	4
.align	16
.L025fast_dec_loop:
	/*
	 * Decrypt the block at in, XOR with the previous ciphertext block,
	 * store to out, then remember this input block as the new
	 * "previous" block and advance both pointers.
	 */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	44(%esp),%edi
	call	_x86_AES_decrypt
	movl	52(%esp),%edi
	/* This load of esi is overwritten below before it is used. */
	movl	40(%esp),%esi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	movl	36(%esp),%edi
	movl	32(%esp),%esi
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	40(%esp),%ecx
	movl	%esi,52(%esp)
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	leal	16(%edi),%edi
	movl	%edi,36(%esp)
	subl	$16,%ecx
	movl	%ecx,40(%esp)
	jnz	.L025fast_dec_loop
	/* Copy the last ciphertext block to ivec. */
	movl	52(%esp),%edi
	movl	48(%esp),%esi
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	jmp	.L026fast_dec_out
.align	16
.L024fast_dec_in_place:
.L027fast_dec_in_place_loop:
	/*
	 * In-place variant: save the ciphertext block in the temporary at
	 * 60(%esp), decrypt, XOR with the block stored in ivec, write the
	 * plaintext, then copy the saved ciphertext into ivec.
	 */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	leal	60(%esp),%edi
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	44(%esp),%edi
	call	_x86_AES_decrypt
	movl	48(%esp),%edi
	movl	36(%esp),%esi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	leal	16(%esi),%esi
	movl	%esi,36(%esp)
	leal	60(%esp),%esi
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	32(%esp),%esi
	movl	40(%esp),%ecx
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	subl	$16,%ecx
	movl	%ecx,40(%esp)
	jnz	.L027fast_dec_in_place_loop
.align	4
.L026fast_dec_out:
	/* Same optional wipe of the key copy as on the encrypt side. */
	cmpl	$0,316(%esp)
	movl	44(%esp),%edi
	je	.L028skip_dzero
	movl	$60,%ecx
	xorl	%eax,%eax
.align	4
.long	2884892297
.L028skip_dzero:
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L015slow_way:
	/*
	 * Slow path. eax = capability word, edi = key. The frame is
	 * (esp - 80) & -64, lowered by ((frame - key + 143) & 960), i.e.
	 * positioned relative to the key schedule address in 64-byte steps
	 * (presumably cache placement -- TODO confirm). ebp is moved to
	 * one of the four byte-table copies, biased by +128:
	 * ebp += 2176 + ((frame + 768 - ebp) & 768).
	 */
	movl	(%eax),%eax
	movl	36(%esp),%edi
	leal	-80(%esp),%esi
	andl	$-64,%esi
	leal	-143(%edi),%ebx
	subl	%esi,%ebx
	negl	%ebx
	andl	$960,%ebx
	subl	%ebx,%esi
	leal	768(%esi),%ebx
	subl	%ebp,%ebx
	andl	$768,%ebx
	leal	2176(%ebp,%ebx,1),%ebp
	leal	24(%esp),%edx
	xchgl	%esi,%esp
	addl	$4,%esp
	movl	%ebp,24(%esp)
	movl	%esi,28(%esp)
	movl	%eax,52(%esp)
	/* eax = in, ebx = out, esi = ivec, edx = enc. */
	movl	(%edx),%eax
	movl	4(%edx),%ebx
	movl	16(%edx),%esi
	movl	20(%edx),%edx
	movl	%eax,32(%esp)
	movl	%ebx,36(%esp)
	movl	%ecx,40(%esp)
	movl	%edi,44(%esp)
	movl	%esi,48(%esp)
	/* edi = ivec, esi = in for the code below. */
	movl	%esi,%edi
	movl	%eax,%esi
	cmpl	$0,%edx
	je	.L029slow_decrypt
	/* Slow encrypt: fewer than 16 bytes goes straight to the tail (edx = out). */
	cmpl	$16,%ecx
	movl	%ebx,%edx
	jb	.L030slow_enc_tail
	/* Bit 25 of the capability word selects the MMX/SSE loop. */
	btl	$25,52(%esp)
	jnc	.L031slow_enc_x86
	movq	(%edi),%mm0
	movq	8(%edi),%mm4
.align	16
.L032slow_enc_loop_sse:
	/* mm0/mm4 carry the chaining value; loop while >= 16 bytes remain. */
	pxor	(%esi),%mm0
	pxor	8(%esi),%mm4
	movl	44(%esp),%edi
	call	_sse_AES_encrypt_compact
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	movl	40(%esp),%ecx
	movq	%mm0,(%edi)
	movq	%mm4,8(%edi)
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	leal	16(%edi),%edx
	movl	%edx,36(%esp)
	subl	$16,%ecx
	cmpl	$16,%ecx
	movl	%ecx,40(%esp)
	jae	.L032slow_enc_loop_sse
	/* 1..15 bytes left: pad and encrypt them in the tail code. */
	testl	$15,%ecx
	jnz	.L030slow_enc_tail
	/* Done: write the last ciphertext block to ivec. */
	movl	48(%esp),%esi
	movq	%mm0,(%esi)
	movq	%mm4,8(%esi)
	emms
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L031slow_enc_x86:
	movl	(%edi),%eax
	movl	4(%edi),%ebx
.align	4
.L033slow_enc_loop_x86:
	/*
	 * Integer loop; same scheme as the fast encrypt loop but with the
	 * compact routine and a ">= 16 bytes remain" loop condition.
	 */
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	xorl	(%esi),%eax
	xorl	4(%esi),%ebx
	xorl	8(%esi),%ecx
	xorl	12(%esi),%edx
	movl	44(%esp),%edi
	call	_x86_AES_encrypt_compact
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	40(%esp),%ecx
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	leal	16(%edi),%edx
	movl	%edx,36(%esp)
	subl	$16,%ecx
	cmpl	$16,%ecx
	movl	%ecx,40(%esp)
	jae	.L033slow_enc_loop_x86
	testl	$15,%ecx
	jnz	.L030slow_enc_tail
	/* Done: write the last ciphertext block to ivec. */
	movl	48(%esp),%esi
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L030slow_enc_tail:
	/*
	 * Partial last block. On entry ecx = remaining bytes (1..15),
	 * esi = in, edx = out. The remaining bytes are copied to out with
	 * rep movsb (skipped when in == out), the rest of the 16-byte
	 * block is zero-filled with rep stosb, and the integer loop is
	 * re-entered with in = out and length = 16.
	 *
	 * NOTE(review): the chaining value is reloaded from ivec here, but
	 * neither slow encrypt loop has written ivec at this point. When
	 * at least one full block was processed before the tail, the
	 * padded block therefore appears to be chained off the caller's
	 * original IV rather than off the last ciphertext block --
	 * confirm whether that is intended.
	 */
	emms
	movl	%edx,%edi
	movl	$16,%ebx
	subl	%ecx,%ebx
	cmpl	%esi,%edi
	je	.L034enc_in_place
.align	4
.long	2767451785
	jmp	.L035enc_skip_in_place
.L034enc_in_place:
	leal	(%edi,%ecx,1),%edi
.L035enc_skip_in_place:
	/* ecx = 16 - remaining, eax = 0: zero the padding (rep stosb). */
	movl	%ebx,%ecx
	xorl	%eax,%eax
.align	4
.long	2868115081
	movl	48(%esp),%edi
	movl	%edx,%esi
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	$16,40(%esp)
	jmp	.L033slow_enc_loop_x86
.align	16
.L029slow_decrypt:
	btl	$25,52(%esp)
	jnc	.L036slow_dec_loop_x86
.align	4
.L037slow_dec_loop_sse:
	/*
	 * MMX/SSE decrypt loop. The ciphertext block is decrypted, XORed
	 * with the block in ivec, and the ciphertext (re-read from in into
	 * mm1/mm5) becomes the new ivec. eax = address of the 16-byte
	 * temporary used if this turns out to be a partial block.
	 *
	 * NOTE(review): a full 16 bytes are read from in before the
	 * remaining length is checked, including when fewer than 16 bytes
	 * remain.
	 */
	movq	(%esi),%mm0
	movq	8(%esi),%mm4
	movl	44(%esp),%edi
	call	_sse_AES_decrypt_compact
	movl	32(%esp),%esi
	leal	60(%esp),%eax
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	48(%esp),%edi
	movq	(%esi),%mm1
	movq	8(%esi),%mm5
	pxor	(%edi),%mm0
	pxor	8(%edi),%mm4
	movq	%mm1,(%edi)
	movq	%mm5,8(%edi)
	/* Borrow means fewer than 16 bytes were left. */
	subl	$16,%ecx
	jc	.L038slow_dec_partial_sse
	movq	%mm0,(%ebx)
	movq	%mm4,8(%ebx)
	leal	16(%ebx),%ebx
	movl	%ebx,36(%esp)
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	/* Flags from the subl above are still intact (mov/lea/movq do not alter them). */
	movl	%ecx,40(%esp)
	jnz	.L037slow_dec_loop_sse
	emms
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L038slow_dec_partial_sse:
	/*
	 * Partial block: park the plaintext in the temporary, restore ecx
	 * to the real byte count and copy only that many bytes to out
	 * (rep movsb, esi = temporary, edi = out).
	 */
	movq	%mm0,(%eax)
	movq	%mm4,8(%eax)
	emms
	addl	$16,%ecx
	movl	%ebx,%edi
	movl	%eax,%esi
.align	4
.long	2767451785
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L036slow_dec_loop_x86:
	/*
	 * Integer decrypt loop: the ciphertext block is saved in the
	 * temporary at 60(%esp), decrypted and XORed with the block in
	 * ivec; the saved ciphertext then replaces ivec.
	 *
	 * NOTE(review): as in the MMX/SSE loop, 16 bytes are read from in
	 * before the remaining length is checked.
	 */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	leal	60(%esp),%edi
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	44(%esp),%edi
	call	_x86_AES_decrypt_compact
	movl	48(%esp),%edi
	movl	40(%esp),%esi
	xorl	(%edi),%eax
	xorl	4(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	12(%edi),%edx
	/* Borrow means fewer than 16 bytes were left. */
	subl	$16,%esi
	jc	.L039slow_dec_partial_x86
	movl	%esi,40(%esp)
	movl	36(%esp),%esi
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	leal	16(%esi),%esi
	movl	%esi,36(%esp)
	leal	60(%esp),%esi
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	32(%esp),%esi
	leal	16(%esi),%esi
	movl	%esi,32(%esp)
	/* Flags from the subl above are still intact (mov/lea do not alter them). */
	jnz	.L036slow_dec_loop_x86
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	pushfl
.align	16
.L039slow_dec_partial_x86:
	/*
	 * Partial block: the plaintext goes to the temporary, the 16 bytes
	 * at in are copied to ivec, and only 40(%esp) bytes (the count
	 * that was not overwritten on this path) are copied from the
	 * temporary to out with rep movsb.
	 */
	leal	60(%esp),%esi
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	movl	32(%esp),%esi
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	40(%esp),%ecx
	movl	36(%esp),%edi
	leal	60(%esp),%esi
.align	4
.long	2767451785
	movl	28(%esp),%esp
	popfl
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin
/*
 * _x86_AES_set_encrypt_key: internal worker that expands a user key into an
 * encryption key schedule.
 *
 * It is reached through one extra "call" from the exported wrappers, so after
 * its own four register pushes the wrapper's arguments sit at:
 *   24(%esp)  source pointer the key words are read from     -> %esi
 *   28(%esp)  key length in bits (compared with 128/192/256) -> %ecx
 *   32(%esp)  destination pointer the schedule is written to -> %edi
 *
 * Result in %eax: 0 on success, -1 if either pointer is zero, -2 if the bit
 * length is not 128, 192 or 256. On success the round count (10/12/14) is
 * stored at byte offset 240 from the start of the schedule.
 */
.type	_x86_AES_set_encrypt_key,@function
.align	16
_x86_AES_set_encrypt_key:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	24(%esp),%esi
	movl	32(%esp),%edi
	/* Reject zero pointers (returns -1). */
	testl	$-1,%esi
	jz	.L040badpointer
	testl	$-1,%edi
	jz	.L040badpointer
	/* call/pop pair: obtain the current address so that the table can be
	   addressed position-independently. */
	call	.L041pic_point
.L041pic_point:
	popl	%ebp
	leal	.LAES_Te-.L041pic_point(%ebp),%ebp
	/* %ebp = .LAES_Te + 2176. The byte table used below starts at -128(%ebp)
	   and the word table indexed by the round counter at 896(%ebp). */
	leal	2176(%ebp),%ebp
	/* The next eight loads cover -128..96(%ebp) in 32-byte steps and their
	   results are overwritten before use; presumably they only pull the
	   byte table into cache (NOTE(review): confirm against the generator). */
	movl	-128(%ebp),%eax
	movl	-96(%ebp),%ebx
	movl	-64(%ebp),%ecx
	movl	-32(%ebp),%edx
	movl	(%ebp),%eax
	movl	32(%ebp),%ebx
	movl	64(%ebp),%ecx
	movl	96(%ebp),%edx
	/* Dispatch on the key length in bits. */
	movl	28(%esp),%ecx
	cmpl	$128,%ecx
	je	.L04210rounds
	cmpl	$192,%ecx
	je	.L04312rounds
	cmpl	$256,%ecx
	je	.L04414rounds
	movl	$-2,%eax
	jmp	.L045exit
/* ---- 128-bit key: copy 4 words, then 10 iterations of 4 new words ---- */
.L04210rounds:
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	/* %ecx = iteration counter; the first pass enters at the shortcut label
	   because %eax (word 0) and %edx (word 3) are still live. */
	xorl	%ecx,%ecx
	jmp	.L04610shortcut
.align	4
.L04710loop:
	movl	(%edi),%eax
	movl	12(%edi),%edx
.L04610shortcut:
	/* Substitute each byte of %edx through the byte table and XOR the
	   results into %eax at bit positions 24, 0, 8 and 16 (a byte-rotated
	   substitution of the previous word). */
	movzbl	%dl,%esi
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$24,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shrl	$16,%edx
	movzbl	%dl,%esi
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$8,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%eax
	/* Per-iteration word from the table at 896(%ebp); presumably the round
	   constant (table contents are outside this view). */
	xorl	896(%ebp,%ecx,4),%eax
	/* New words 4..7 are a running XOR with words 1..3. */
	movl	%eax,16(%edi)
	xorl	4(%edi),%eax
	movl	%eax,20(%edi)
	xorl	8(%edi),%eax
	movl	%eax,24(%edi)
	xorl	12(%edi),%eax
	movl	%eax,28(%edi)
	incl	%ecx
	addl	$16,%edi
	cmpl	$10,%ecx
	jl	.L04710loop
	/* %edi has advanced 10*16 = 160 bytes, so 80(%edi) is offset 240. */
	movl	$10,80(%edi)
	xorl	%eax,%eax
	jmp	.L045exit
/* ---- 192-bit key: copy 6 words, then iterate in 24-byte steps ---- */
.L04312rounds:
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	16(%esi),%ecx
	movl	20(%esi),%edx
	movl	%ecx,16(%edi)
	movl	%edx,20(%edi)
	xorl	%ecx,%ecx
	jmp	.L04812shortcut
.align	4
.L04912loop:
	movl	(%edi),%eax
	movl	20(%edi),%edx
.L04812shortcut:
	/* Same byte substitution pattern as the 128-bit path, applied to the
	   sixth word (%edx). */
	movzbl	%dl,%esi
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$24,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shrl	$16,%edx
	movzbl	%dl,%esi
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$8,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%eax
	xorl	896(%ebp,%ecx,4),%eax
	movl	%eax,24(%edi)
	xorl	4(%edi),%eax
	movl	%eax,28(%edi)
	xorl	8(%edi),%eax
	movl	%eax,32(%edi)
	xorl	12(%edi),%eax
	movl	%eax,36(%edi)
	/* The eighth pass (counter == 7) stops after four words; the last two
	   words of the group are not produced. */
	cmpl	$7,%ecx
	je	.L05012break
	incl	%ecx
	xorl	16(%edi),%eax
	movl	%eax,40(%edi)
	xorl	20(%edi),%eax
	movl	%eax,44(%edi)
	addl	$24,%edi
	jmp	.L04912loop
.L05012break:
	/* %edi has advanced 7*24 = 168 bytes, so 72(%edi) is offset 240. */
	movl	$12,72(%edi)
	xorl	%eax,%eax
	jmp	.L045exit
/* ---- 256-bit key: copy 8 words, then iterate in 32-byte steps ---- */
.L04414rounds:
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,16(%edi)
	movl	%ebx,20(%edi)
	movl	%ecx,24(%edi)
	movl	%edx,28(%edi)
	xorl	%ecx,%ecx
	jmp	.L05114shortcut
.align	4
.L05214loop:
	movl	28(%edi),%edx
.L05114shortcut:
	movl	(%edi),%eax
	/* First half: byte-rotated substitution of the eighth word plus the
	   per-iteration table word, as in the shorter key sizes. */
	movzbl	%dl,%esi
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$24,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shrl	$16,%edx
	movzbl	%dl,%esi
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$8,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shll	$16,%ebx
	xorl	%ebx,%eax
	xorl	896(%ebp,%ecx,4),%eax
	movl	%eax,32(%edi)
	xorl	4(%edi),%eax
	movl	%eax,36(%edi)
	xorl	8(%edi),%eax
	movl	%eax,40(%edi)
	xorl	12(%edi),%eax
	movl	%eax,44(%edi)
	/* The seventh pass (counter == 6) stops after the first four words. */
	cmpl	$6,%ecx
	je	.L05314break
	incl	%ecx
	/* Second half: substitute the bytes of the word just produced without
	   rotation (positions 0, 8, 16, 24) and without a table word. */
	movl	%eax,%edx
	movl	16(%edi),%eax
	movzbl	%dl,%esi
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shrl	$16,%edx
	shll	$8,%ebx
	movzbl	%dl,%esi
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	movzbl	%dh,%esi
	shll	$16,%ebx
	xorl	%ebx,%eax
	movzbl	-128(%ebp,%esi,1),%ebx
	shll	$24,%ebx
	xorl	%ebx,%eax
	movl	%eax,48(%edi)
	xorl	20(%edi),%eax
	movl	%eax,52(%edi)
	xorl	24(%edi),%eax
	movl	%eax,56(%edi)
	xorl	28(%edi),%eax
	movl	%eax,60(%edi)
	addl	$32,%edi
	jmp	.L05214loop
.L05314break:
	/* %edi has advanced 6*32 = 192 bytes, so 48(%edi) is offset 240. */
	movl	$14,48(%edi)
	xorl	%eax,%eax
	jmp	.L045exit
.L040badpointer:
	movl	$-1,%eax
.L045exit:
	/* Common epilogue: restore the callee-saved registers; %eax holds the
	   status code. */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
/*
 * AES_set_encrypt_key: exported entry point. It forwards its stack arguments
 * untouched to _x86_AES_set_encrypt_key and returns that routine's status
 * in %eax.
 */
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,@function
.align	16
AES_set_encrypt_key:
.L_AES_set_encrypt_key_begin:
	/* This must stay a call rather than a jump: the worker reads its inputs
	   at 24/28/32(%esp) after four pushes, i.e. it expects two return
	   addresses between its saved registers and the arguments. */
	call	_x86_AES_set_encrypt_key
	ret
.size	AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin
/*
 * AES_set_decrypt_key: exported entry point that builds a decryption key
 * schedule in place.
 *
 * Steps:
 *   1. Run _x86_AES_set_encrypt_key on the same arguments; a non-zero
 *      status is returned to the caller unchanged.
 *   2. Reverse the order of the 16-byte round keys (swap from both ends).
 *   3. Rewrite every word of round keys 1 .. rounds-1 with the packed-byte
 *      transform in the permute loop below.
 * Returns 0 in %eax on success.
 */
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,@function
.align	16
AES_set_decrypt_key:
.L_AES_set_decrypt_key_begin:
	call	_x86_AES_set_encrypt_key
	cmpl	$0,%eax
	je	.L054proceed
	ret
.L054proceed:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* 28(%esp) is the third argument (the schedule pointer); the round
	   count is read from offset 240 of it. */
	movl	28(%esp),%esi
	movl	240(%esi),%ecx
	/* %edi = schedule + rounds*16, i.e. the last round key. */
	leal	(,%ecx,4),%ecx
	leal	(%esi,%ecx,4),%edi
.align	4
.L055invert:
	/* Swap the 16-byte round keys at %esi and %edi, 8 bytes at a time, and
	   walk the two pointers towards each other until they meet. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	(%edi),%ecx
	movl	4(%edi),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,(%esi)
	movl	%edx,4(%esi)
	movl	8(%esi),%eax
	movl	12(%esi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	movl	%eax,8(%edi)
	movl	%ebx,12(%edi)
	movl	%ecx,8(%esi)
	movl	%edx,12(%esi)
	addl	$16,%esi
	subl	$16,%edi
	cmpl	%edi,%esi
	jne	.L055invert
	/* Loop bound: schedule + (rounds-1)*16, computed as (2*rounds-2)*8 and
	   parked in the argument slot at 28(%esp), which is no longer needed. */
	movl	28(%esp),%edi
	movl	240(%edi),%esi
	leal	-2(%esi,%esi,1),%esi
	leal	(%edi,%esi,8),%esi
	movl	%esi,28(%esp)
	/* Preload word 0 of round key 1. */
	movl	16(%edi),%eax
.align	4
.L056permute:
	/*
	 * One pass transforms the four words of the round key at %edi+16.
	 * Constants: 2155905152 = 0x80808080, 4278124286 = 0xFEFEFEFE,
	 * 454761243 = 0x1B1B1B1B. Each mask/shift/subtract group doubles all
	 * four packed bytes at once with a conditional 0x1B reduction; the x2,
	 * x4 and x8 results are then mixed with byte rotations of the input.
	 * NOTE(review): this matches the shape of AES InvMixColumns on a key
	 * word - confirm against the perlasm generator.
	 */
	addl	$16,%edi
	/* ---- word 0 (input in %eax) ---- */
	movl	$2155905152,%ebp
	andl	%eax,%ebp
	leal	(%eax,%eax,1),%ebx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%ebx
	andl	$454761243,%esi
	xorl	%esi,%ebx
	movl	$2155905152,%ebp
	andl	%ebx,%ebp
	leal	(%ebx,%ebx,1),%ecx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	xorl	%eax,%ebx
	xorl	%esi,%ecx
	movl	$2155905152,%ebp
	andl	%ecx,%ebp
	leal	(%ecx,%ecx,1),%edx
	movl	%ebp,%esi
	shrl	$7,%ebp
	xorl	%eax,%ecx
	subl	%ebp,%esi
	andl	$4278124286,%edx
	andl	$454761243,%esi
	roll	$8,%eax
	xorl	%esi,%edx
	/* The next input word is fetched early into %ebp. */
	movl	4(%edi),%ebp
	xorl	%ebx,%eax
	xorl	%edx,%ebx
	xorl	%ecx,%eax
	roll	$24,%ebx
	xorl	%edx,%ecx
	xorl	%edx,%eax
	roll	$16,%ecx
	xorl	%ebx,%eax
	roll	$8,%edx
	xorl	%ecx,%eax
	movl	%ebp,%ebx
	xorl	%edx,%eax
	movl	%eax,(%edi)
	/* ---- word 1 (input in %ebx) ---- */
	movl	$2155905152,%ebp
	andl	%ebx,%ebp
	leal	(%ebx,%ebx,1),%ecx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	xorl	%esi,%ecx
	movl	$2155905152,%ebp
	andl	%ecx,%ebp
	leal	(%ecx,%ecx,1),%edx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%edx
	andl	$454761243,%esi
	xorl	%ebx,%ecx
	xorl	%esi,%edx
	movl	$2155905152,%ebp
	andl	%edx,%ebp
	leal	(%edx,%edx,1),%eax
	movl	%ebp,%esi
	shrl	$7,%ebp
	xorl	%ebx,%edx
	subl	%ebp,%esi
	andl	$4278124286,%eax
	andl	$454761243,%esi
	roll	$8,%ebx
	xorl	%esi,%eax
	movl	8(%edi),%ebp
	xorl	%ecx,%ebx
	xorl	%eax,%ecx
	xorl	%edx,%ebx
	roll	$24,%ecx
	xorl	%eax,%edx
	xorl	%eax,%ebx
	roll	$16,%edx
	xorl	%ecx,%ebx
	roll	$8,%eax
	xorl	%edx,%ebx
	movl	%ebp,%ecx
	xorl	%eax,%ebx
	movl	%ebx,4(%edi)
	/* ---- word 2 (input in %ecx) ---- */
	movl	$2155905152,%ebp
	andl	%ecx,%ebp
	leal	(%ecx,%ecx,1),%edx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%edx
	andl	$454761243,%esi
	xorl	%esi,%edx
	movl	$2155905152,%ebp
	andl	%edx,%ebp
	leal	(%edx,%edx,1),%eax
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%eax
	andl	$454761243,%esi
	xorl	%ecx,%edx
	xorl	%esi,%eax
	movl	$2155905152,%ebp
	andl	%eax,%ebp
	leal	(%eax,%eax,1),%ebx
	movl	%ebp,%esi
	shrl	$7,%ebp
	xorl	%ecx,%eax
	subl	%ebp,%esi
	andl	$4278124286,%ebx
	andl	$454761243,%esi
	roll	$8,%ecx
	xorl	%esi,%ebx
	movl	12(%edi),%ebp
	xorl	%edx,%ecx
	xorl	%ebx,%edx
	xorl	%eax,%ecx
	roll	$24,%edx
	xorl	%ebx,%eax
	xorl	%ebx,%ecx
	roll	$16,%eax
	xorl	%edx,%ecx
	roll	$8,%ebx
	xorl	%eax,%ecx
	movl	%ebp,%edx
	xorl	%ebx,%ecx
	movl	%ecx,8(%edi)
	/* ---- word 3 (input in %edx) ---- */
	movl	$2155905152,%ebp
	andl	%edx,%ebp
	leal	(%edx,%edx,1),%eax
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%eax
	andl	$454761243,%esi
	xorl	%esi,%eax
	movl	$2155905152,%ebp
	andl	%eax,%ebp
	leal	(%eax,%eax,1),%ebx
	movl	%ebp,%esi
	shrl	$7,%ebp
	subl	%ebp,%esi
	andl	$4278124286,%ebx
	andl	$454761243,%esi
	xorl	%edx,%eax
	xorl	%esi,%ebx
	movl	$2155905152,%ebp
	andl	%ebx,%ebp
	leal	(%ebx,%ebx,1),%ecx
	movl	%ebp,%esi
	shrl	$7,%ebp
	xorl	%edx,%ebx
	subl	%ebp,%esi
	andl	$4278124286,%ecx
	andl	$454761243,%esi
	roll	$8,%edx
	xorl	%esi,%ecx
	/* Word 0 of the following round key, carried into the next pass. */
	movl	16(%edi),%ebp
	xorl	%eax,%edx
	xorl	%ecx,%eax
	xorl	%ebx,%edx
	roll	$24,%eax
	xorl	%ecx,%ebx
	xorl	%ecx,%edx
	roll	$16,%ebx
	xorl	%eax,%edx
	roll	$8,%ecx
	xorl	%ebx,%edx
	movl	%ebp,%eax
	xorl	%ecx,%edx
	movl	%edx,12(%edi)
	/* Continue while %edi is below the bound saved at 28(%esp). */
	cmpl	28(%esp),%edi
	jb	.L056permute
	/* Success: return 0. */
	xorl	%eax,%eax
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin
/* NUL-terminated ASCII identification string:
   "AES for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/* Common symbol OPENSSL_ia32cap_P: 16 bytes, 4-byte alignment. */
.comm	OPENSSL_ia32cap_P,16,4


================================================
FILE: lib/aes_faster_c/aes.cpp
================================================
/*
 *  FIPS-197 compliant AES implementation
 *
 *  Copyright (C) 2006-2014, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 *  The AES block cipher was designed by Vincent Rijmen and Joan Daemen.
 *
 *  http://csrc.nist.gov/encryption/aes/rijndael/Rijndael.pdf
 *  http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
 */

/*
#if !defined(POLARSSL_CONFIG_FILE)
#include "polarssl/config.h"
#else
#include POLARSSL_CONFIG_FILE
#endif
*/
//#if defined(POLARSSL_AES_C)


#include "aes.h"
/*
#if defined(POLARSSL_PADLOCK_C)
#include "polarssl/padlock.h"
#endif
#if defined(POLARSSL_AESNI_C)
#include "polarssl/aesni.h"
#endif

#if defined(POLARSSL_PLATFORM_C)
#include "polarssl/platform.h"
#else
#define polarssl_printf printf
#endif
*/

#if !defined(POLARSSL_AES_ALT)

/*
 * Overwrite the n bytes starting at v with zeros. Every store goes through a
 * volatile pointer so that the compiler is not allowed to elide it.
 */
static void polarssl_zeroize( void *v, size_t n )
{
    volatile unsigned char *cursor = (unsigned char *)v;
    size_t left;

    for( left = n; left != 0; left-- )
        *cursor++ = 0;
}

/*
 * 32-bit integer manipulation macros (little endian)
 *
 * GET_UINT32_LE(n,b,i): load the four bytes b[i] .. b[i+3] into n, with b[i]
 * as the least significant byte. Each byte is widened to uint32_t before it
 * is shifted.
 */
#ifndef GET_UINT32_LE
#define GET_UINT32_LE(n,b,i)                            \
{                                                       \
    (n) = ( (uint32_t) (b)[(i) + 3] << 24 )             \
        | ( (uint32_t) (b)[(i) + 2] << 16 )             \
        | ( (uint32_t) (b)[(i) + 1] <<  8 )             \
        | ( (uint32_t) (b)[(i)    ]       );            \
}
#endif

/*
 * PUT_UINT32_LE(n,b,i): store n into b[i] .. b[i+3], least significant byte
 * first.
 */
#ifndef PUT_UINT32_LE
#define PUT_UINT32_LE(n,b,i)                                    \
{                                                               \
    (b)[(i)    ] = (unsigned char) ( ( (n)       ) & 0xFF );    \
    (b)[(i) + 1] = (unsigned char) ( ( (n) >>  8 ) & 0xFF );    \
    (b)[(i) + 2] = (unsigned char) ( ( (n) >> 16 ) & 0xFF );    \
    (b)[(i) + 3] = (unsigned char) ( ( (n) >> 24 ) & 0xFF );    \
}
#endif

/*
 * Cached answer of the PadLock ACE capability probe. -1 means "not probed
 * yet"; the key-setup routines replace it with the result of
 * padlock_supports( PADLOCK_ACE ) the first time they run.
 */
#if defined(POLARSSL_PADLOCK_C) &&                      \
    ( defined(POLARSSL_HAVE_X86) || defined(PADLOCK_ALIGN16) )
static int aes_padlock_ace = -1;
#endif

#if defined(POLARSSL_AES_ROM_TABLES)
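/*
 * Forward S-box: FSb[x] is the AES byte substitution of x
 * (precomputed here because POLARSSL_AES_ROM_TABLES is defined)
 */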
static const unsigned char FSb[256] =
{
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
    0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
    0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
    0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
    0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
    0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
    0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
    0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
    0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
    0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
    0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
    0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
    0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
    0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
    0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
    0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
    0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};

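/*
 * Forward tables
 *
 * FT lists 256 entries V(a,b,c,d), one per input byte i. With S = FSb[i]
 * and GF(2^8) arithmetic: a = 3*S, b = c = S, d = 2*S. The V() macro is
 * defined several times below to paste the four bytes in different orders.
 */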
#define FT \
\
    V(A5,63,63,C6), V(84,7C,7C,F8), V(99,77,77,EE), V(8D,7B,7B,F6), \
    V(0D,F2,F2,FF), V(BD,6B,6B,D6), V(B1,6F,6F,DE), V(54,C5,C5,91), \
    V(50,30,30,60), V(03,01,01,02), V(A9,67,67,CE), V(7D,2B,2B,56), \
    V(19,FE,FE,E7), V(62,D7,D7,B5), V(E6,AB,AB,4D), V(9A,76,76,EC), \
    V(45,CA,CA,8F), V(9D,82,82,1F), V(40,C9,C9,89), V(87,7D,7D,FA), \
    V(15,FA,FA,EF), V(EB,59,59,B2), V(C9,47,47,8E), V(0B,F0,F0,FB), \
    V(EC,AD,AD,41), V(67,D4,D4,B3), V(FD,A2,A2,5F), V(EA,AF,AF,45), \
    V(BF,9C,9C,23), V(F7,A4,A4,53), V(96,72,72,E4), V(5B,C0,C0,9B), \
    V(C2,B7,B7,75), V(1C,FD,FD,E1), V(AE,93,93,3D), V(6A,26,26,4C), \
    V(5A,36,36,6C), V(41,3F,3F,7E), V(02,F7,F7,F5), V(4F,CC,CC,83), \
    V(5C,34,34,68), V(F4,A5,A5,51), V(34,E5,E5,D1), V(08,F1,F1,F9), \
    V(93,71,71,E2), V(73,D8,D8,AB), V(53,31,31,62), V(3F,15,15,2A), \
    V(0C,04,04,08), V(52,C7,C7,95), V(65,23,23,46), V(5E,C3,C3,9D), \
    V(28,18,18,30), V(A1,96,96,37), V(0F,05,05,0A), V(B5,9A,9A,2F), \
    V(09,07,07,0E), V(36,12,12,24), V(9B,80,80,1B), V(3D,E2,E2,DF), \
    V(26,EB,EB,CD), V(69,27,27,4E), V(CD,B2,B2,7F), V(9F,75,75,EA), \
    V(1B,09,09,12), V(9E,83,83,1D), V(74,2C,2C,58), V(2E,1A,1A,34), \
    V(2D,1B,1B,36), V(B2,6E,6E,DC), V(EE,5A,5A,B4), V(FB,A0,A0,5B), \
    V(F6,52,52,A4), V(4D,3B,3B,76), V(61,D6,D6,B7), V(CE,B3,B3,7D), \
    V(7B,29,29,52), V(3E,E3,E3,DD), V(71,2F,2F,5E), V(97,84,84,13), \
    V(F5,53,53,A6), V(68,D1,D1,B9), V(00,00,00,00), V(2C,ED,ED,C1), \
    V(60,20,20,40), V(1F,FC,FC,E3), V(C8,B1,B1,79), V(ED,5B,5B,B6), \
    V(BE,6A,6A,D4), V(46,CB,CB,8D), V(D9,BE,BE,67), V(4B,39,39,72), \
    V(DE,4A,4A,94), V(D4,4C,4C,98), V(E8,58,58,B0), V(4A,CF,CF,85), \
    V(6B,D0,D0,BB), V(2A,EF,EF,C5), V(E5,AA,AA,4F), V(16,FB,FB,ED), \
    V(C5,43,43,86), V(D7,4D,4D,9A), V(55,33,33,66), V(94,85,85,11), \
    V(CF,45,45,8A), V(10,F9,F9,E9), V(06,02,02,04), V(81,7F,7F,FE), \
    V(F0,50,50,A0), V(44,3C,3C,78), V(BA,9F,9F,25), V(E3,A8,A8,4B), \
    V(F3,51,51,A2), V(FE,A3,A3,5D), V(C0,40,40,80), V(8A,8F,8F,05), \
    V(AD,92,92,3F), V(BC,9D,9D,21), V(48,38,38,70), V(04,F5,F5,F1), \
    V(DF,BC,BC,63), V(C1,B6,B6,77), V(75,DA,DA,AF), V(63,21,21,42), \
    V(30,10,10,20), V(1A,FF,FF,E5), V(0E,F3,F3,FD), V(6D,D2,D2,BF), \
    V(4C,CD,CD,81), V(14,0C,0C,18), V(35,13,13,26), V(2F,EC,EC,C3), \
    V(E1,5F,5F,BE), V(A2,97,97,35), V(CC,44,44,88), V(39,17,17,2E), \
    V(57,C4,C4,93), V(F2,A7,A7,55), V(82,7E,7E,FC), V(47,3D,3D,7A), \
    V(AC,64,64,C8), V(E7,5D,5D,BA), V(2B,19,19,32), V(95,73,73,E6), \
    V(A0,60,60,C0), V(98,81,81,19), V(D1,4F,4F,9E), V(7F,DC,DC,A3), \
    V(66,22,22,44), V(7E,2A,2A,54), V(AB,90,90,3B), V(83,88,88,0B), \
    V(CA,46,46,8C), V(29,EE,EE,C7), V(D3,B8,B8,6B), V(3C,14,14,28), \
    V(79,DE,DE,A7), V(E2,5E,5E,BC), V(1D,0B,0B,16), V(76,DB,DB,AD), \
    V(3B,E0,E0,DB), V(56,32,32,64), V(4E,3A,3A,74), V(1E,0A,0A,14), \
    V(DB,49,49,92), V(0A,06,06,0C), V(6C,24,24,48), V(E4,5C,5C,B8), \
    V(5D,C2,C2,9F), V(6E,D3,D3,BD), V(EF,AC,AC,43), V(A6,62,62,C4), \
    V(A8,91,91,39), V(A4,95,95,31), V(37,E4,E4,D3), V(8B,79,79,F2), \
    V(32,E7,E7,D5), V(43,C8,C8,8B), V(59,37,37,6E), V(B7,6D,6D,DA), \
    V(8C,8D,8D,01), V(64,D5,D5,B1), V(D2,4E,4E,9C), V(E0,A9,A9,49), \
    V(B4,6C,6C,D8), V(FA,56,56,AC), V(07,F4,F4,F3), V(25,EA,EA,CF), \
    V(AF,65,65,CA), V(8E,7A,7A,F4), V(E9,AE,AE,47), V(18,08,08,10), \
    V(D5,BA,BA,6F), V(88,78,78,F0), V(6F,25,25,4A), V(72,2E,2E,5C), \
    V(24,1C,1C,38), V(F1,A6,A6,57), V(C7,B4,B4,73), V(51,C6,C6,97), \
    V(23,E8,E8,CB), V(7C,DD,DD,A1), V(9C,74,74,E8), V(21,1F,1F,3E), \
    V(DD,4B,4B,96), V(DC,BD,BD,61), V(86,8B,8B,0D), V(85,8A,8A,0F), \
    V(90,70,70,E0), V(42,3E,3E,7C), V(C4,B5,B5,71), V(AA,66,66,CC), \
    V(D8,48,48,90), V(05,03,03,06), V(01,F6,F6,F7), V(12,0E,0E,1C), \
    V(A3,61,61,C2), V(5F,35,35,6A), V(F9,57,57,AE), V(D0,B9,B9,69), \
    V(91,86,86,17), V(58,C1,C1,99), V(27,1D,1D,3A), V(B9,9E,9E,27), \
    V(38,E1,E1,D9), V(13,F8,F8,EB), V(B3,98,98,2B), V(33,11,11,22), \
    V(BB,69,69,D2), V(70,D9,D9,A9), V(89,8E,8E,07), V(A7,94,94,33), \
    V(B6,9B,9B,2D), V(22,1E,1E,3C), V(92,87,87,15), V(20,E9,E9,C9), \
    V(49,CE,CE,87), V(FF,55,55,AA), V(78,28,28,50), V(7A,DF,DF,A5), \
    V(8F,8C,8C,03), V(F8,A1,A1,59), V(80,89,89,09), V(17,0D,0D,1A), \
    V(DA,BF,BF,65), V(31,E6,E6,D7), V(C6,42,42,84), V(B8,68,68,D0), \
    V(C3,41,41,82), V(B0,99,99,29), V(77,2D,2D,5A), V(11,0F,0F,1E), \
    V(CB,B0,B0,7B), V(FC,54,54,A8), V(D6,BB,BB,6D), V(3A,16,16,2C)

/*
 * Instantiate the four forward tables from the FT list. Each V() variant
 * pastes the same four hex byte tokens in a different order, so that
 * FT1, FT2 and FT3 hold the FT0 words rotated left by 8, 16 and 24 bits.
 */

/* FT0: bytes in listed order, 0xaabbccdd */
#define V(a,b,c,d) 0x##a##b##c##d
static const uint32_t FT0[256] = { FT };
#undef V

/* FT1: FT0 rotated left by 8 bits */
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t FT1[256] = { FT };
#undef V

/* FT2: FT0 rotated left by 16 bits */
#define V(a,b,c,d) 0x##c##d##a##b
static const uint32_t FT2[256] = { FT };
#undef V

/* FT3: FT0 rotated left by 24 bits */
#define V(a,b,c,d) 0x##d##a##b##c
static const uint32_t FT3[256] = { FT };
#undef V

/* The entry list is no longer needed once all four tables exist. */
#undef FT

/*
 * Reverse S-box: the inverse of FSb, i.e. RSb[ FSb[x] ] == x
 */
static const unsigned char RSb[256] =
{
    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
    0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
    0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
    0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
    0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
    0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
    0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
    0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
    0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
    0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
    0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
    0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
    0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
    0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
    0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
    0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
    0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};

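/*
 * Reverse tables
 *
 * RT lists 256 entries V(a,b,c,d), one per input byte i. With x = RSb[i]
 * and GF(2^8) arithmetic: a = 0x0B*x, b = 0x0D*x, c = 0x09*x, d = 0x0E*x.
 * The V() macro is defined several times below to paste the four bytes in
 * different orders.
 */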
#define RT \
\
    V(50,A7,F4,51), V(53,65,41,7E), V(C3,A4,17,1A), V(96,5E,27,3A), \
    V(CB,6B,AB,3B), V(F1,45,9D,1F), V(AB,58,FA,AC), V(93,03,E3,4B), \
    V(55,FA,30,20), V(F6,6D,76,AD), V(91,76,CC,88), V(25,4C,02,F5), \
    V(FC,D7,E5,4F), V(D7,CB,2A,C5), V(80,44,35,26), V(8F,A3,62,B5), \
    V(49,5A,B1,DE), V(67,1B,BA,25), V(98,0E,EA,45), V(E1,C0,FE,5D), \
    V(02,75,2F,C3), V(12,F0,4C,81), V(A3,97,46,8D), V(C6,F9,D3,6B), \
    V(E7,5F,8F,03), V(95,9C,92,15), V(EB,7A,6D,BF), V(DA,59,52,95), \
    V(2D,83,BE,D4), V(D3,21,74,58), V(29,69,E0,49), V(44,C8,C9,8E), \
    V(6A,89,C2,75), V(78,79,8E,F4), V(6B,3E,58,99), V(DD,71,B9,27), \
    V(B6,4F,E1,BE), V(17,AD,88,F0), V(66,AC,20,C9), V(B4,3A,CE,7D), \
    V(18,4A,DF,63), V(82,31,1A,E5), V(60,33,51,97), V(45,7F,53,62), \
    V(E0,77,64,B1), V(84,AE,6B,BB), V(1C,A0,81,FE), V(94,2B,08,F9), \
    V(58,68,48,70), V(19,FD,45,8F), V(87,6C,DE,94), V(B7,F8,7B,52), \
    V(23,D3,73,AB), V(E2,02,4B,72), V(57,8F,1F,E3), V(2A,AB,55,66), \
    V(07,28,EB,B2), V(03,C2,B5,2F), V(9A,7B,C5,86), V(A5,08,37,D3), \
    V(F2,87,28,30), V(B2,A5,BF,23), V(BA,6A,03,02), V(5C,82,16,ED), \
    V(2B,1C,CF,8A), V(92,B4,79,A7), V(F0,F2,07,F3), V(A1,E2,69,4E), \
    V(CD,F4,DA,65), V(D5,BE,05,06), V(1F,62,34,D1), V(8A,FE,A6,C4), \
    V(9D,53,2E,34), V(A0,55,F3,A2), V(32,E1,8A,05), V(75,EB,F6,A4), \
    V(39,EC,83,0B), V(AA,EF,60,40), V(06,9F,71,5E), V(51,10,6E,BD), \
    V(F9,8A,21,3E), V(3D,06,DD,96), V(AE,05,3E,DD), V(46,BD,E6,4D), \
    V(B5,8D,54,91), V(05,5D,C4,71), V(6F,D4,06,04), V(FF,15,50,60), \
    V(24,FB,98,19), V(97,E9,BD,D6), V(CC,43,40,89), V(77,9E,D9,67), \
    V(BD,42,E8,B0), V(88,8B,89,07), V(38,5B,19,E7), V(DB,EE,C8,79), \
    V(47,0A,7C,A1), V(E9,0F,42,7C), V(C9,1E,84,F8), V(00,00,00,00), \
    V(83,86,80,09), V(48,ED,2B,32), V(AC,70,11,1E), V(4E,72,5A,6C), \
    V(FB,FF,0E,FD), V(56,38,85,0F), V(1E,D5,AE,3D), V(27,39,2D,36), \
    V(64,D9,0F,0A), V(21,A6,5C,68), V(D1,54,5B,9B), V(3A,2E,36,24), \
    V(B1,67,0A,0C), V(0F,E7,57,93), V(D2,96,EE,B4), V(9E,91,9B,1B), \
    V(4F,C5,C0,80), V(A2,20,DC,61), V(69,4B,77,5A), V(16,1A,12,1C), \
    V(0A,BA,93,E2), V(E5,2A,A0,C0), V(43,E0,22,3C), V(1D,17,1B,12), \
    V(0B,0D,09,0E), V(AD,C7,8B,F2), V(B9,A8,B6,2D), V(C8,A9,1E,14), \
    V(85,19,F1,57), V(4C,07,75,AF), V(BB,DD,99,EE), V(FD,60,7F,A3), \
    V(9F,26,01,F7), V(BC,F5,72,5C), V(C5,3B,66,44), V(34,7E,FB,5B), \
    V(76,29,43,8B), V(DC,C6,23,CB), V(68,FC,ED,B6), V(63,F1,E4,B8), \
    V(CA,DC,31,D7), V(10,85,63,42), V(40,22,97,13), V(20,11,C6,84), \
    V(7D,24,4A,85), V(F8,3D,BB,D2), V(11,32,F9,AE), V(6D,A1,29,C7), \
    V(4B,2F,9E,1D), V(F3,30,B2,DC), V(EC,52,86,0D), V(D0,E3,C1,77), \
    V(6C,16,B3,2B), V(99,B9,70,A9), V(FA,48,94,11), V(22,64,E9,47), \
    V(C4,8C,FC,A8), V(1A,3F,F0,A0), V(D8,2C,7D,56), V(EF,90,33,22), \
    V(C7,4E,49,87), V(C1,D1,38,D9), V(FE,A2,CA,8C), V(36,0B,D4,98), \
    V(CF,81,F5,A6), V(28,DE,7A,A5), V(26,8E,B7,DA), V(A4,BF,AD,3F), \
    V(E4,9D,3A,2C), V(0D,92,78,50), V(9B,CC,5F,6A), V(62,46,7E,54), \
    V(C2,13,8D,F6), V(E8,B8,D8,90), V(5E,F7,39,2E), V(F5,AF,C3,82), \
    V(BE,80,5D,9F), V(7C,93,D0,69), V(A9,2D,D5,6F), V(B3,12,25,CF), \
    V(3B,99,AC,C8), V(A7,7D,18,10), V(6E,63,9C,E8), V(7B,BB,3B,DB), \
    V(09,78,26,CD), V(F4,18,59,6E), V(01,B7,9A,EC), V(A8,9A,4F,83), \
    V(65,6E,95,E6), V(7E,E6,FF,AA), V(08,CF,BC,21), V(E6,E8,15,EF), \
    V(D9,9B,E7,BA), V(CE,36,6F,4A), V(D4,09,9F,EA), V(D6,7C,B0,29), \
    V(AF,B2,A4,31), V(31,23,3F,2A), V(30,94,A5,C6), V(C0,66,A2,35), \
    V(37,BC,4E,74), V(A6,CA,82,FC), V(B0,D0,90,E0), V(15,D8,A7,33), \
    V(4A,98,04,F1), V(F7,DA,EC,41), V(0E,50,CD,7F), V(2F,F6,91,17), \
    V(8D,D6,4D,76), V(4D,B0,EF,43), V(54,4D,AA,CC), V(DF,04,96,E4), \
    V(E3,B5,D1,9E), V(1B,88,6A,4C), V(B8,1F,2C,C1), V(7F,51,65,46), \
    V(04,EA,5E,9D), V(5D,35,8C,01), V(73,74,87,FA), V(2E,41,0B,FB), \
    V(5A,1D,67,B3), V(52,D2,DB,92), V(33,56,10,E9), V(13,47,D6,6D), \
    V(8C,61,D7,9A), V(7A,0C,A1,37), V(8E,14,F8,59), V(89,3C,13,EB), \
    V(EE,27,A9,CE), V(35,C9,61,B7), V(ED,E5,1C,E1), V(3C,B1,47,7A), \
    V(59,DF,D2,9C), V(3F,73,F2,55), V(79,CE,14,18), V(BF,37,C7,73), \
    V(EA,CD,F7,53), V(5B,AA,FD,5F), V(14,6F,3D,DF), V(86,DB,44,78), \
    V(81,F3,AF,CA), V(3E,C4,68,B9), V(2C,34,24,38), V(5F,40,A3,C2), \
    V(72,C3,1D,16), V(0C,25,E2,BC), V(8B,49,3C,28), V(41,95,0D,FF), \
    V(71,01,A8,39), V(DE,B3,0C,08), V(9C,E4,B4,D8), V(90,C1,56,64), \
    V(61,84,CB,7B), V(70,B6,32,D5), V(74,5C,6C,48), V(42,57,B8,D0)

/*
 * Instantiate the four reverse tables from the RT list. Each V() variant
 * pastes the same four hex byte tokens in a different order, so that
 * RT1, RT2 and RT3 hold the RT0 words rotated left by 8, 16 and 24 bits.
 */

/* RT0: bytes in listed order, 0xaabbccdd */
#define V(a,b,c,d) 0x##a##b##c##d
static const uint32_t RT0[256] = { RT };
#undef V

/* RT1: RT0 rotated left by 8 bits */
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t RT1[256] = { RT };
#undef V

/* RT2: RT0 rotated left by 16 bits */
#define V(a,b,c,d) 0x##c##d##a##b
static const uint32_t RT2[256] = { RT };
#undef V

/* RT3: RT0 rotated left by 24 bits */
#define V(a,b,c,d) 0x##d##a##b##c
static const uint32_t RT3[256] = { RT };
#undef V

/* The entry list is no longer needed once all four tables exist. */
#undef RT

/*
 * Round constants used by the key schedule, one per key-expansion step
 * (only the low byte of each word is non-zero)
 */
static const uint32_t RCON[10] =
{
    0x01, 0x02, 0x04, 0x08, 0x10,
    0x20, 0x40, 0x80, 0x1B, 0x36
};

#else /* POLARSSL_AES_ROM_TABLES */

/*
 * Forward S-box & tables
 *
 * Run-time variants of the tables: zero-initialized statics that
 * aes_gen_tables() fills in before first use.
 */
static unsigned char FSb[256];
static uint32_t FT0[256];
static uint32_t FT1[256];
static uint32_t FT2[256];
static uint32_t FT3[256];

/*
 * Reverse S-box & tables (also filled in by aes_gen_tables())
 */
static unsigned char RSb[256];
static uint32_t RT0[256];
static uint32_t RT1[256];
static uint32_t RT2[256];
static uint32_t RT3[256];

/*
 * Round constants (also filled in by aes_gen_tables())
 */
static uint32_t RCON[10];

/*
 * Tables generation code
 *
 * Helper macros for aes_gen_tables():
 *   ROTL8(x)  rotate a 32-bit value left by 8 bits
 *   XTIME(x)  double x in GF(2^8); the caller masks the result to 8 bits
 *   MUL(x,y)  multiply in GF(2^8) using the pow[] and log[] arrays that must
 *             be in scope at the point of use; yields 0 if either operand
 *             is 0
 *
 * Every argument and every replacement list is parenthesized so that the
 * macros expand correctly for compound arguments and inside larger
 * expressions. Arguments are still evaluated more than once, so they must
 * be free of side effects.
 */
#define ROTL8(x) ( ( ( (x) << 8 ) & 0xFFFFFFFF ) | ( (x) >> 24 ) )
#define XTIME(x) ( ( (x) << 1 ) ^ ( ( (x) & 0x80 ) ? 0x1B : 0x00 ) )
#define MUL(x,y) ( ( (x) && (y) ) ? pow[(log[(x)]+log[(y)]) % 255] : 0 )

/* Set to 1 by aes_setkey_enc() once aes_gen_tables() has run. */
static int aes_init_done = 0;

/*
 * Fill the run-time tables: RCON, the forward/reverse S-boxes (FSb, RSb) and
 * the forward/reverse round tables (FT0..FT3, RT0..RT3).
 */
static void aes_gen_tables( void )
{
    /* The array names pow and log are fixed: the MUL() macro refers to them. */
    int pow[256];
    int log[256];
    int i, k, x, y, z;

    /* Powers of the generator 0x03 over GF(2^8) and the matching logs. */
    x = 1;
    for( i = 0; i < 256; i++ )
    {
        pow[i] = x;
        log[x] = i;
        x = ( x ^ XTIME( x ) ) & 0xFF;
    }

    /* Round constants: repeated doubling starting from 1. */
    x = 1;
    for( i = 0; i < 10; i++ )
    {
        RCON[i] = (uint32_t) x;
        x = XTIME( x ) & 0xFF;
    }

    /* S-boxes; the zero input is set by hand because it has no logarithm. */
    FSb[0x00] = 0x63;
    RSb[0x63] = 0x00;

    for( i = 1; i < 256; i++ )
    {
        /* multiplicative inverse of i */
        x = pow[255 - log[i]];

        /* XOR in the four successive 1-bit left rotations of the inverse,
         * then the constant 0x63 */
        y = x;
        for( k = 0; k < 4; k++ )
        {
            y = ( ( y << 1 ) | ( y >> 7 ) ) & 0xFF;
            x ^= y;
        }
        x ^= 0x63;

        FSb[i] = (unsigned char) x;
        RSb[x] = (unsigned char) i;
    }

    /* Round tables: table 0 is computed, tables 1..3 are byte rotations. */
    for( i = 0; i < 256; i++ )
    {
        x = FSb[i];
        y = XTIME( x ) & 0xFF;      /* 2 * S */
        z = ( y ^ x ) & 0xFF;       /* 3 * S */

        FT0[i] = ( (uint32_t) z << 24 ) ^
                 ( (uint32_t) x << 16 ) ^
                 ( (uint32_t) x <<  8 ) ^
                 ( (uint32_t) y       );

        FT1[i] = ROTL8( FT0[i] );
        FT2[i] = ROTL8( FT1[i] );
        FT3[i] = ROTL8( FT2[i] );

        x = RSb[i];

        RT0[i] = ( (uint32_t) MUL( 0x0B, x ) << 24 ) ^
                 ( (uint32_t) MUL( 0x0D, x ) << 16 ) ^
                 ( (uint32_t) MUL( 0x09, x ) <<  8 ) ^
                 ( (uint32_t) MUL( 0x0E, x )       );

        RT1[i] = ROTL8( RT0[i] );
        RT2[i] = ROTL8( RT1[i] );
        RT3[i] = ROTL8( RT2[i] );
    }
}

#endif /* POLARSSL_AES_ROM_TABLES */

/*
 * Put an AES context into its initial state by zero-filling the whole
 * structure.
 */
void aes_init( aes_context *ctx )
{
    memset( ctx, 0, sizeof( *ctx ) );
}

/*
 * Wipe an AES context with polarssl_zeroize(). A NULL context is accepted
 * and ignored.
 */
void aes_free( aes_context *ctx )
{
    if( ctx != NULL )
        polarssl_zeroize( ctx, sizeof( *ctx ) );
}

/*
 * AES key schedule (encryption)
 *
 * Expands key (keysize bits: 128, 192 or 256) into the round keys held in
 * ctx->buf, points ctx->rk at them and sets ctx->nr to 10, 12 or 14.
 * Returns 0 on success or POLARSSL_ERR_AES_INVALID_KEY_LENGTH for any other
 * keysize. When the AES-NI path is compiled in and available, the expansion
 * is delegated to aesni_setkey_enc() and its result is returned.
 */
int aes_setkey_enc( aes_context *ctx, const unsigned char *key,
                    unsigned int keysize )
{
    unsigned int step, word;
    uint32_t *rkp;
    uint32_t prev;

#if !defined(POLARSSL_AES_ROM_TABLES)
    /* The tables are generated lazily on the first key setup. */
    if( aes_init_done == 0 )
    {
        aes_gen_tables();
        aes_init_done = 1;
    }
#endif

    if( keysize == 128 )
        ctx->nr = 10;
    else if( keysize == 192 )
        ctx->nr = 12;
    else if( keysize == 256 )
        ctx->nr = 14;
    else
        return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH );

#if defined(POLARSSL_PADLOCK_C) && defined(PADLOCK_ALIGN16)
    if( aes_padlock_ace == -1 )
        aes_padlock_ace = padlock_supports( PADLOCK_ACE );

    if( aes_padlock_ace )
        ctx->rk = rkp = PADLOCK_ALIGN16( ctx->buf );
    else
#endif
    ctx->rk = rkp = ctx->buf;

#if defined(POLARSSL_AESNI_C) && defined(POLARSSL_HAVE_X86_64)
    if( aesni_supports( POLARSSL_AESNI_AES ) )
        return( aesni_setkey_enc( (unsigned char *) ctx->rk, key, keysize ) );
#endif

    /* Load the raw key as little-endian 32-bit words. */
    for( word = 0; word < keysize / 32; word++ )
    {
        GET_UINT32_LE( rkp[word], key, word * 4 );
    }

    if( ctx->nr == 10 )
    {
        /* 128-bit key: every step derives 4 new words from the previous 4. */
        for( step = 0; step < 10; step++ )
        {
            prev = rkp[3];

            rkp[4] = rkp[0] ^ RCON[step] ^
                     ( (uint32_t) FSb[ ( prev >>  8 ) & 0xFF ]       ) ^
                     ( (uint32_t) FSb[ ( prev >> 16 ) & 0xFF ] <<  8 ) ^
                     ( (uint32_t) FSb[ ( prev >> 24 ) & 0xFF ] << 16 ) ^
                     ( (uint32_t) FSb[ ( prev       ) & 0xFF ] << 24 );
            rkp[5] = rkp[1] ^ rkp[4];
            rkp[6] = rkp[2] ^ rkp[5];
            rkp[7] = rkp[3] ^ rkp[6];

            rkp += 4;
        }
    }
    else if( ctx->nr == 12 )
    {
        /* 192-bit key: every step derives 6 new words from the previous 6. */
        for( step = 0; step < 8; step++ )
        {
            prev = rkp[5];

            rkp[6]  = rkp[0] ^ RCON[step] ^
                      ( (uint32_t) FSb[ ( prev >>  8 ) & 0xFF ]       ) ^
                      ( (uint32_t) FSb[ ( prev >> 16 ) & 0xFF ] <<  8 ) ^
                      ( (uint32_t) FSb[ ( prev >> 24 ) & 0xFF ] << 16 ) ^
                      ( (uint32_t) FSb[ ( prev       ) & 0xFF ] << 24 );
            rkp[7]  = rkp[1] ^ rkp[6];
            rkp[8]  = rkp[2] ^ rkp[7];
            rkp[9]  = rkp[3] ^ rkp[8];
            rkp[10] = rkp[4] ^ rkp[9];
            rkp[11] = rkp[5] ^ rkp[10];

            rkp += 6;
        }
    }
    else if( ctx->nr == 14 )
    {
        /* 256-bit key: every step derives 8 new words from the previous 8. */
        for( step = 0; step < 7; step++ )
        {
            prev = rkp[7];

            rkp[8]  = rkp[0] ^ RCON[step] ^
                      ( (uint32_t) FSb[ ( prev >>  8 ) & 0xFF ]       ) ^
                      ( (uint32_t) FSb[ ( prev >> 16 ) & 0xFF ] <<  8 ) ^
                      ( (uint32_t) FSb[ ( prev >> 24 ) & 0xFF ] << 16 ) ^
                      ( (uint32_t) FSb[ ( prev       ) & 0xFF ] << 24 );
            rkp[9]  = rkp[1] ^ rkp[8];
            rkp[10] = rkp[2] ^ rkp[9];
            rkp[11] = rkp[3] ^ rkp[10];

            /* The middle word is substituted without byte rotation and
             * without a round constant. */
            prev = rkp[11];

            rkp[12] = rkp[4] ^
                      ( (uint32_t) FSb[ ( prev       ) & 0xFF ]       ) ^
                      ( (uint32_t) FSb[ ( prev >>  8 ) & 0xFF ] <<  8 ) ^
                      ( (uint32_t) FSb[ ( prev >> 16 ) & 0xFF ] << 16 ) ^
                      ( (uint32_t) FSb[ ( prev >> 24 ) & 0xFF ] << 24 );
            rkp[13] = rkp[5] ^ rkp[12];
            rkp[14] = rkp[6] ^ rkp[13];
            rkp[15] = rkp[7] ^ rkp[14];

            rkp += 8;
        }
    }

    return( 0 );
}

/*
 * AES key schedule (decryption)
 *
 * Builds the encryption schedule in a temporary context, then writes its
 * round keys into ctx in reverse order. The first and last round keys are
 * copied verbatim; every word of the keys in between is passed through the
 * RT0..RT3 tables indexed by FSb. Returns 0 on success or the error from
 * aes_setkey_enc() (which also validates keysize). The temporary context is
 * wiped on every exit path.
 */
int aes_setkey_dec( aes_context *ctx, const unsigned char *key,
                    unsigned int keysize )
{
    int round, col, ret;
    aes_context enc;
    uint32_t *out;
    uint32_t *in;
    uint32_t w;

    aes_init( &enc );

#if defined(POLARSSL_PADLOCK_C) && defined(PADLOCK_ALIGN16)
    if( aes_padlock_ace == -1 )
        aes_padlock_ace = padlock_supports( PADLOCK_ACE );

    if( aes_padlock_ace )
        ctx->rk = out = PADLOCK_ALIGN16( ctx->buf );
    else
#endif
    ctx->rk = out = ctx->buf;

    /* Also checks keysize */
    ret = aes_setkey_enc( &enc, key, keysize );
    if( ret != 0 )
        goto exit;

    ctx->nr = enc.nr;

#if defined(POLARSSL_AESNI_C) && defined(POLARSSL_HAVE_X86_64)
    if( aesni_supports( POLARSSL_AESNI_AES ) )
    {
        aesni_inverse_key( (unsigned char *) ctx->rk,
                           (const unsigned char *) enc.rk, ctx->nr );
        goto exit;
    }
#endif

    /* The last encryption round key becomes the first decryption key. */
    in = enc.rk + enc.nr * 4;
    for( col = 0; col < 4; col++ )
        out[col] = in[col];
    out += 4;

    /* Round keys nr-1 down to 1, each word run through the reverse tables. */
    for( round = ctx->nr - 1; round > 0; round-- )
    {
        in = enc.rk + round * 4;

        for( col = 0; col < 4; col++ )
        {
            w = in[col];

            out[col] = RT0[ FSb[ ( w       ) & 0xFF ] ] ^
                       RT1[ FSb[ ( w >>  8 ) & 0xFF ] ] ^
                       RT2[ FSb[ ( w >> 16 ) & 0xFF ] ] ^
                       RT3[ FSb[ ( w >> 24 ) & 0xFF ] ];
        }
        out += 4;
    }

    /* The first encryption round key becomes the last decryption key. */
    in = enc.rk;
    for( col = 0; col < 4; col++ )
        out[col] = in[col];

exit:
    aes_free( &enc );

    return( ret );
}

/*
 * One table-driven forward round: reads the state words Y0..Y3 and writes
 * the next state into X0..X3.
 *
 * Output word Xi is the next round-key word (*RK++) XORed with four table
 * lookups: FT0 on byte 0 of Yi, FT1 on byte 1 of Y(i+1), FT2 on byte 2 of
 * Y(i+2) and FT3 on byte 3 of Y(i+3), indices taken modulo 4.
 *
 * Requirements on the caller:
 *  - a `uint32_t *RK` must be in scope; it is advanced by four words;
 *  - X0..X3 must be different lvalues from Y0..Y3, because Y words are
 *    still read after earlier X words have been assigned;
 *  - every argument is expanded several times, so pass plain variables.
 */
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3)     \
{                                               \
    X0 = *RK++ ^ FT0[ ( Y0       ) & 0xFF ] ^   \
                 FT1[ ( Y1 >>  8 ) & 0xFF ] ^   \
                 FT2[ ( Y2 >> 16 ) & 0xFF ] ^   \
                 FT3[ ( Y3 >> 24 ) & 0xFF ];    \
                                                \
    X1 = *RK++ ^ FT0[ ( Y1       ) & 0xFF ] ^   \
                 FT1[ ( Y2 >>  8 ) & 0xFF ] ^   \
                 FT2[ ( Y3 >> 16 ) & 0xFF ] ^   \
                 FT3[ ( Y0 >> 24 ) & 0xFF ];    \
                                                \
    X2 = *RK++ ^ FT0[ ( Y2       ) & 0xFF ] ^   \
                 FT1[ ( Y3 >>  8 ) & 0xFF ] ^   \
                 FT2[ ( Y0 >> 16 ) & 0xFF ] ^   \
                 FT3[ ( Y1 >> 24 ) & 0xFF ];    \
                                                \
    X3 = *RK++ ^ FT0[ ( Y3       ) & 0xFF ] ^   \
                 FT1[ ( Y0 >>  8 ) & 0xFF ] ^   \
                 FT2[ ( Y1 >> 16 ) & 0xFF ] ^   \
                 FT3[ ( Y2 >> 24 ) & 0xFF ];    \
}

/*
 * One table-driven reverse round: reads the state words Y0..Y3 and writes
 * the next state into X0..X3.
 *
 * Output word Xi is the next round-key word (*RK++) XORed with four table
 * lookups: RT0 on byte 0 of Yi, RT1 on byte 1 of Y(i-1), RT2 on byte 2 of
 * Y(i-2) and RT3 on byte 3 of Y(i-3), indices taken modulo 4 (the word
 * rotation runs in the opposite direction to AES_FROUND).
 *
 * Requirements on the caller:
 *  - a `uint32_t *RK` must be in scope; it is advanced by four words;
 *  - X0..X3 must be different lvalues from Y0..Y3, because Y words are
 *    still read after earlier X words have been assigned;
 *  - every argument is expanded several times, so pass plain variables.
 */
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3)     \
{                                               \
    X0 = *RK++ ^ RT0[ ( Y0       ) & 0xFF ] ^   \
                 RT1[ ( Y3 >>  8 ) & 0xFF ] ^   \
                 RT2[ ( Y2 >> 16 ) & 0xFF ] ^   \
                 RT3[ ( Y1 >> 24 ) & 0xFF ];    \
                                                \
    X1 = *RK++ ^ RT0[ ( Y1       ) & 0xFF ] ^   \
                 RT1[ ( Y0 >>  8 ) & 0xFF ] ^   \
                 RT2[ ( Y3 >> 16 ) & 0xFF ] ^   \
                 RT3[ ( Y2 >> 24 ) & 0xFF ];    \
                                                \
    X2 = *RK++ ^ RT0[ ( Y2       ) & 0xFF ] ^   \
                 RT1[ ( Y1 >>  8 ) & 0xFF ] ^   \
                 RT2[ ( Y0 >> 16 ) & 0xFF ] ^   \
                 RT3[ ( Y3 >> 24 ) & 0xFF ];    \
                                                \
    X3 = *RK++ ^ RT0[ ( Y3       ) & 0xFF ] ^   \
                 RT1[ ( Y2 >>  8 ) & 0xFF ] ^   \
                 RT2[ ( Y1 >> 16 ) & 0xFF ] ^   \
                 RT3[ ( Y0 >> 24 ) & 0xFF ];    \
}

/*
 * AES-ECB block encryption/decryption
 *
 * Transforms exactly one 16-byte block.
 *
 *  ctx     context whose round keys (ctx->rk) and round count (ctx->nr)
 *          were prepared by a key-schedule call; aes_self_test() pairs
 *          aes_setkey_dec() with AES_DECRYPT and aes_setkey_enc() with
 *          AES_ENCRYPT
 *  mode    AES_DECRYPT selects the reverse path; any other value takes
 *          the forward (encrypt) path
 *  input   16-byte source block
 *  output  16-byte destination block; callers in this file pass the same
 *          buffer as input, which works because all four input words are
 *          loaded before anything is stored
 *
 * When the AES-NI or padlock back ends are compiled in and usable the
 * work is delegated to them; otherwise the table-based software path
 * below runs and always returns 0.
 */
int aes_crypt_ecb( aes_context *ctx,
                    int mode,
                    const unsigned char input[16],
                    unsigned char output[16] )
{
    int i;
    uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;

#if defined(POLARSSL_AESNI_C) && defined(POLARSSL_HAVE_X86_64)
    /* Hardware path: hand the whole block to the AES-NI implementation. */
    if( aesni_supports( POLARSSL_AESNI_AES ) )
        return( aesni_crypt_ecb( ctx, mode, input, output ) );
#endif

#if defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86)
    if( aes_padlock_ace )
    {
        if( padlock_xcryptecb( ctx, mode, input, output ) == 0 )
            return( 0 );

        // If padlock data misaligned, we just fall back to
        // unaccelerated mode
        //
    }
#endif

    /* RK walks the round-key array; the round macros advance it too. */
    RK = ctx->rk;

    /* Load the block as four 32-bit words and mix in the first round key. */
    GET_UINT32_LE( X0, input,  0 ); X0 ^= *RK++;
    GET_UINT32_LE( X1, input,  4 ); X1 ^= *RK++;
    GET_UINT32_LE( X2, input,  8 ); X2 ^= *RK++;
    GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++;

    if( mode == AES_DECRYPT )
    {
        /* Two table rounds per iteration, ping-ponging between X and Y:
         * 2 * ( nr/2 - 1 ) rounds here, one more below, plus the final
         * round, for nr rounds in total. */
        for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
        {
            AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
            AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
        }

        AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );

        /* Final round: plain RSb byte substitution with the same word
         * rotation as AES_RROUND, XORed with the last round key. */
        X0 = *RK++ ^ \
                ( (uint32_t) RSb[ ( Y0       ) & 0xFF ]       ) ^
                ( (uint32_t) RSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );

        X1 = *RK++ ^ \
                ( (uint32_t) RSb[ ( Y1       ) & 0xFF ]       ) ^
                ( (uint32_t) RSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );

        X2 = *RK++ ^ \
                ( (uint32_t) RSb[ ( Y2       ) & 0xFF ]       ) ^
                ( (uint32_t) RSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );

        X3 = *RK++ ^ \
                ( (uint32_t) RSb[ ( Y3       ) & 0xFF ]       ) ^
                ( (uint32_t) RSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );
    }
    else /* AES_ENCRYPT */
    {
        /* Same round structure as the decrypt branch, using the forward
         * tables. */
        for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
        {
            AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
            AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
        }

        AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );

        /* Final round: plain FSb byte substitution with the same word
         * rotation as AES_FROUND, XORed with the last round key. */
        X0 = *RK++ ^ \
                ( (uint32_t) FSb[ ( Y0       ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );

        X1 = *RK++ ^ \
                ( (uint32_t) FSb[ ( Y1       ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );

        X2 = *RK++ ^ \
                ( (uint32_t) FSb[ ( Y2       ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );

        X3 = *RK++ ^ \
                ( (uint32_t) FSb[ ( Y3       ) & 0xFF ]       ) ^
                ( (uint32_t) FSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
                ( (uint32_t) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
                ( (uint32_t) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );
    }

    /* Store the four result words back as 16 output bytes. */
    PUT_UINT32_LE( X0, output,  0 );
    PUT_UINT32_LE( X1, output,  4 );
    PUT_UINT32_LE( X2, output,  8 );
    PUT_UINT32_LE( X3, output, 12 );

    return( 0 );
}

#if defined(POLARSSL_CIPHER_MODE_CBC)
/*
 * AES-CBC buffer encryption/decryption
 *
 * Processes `length` bytes in 16-byte blocks, chaining through `iv`.
 *
 *  length  must be a multiple of 16, otherwise
 *          POLARSSL_ERR_AES_INVALID_INPUT_LENGTH is returned and nothing
 *          is processed
 *  iv      chaining value; overwritten with the last ciphertext block so
 *          a following call continues the same chain
 *
 * Returns 0 on success.
 */
int aes_crypt_cbc( aes_context *ctx,
                    int mode,
                    size_t length,
                    unsigned char iv[16],
                    const unsigned char *input,
                    unsigned char *output )
{
    int k;
    size_t remaining;
    const unsigned char *src;
    unsigned char *dst;
    unsigned char saved_ct[16];

    if( ( length % 16 ) != 0 )
        return( POLARSSL_ERR_AES_INVALID_INPUT_LENGTH );

#if defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86)
    if( aes_padlock_ace )
    {
        if( padlock_xcryptcbc( ctx, mode, length, iv, input, output ) == 0 )
            return( 0 );

        // If padlock data misaligned, we just fall back to
        // unaccelerated mode
        //
    }
#endif

    src = input;
    dst = output;

    for( remaining = length; remaining > 0; remaining -= 16 )
    {
        if( mode == AES_DECRYPT )
        {
            /* Keep the ciphertext block: it is the next chaining value and
             * may be overwritten when input and output are the same. */
            memcpy( saved_ct, src, 16 );
            aes_crypt_ecb( ctx, mode, src, dst );

            for( k = 0; k < 16; k++ )
                dst[k] = (unsigned char)( dst[k] ^ iv[k] );

            memcpy( iv, saved_ct, 16 );
        }
        else
        {
            /* XOR plaintext with the chaining value, then encrypt in place. */
            for( k = 0; k < 16; k++ )
                dst[k] = (unsigned char)( src[k] ^ iv[k] );

            aes_crypt_ecb( ctx, mode, dst, dst );
            memcpy( iv, dst, 16 );
        }

        src += 16;
        dst += 16;
    }

    return( 0 );
}
#endif /* POLARSSL_CIPHER_MODE_CBC */

#if defined(POLARSSL_CIPHER_MODE_CFB)
/*
 * AES-CFB128 buffer encryption/decryption
 *
 * Streams `length` bytes through CFB with a 128-bit feedback register.
 * The block cipher is always run in the AES_ENCRYPT direction, for both
 * values of `mode`.
 *
 *  mode    AES_DECRYPT to decrypt; any other value encrypts
 *  iv_off  in/out: position inside the current 16-byte keystream block;
 *          must be in 0..15 on entry and is updated on success so a later
 *          call can resume mid-block
 *  iv      in/out: feedback register; overwritten with ciphertext bytes
 *
 * Returns 0 on success.  Returns POLARSSL_ERR_AES_INVALID_INPUT_LENGTH,
 * without touching iv, *iv_off or output, when *iv_off is greater than 15:
 * the offset is used directly as an index into iv[16], so a larger value
 * would read and write outside the array.
 */
int aes_crypt_cfb128( aes_context *ctx,
                       int mode,
                       size_t length,
                       size_t *iv_off,
                       unsigned char iv[16],
                       const unsigned char *input,
                       unsigned char *output )
{
    int c;
    size_t n = *iv_off;

    /* Guard the iv[n] accesses below against a corrupt or stale offset. */
    if( n > 15 )
        return( POLARSSL_ERR_AES_INVALID_INPUT_LENGTH );

    if( mode == AES_DECRYPT )
    {
        while( length-- )
        {
            /* Start of a block: turn the register into fresh keystream. */
            if( n == 0 )
                aes_crypt_ecb( ctx, AES_ENCRYPT, iv, iv );

            /* Read the ciphertext byte first so in-place use is safe. */
            c = *input++;
            *output++ = (unsigned char)( c ^ iv[n] );
            iv[n] = (unsigned char) c;

            n = ( n + 1 ) & 0x0F;
        }
    }
    else
    {
        while( length-- )
        {
            if( n == 0 )
                aes_crypt_ecb( ctx, AES_ENCRYPT, iv, iv );

            /* The produced ciphertext byte is also fed back into iv. */
            iv[n] = *output++ = (unsigned char)( iv[n] ^ *input++ );

            n = ( n + 1 ) & 0x0F;
        }
    }

    *iv_off = n;

    return( 0 );
}

/*
 * AES-CFB8 buffer encryption/decryption
 *
 * Handles one byte per block-cipher call.  For each byte the current iv
 * is saved, iv is encrypted in place (always in the AES_ENCRYPT
 * direction), the first keystream byte is XORed with the input byte, and
 * the iv becomes the saved iv shifted left by one byte with the
 * ciphertext byte appended.
 *
 * The ciphertext byte is the input byte when mode is AES_DECRYPT and the
 * output byte when mode is AES_ENCRYPT.  Always returns 0.
 */
#include <stdio.h>
int aes_crypt_cfb8( aes_context *ctx,
                       int mode,
                       size_t length,
                       unsigned char iv[16],
                       const unsigned char *input,
                       unsigned char *output )
{
    unsigned char in_byte, out_byte;
    unsigned char window[17];   /* previous iv followed by the new byte */

    for( ; length > 0; length-- )
    {
        memcpy( window, iv, 16 );
        aes_crypt_ecb( ctx, AES_ENCRYPT, iv, iv );

        /* Fetch the input before storing, so in-place buffers work. */
        in_byte  = *input++;
        out_byte = (unsigned char)( iv[0] ^ in_byte );
        *output++ = out_byte;

        if( mode == AES_DECRYPT )
            window[16] = in_byte;
        else if( mode == AES_ENCRYPT )
            window[16] = out_byte;

        /* Slide the register: drop the oldest byte, append the newest. */
        memcpy( iv, window + 1, 16 );
    }

    return( 0 );
}
#endif /*POLARSSL_CIPHER_MODE_CFB */

#if defined(POLARSSL_CIPHER_MODE_CTR)
/*
 * AES-CTR buffer encryption/decryption
 *
 * XORs `length` input bytes with a keystream produced by encrypting
 * nonce_counter block by block.  The same call is used for both
 * directions.
 *
 *  nc_off         in/out: position inside the current keystream block;
 *                 must be in 0..15 on entry and is updated on success
 *  nonce_counter  in/out: 16-byte counter block; after each keystream
 *                 block is generated it is incremented as one big-endian
 *                 number (carry runs from byte 15 towards byte 0)
 *  stream_block   in/out: holds the current keystream block so that a
 *                 later call can resume mid-block
 *
 * Returns 0 on success.  Returns POLARSSL_ERR_AES_INVALID_INPUT_LENGTH,
 * without touching any buffer, when *nc_off is greater than 15: the
 * offset is used directly as an index into stream_block[16], so a larger
 * value would read outside the array.
 */
int aes_crypt_ctr( aes_context *ctx,
                       size_t length,
                       size_t *nc_off,
                       unsigned char nonce_counter[16],
                       unsigned char stream_block[16],
                       const unsigned char *input,
                       unsigned char *output )
{
    int c, i;
    size_t n = *nc_off;

    /* Guard the stream_block[n] access below against a bad offset. */
    if( n > 15 )
        return( POLARSSL_ERR_AES_INVALID_INPUT_LENGTH );

    while( length-- )
    {
        if( n == 0 ) {
            /* Keystream exhausted: generate the next block ... */
            aes_crypt_ecb( ctx, AES_ENCRYPT, nonce_counter, stream_block );

            /* ... and bump the counter, stopping once a byte does not
             * wrap around to zero. */
            for( i = 16; i > 0; i-- )
                if( ++nonce_counter[i - 1] != 0 )
                    break;
        }
        c = *input++;
        *output++ = (unsigned char)( c ^ stream_block[n] );

        n = ( n + 1 ) & 0x0F;
    }

    *nc_off = n;

    return( 0 );
}
#endif /* POLARSSL_CIPHER_MODE_CTR */

#endif /* !POLARSSL_AES_ALT */

#if defined(POLARSSL_SELF_TEST)

#include <stdio.h>

/*
 * AES test vectors from:
 *
 * http://csrc.nist.gov/archive/aes/rijndael/rijndael-vals.zip
 */
/*
 * Expected ECB decryption results checked by aes_self_test(): the block
 * obtained after 10000 chained aes_crypt_ecb() decryptions of an all-zero
 * block under an all-zero key.  Row u belongs to key size 128 + 64 * u.
 */
static const unsigned char aes_test_ecb_dec[3][16] =
{
    { 0x44, 0x41, 0x6A, 0xC2, 0xD1, 0xF5, 0x3C, 0x58,
      0x33, 0x03, 0x91, 0x7E, 0x6B, 0xE9, 0xEB, 0xE0 },
    { 0x48, 0xE3, 0x1E, 0x9E, 0x25, 0x67, 0x18, 0xF2,
      0x92, 0x29, 0x31, 0x9C, 0x19, 0xF1, 0x5B, 0xA4 },
    { 0x05, 0x8C, 0xCF, 0xFD, 0xBB, 0xCB, 0x38, 0x2D,
      0x1F, 0x6F, 0x56, 0x58, 0x5D, 0x8A, 0x4A, 0xDE }
};

/*
 * Expected ECB encryption results checked by aes_self_test(): the block
 * obtained after 10000 chained aes_crypt_ecb() encryptions of an all-zero
 * block under an all-zero key.  Row u belongs to key size 128 + 64 * u.
 */
static const unsigned char aes_test_ecb_enc[3][16] =
{
    { 0xC3, 0x4C, 0x05, 0x2C, 0xC0, 0xDA, 0x8D, 0x73,
      0x45, 0x1A, 0xFE, 0x5F, 0x03, 0xBE, 0x29, 0x7F },
    { 0xF3, 0xF6, 0x75, 0x2A, 0xE8, 0xD7, 0x83, 0x11,
      0x38, 0xF0, 0x41, 0x56, 0x06, 0x31, 0xB1, 0x14 },
    { 0x8B, 0x79, 0xEE, 0xCC, 0x93, 0xA0, 0xEE, 0x5D,
      0xFF, 0x30, 0xB4, 0xEA, 0x21, 0x63, 0x6D, 0xA4 }
};

#if defined(POLARSSL_CIPHER_MODE_CBC)
static const unsigned char aes_test_cbc_dec[3][16] =
{
    { 0xFA, 0xCA, 0x37, 0xE0, 0xB0, 0xC8, 0x53, 0x73,
      0xDF, 0x70, 0x6E, 0x73, 0xF7, 0xC9, 0xAF, 0x86 },
    { 0x5D, 0xF6, 0x78, 0xDD, 0x17, 0xBA, 0x4E, 0x75,
      0xB6, 0x17, 0x68, 0xC6, 0xAD, 0xEF, 0x7C, 0x7B },
    { 0x48, 0x04, 0xE1, 0x81, 0x8F, 0xE6, 0x29, 0x75,
      0x19, 0xA3, 0xE8, 0x8C, 0x57, 0x31, 0x04, 0x13 }
};

static const unsigned char aes_test_cbc_enc[3][16] =
{
    { 0x8A, 0x05, 0xFC, 0x5E, 0x09, 0x5A, 0xF4, 0x84,
      0x8A, 0x08, 0xD3, 0x28, 0xD3, 0x68, 0x8E, 0x3D },
    { 0x7B, 0xD9, 0x66, 0xD5, 0x3A, 0xD8, 0xC1, 0xBB,
      0x85, 0xD2, 0xAD, 0xFA, 0xE8, 0x7B, 0xB1, 0x04 },
    { 0xFE, 0x3C, 0x53, 0x65, 0x3E, 0x2F, 0x45, 0xB5,
      0x6F, 0xCD, 0x88, 0xB2, 0xCC, 0x89, 0x8F, 0xF0 }
};
#endif /* POLARSSL_CIPHER_MODE_CBC */

#if defined(POLARSSL_CIPHER_MODE_CFB)
/*
 * AES-CFB128 test vectors from:
 *
 * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
 */
/*
 * CFB128 test keys, one row per key size (128, 192, 256 bits).  Rows are
 * declared 32 bytes wide; the shorter initializers leave the remaining
 * bytes zero.  aes_self_test() copies only the first 16 + 8 * u bytes of
 * row u into its key buffer.
 */
static const unsigned char aes_test_cfb128_key[3][32] =
{
    { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6,
      0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C },
    { 0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52,
      0xC8, 0x10, 0xF3, 0x2B, 0x80, 0x90, 0x79, 0xE5,
      0x62, 0xF8, 0xEA, 0xD2, 0x52, 0x2C, 0x6B, 0x7B },
    { 0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE,
      0x2B, 0x73, 0xAE, 0xF0, 0x85, 0x7D, 0x77, 0x81,
      0x1F, 0x35, 0x2C, 0x07, 0x3B, 0x61, 0x08, 0xD7,
      0x2D, 0x98, 0x10, 0xA3, 0x09, 0x14, 0xDF, 0xF4 }
};

static const unsigned char aes_test_cfb128_iv[16] =
{
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
};

static const unsigned char aes_test_cfb128_pt[64] =
{
    0x6B, 0xC1, 0xBE, 0xE2, 0x2E, 0x40, 0x9F, 0x96,
    0xE9, 0x3D, 0x7E, 0x11, 0x73, 0x93, 0x17, 0x2A,
    0xAE, 0x2D, 0x8A, 0x57, 0x1E, 0x03, 0xAC, 0x9C,
    0x9E, 0xB7, 0x6F, 0xAC, 0x45, 0xAF, 0x8E, 0x51,
    0x30, 0xC8, 0x1C, 0x46, 0xA3, 0x5C, 0xE4, 0x11,
    0xE5, 0xFB, 0xC1, 0x19, 0x1A, 0x0A, 0x52, 0xEF,
    0xF6, 0x9F, 0x24, 0x45, 0xDF, 0x4F, 0x9B, 0x17,
    0xAD, 0x2B, 0x41, 0x7B, 0xE6, 0x6C, 0x37, 0x10
};

static const unsigned char aes_test_cfb128_ct[3][64] =
{
    { 0x3B, 0x3F, 0xD9, 0x2E, 0xB7, 0x2D, 0xAD, 0x20,
      0x33, 0x34, 0x49, 0xF8, 0xE8, 0x3C, 0xFB, 0x4A,
      0xC8, 0xA6, 0x45, 0x37, 0xA0, 0xB3, 0xA9, 0x3F,
      0xCD, 0xE3, 0xCD, 0xAD, 0x9F, 0x1C, 0xE5, 0x8B,
      0x26, 0x75, 0x1F, 0x67, 0xA3, 0xCB, 0xB1, 0x40,
      0xB1, 0x80, 0x8C, 0xF1, 0x87, 0xA4, 0xF4, 0xDF,
      0xC0, 0x4B, 0x05, 0x35, 0x7C, 0x5D, 0x1C, 0x0E,
      0xEA, 0xC4, 0xC6, 0x6F, 0x9F, 0xF7, 0xF2, 0xE6 },
    { 0xCD, 0xC8, 0x0D, 0x6F, 0xDD, 0xF1, 0x8C, 0xAB,
      0x34, 0xC2, 0x59, 0x09, 0xC9, 0x9A, 0x41, 0x74,
      0x67, 0xCE, 0x7F, 0x7F, 0x81, 0x17, 0x36, 0x21,
      0x96, 0x1A, 0x2B, 0x70, 0x17, 0x1D, 0x3D, 0x7A,
      0x2E, 0x1E, 0x8A, 0x1D, 0xD5, 0x9B, 0x88, 0xB1,
      0xC8, 0xE6, 0x0F, 0xED, 0x1E, 0xFA, 0xC4, 0xC9,
      0xC0, 0x5F, 0x9F, 0x9C, 0xA9, 0x83, 0x4F, 0xA0,
      0x42, 0xAE, 0x8F, 0xBA, 0x58, 0x4B, 0x09, 0xFF },
    { 0xDC, 0x7E, 0x84, 0xBF, 0xDA, 0x79, 0x16, 0x4B,
      0x7E, 0xCD, 0x84, 0x86, 0x98, 0x5D, 0x38, 0x60,
      0x39, 0xFF, 0xED, 0x14, 0x3B, 0x28, 0xB1, 0xC8,
      0x32, 0x11, 0x3C, 0x63, 0x31, 0xE5, 0x40, 0x7B,
      0xDF, 0x10, 0x13, 0x24, 0x15, 0xE5, 0x4B, 0x92,
      0xA1, 0x3E, 0xD0, 0xA8, 0x26, 0x7A, 0xE2, 0xF9,
      0x75, 0xA3, 0x85, 0x74, 0x1A, 0xB9, 0xCE, 0xF8,
      0x20, 0x31, 0x62, 0x3D, 0x55, 0xB1, 0xE4, 0x71 }
};
#endif /* POLARSSL_CIPHER_MODE_CFB */

#if defined(POLARSSL_CIPHER_MODE_CTR)
/*
 * AES-CTR test vectors from:
 *
 * http://www.faqs.org/rfcs/rfc3686.html
 */

static const unsigned char aes_test_ctr_key[3][16] =
{
    { 0xAE, 0x68, 0x52, 0xF8, 0x12, 0x10, 0x67, 0xCC,
      0x4B, 0xF7, 0xA5, 0x76, 0x55, 0x77, 0xF3, 0x9E },
    { 0x7E, 0x24, 0x06, 0x78, 0x17, 0xFA, 0xE0, 0xD7,
      0x43, 0xD6, 0xCE, 0x1F, 0x32, 0x53, 0x91, 0x63 },
    { 0x76, 0x91, 0xBE, 0x03, 0x5E, 0x50, 0x20, 0xA8,
      0xAC, 0x6E, 0x61, 0x85, 0x29, 0xF9, 0xA0, 0xDC }
};

static const unsigned char aes_test_ctr_nonce_counter[3][16] =
{
    { 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
    { 0x00, 0x6C, 0xB6, 0xDB, 0xC0, 0x54, 0x3B, 0x59,
      0xDA, 0x48, 0xD9, 0x0B, 0x00, 0x00, 0x00, 0x01 },
    { 0x00, 0xE0, 0x01, 0x7B, 0x27, 0x77, 0x7F, 0x3F,
      0x4A, 0x17, 0x86, 0xF0, 0x00, 0x00, 0x00, 0x01 }
};

static const unsigned char aes_test_ctr_pt[3][48] =
{
    { 0x53, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x62,
      0x6C, 0x6F, 0x63, 0x6B, 0x20, 0x6D, 0x73, 0x67 },

    { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F },

    { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
      0x20, 0x21, 0x22, 0x23 }
};

static const unsigned char aes_test_ctr_ct[3][48] =
{
    { 0xE4, 0x09, 0x5D, 0x4F, 0xB7, 0xA7, 0xB3, 0x79,
      0x2D, 0x61, 0x75, 0xA3, 0x26, 0x13, 0x11, 0xB8 },
    { 0x51, 0x04, 0xA1, 0x06, 0x16, 0x8A, 0x72, 0xD9,
      0x79, 0x0D, 0x41, 0xEE, 0x8E, 0xDA, 0xD3, 0x88,
      0xEB, 0x2E, 0x1E, 0xFC, 0x46, 0xDA, 0x57, 0xC8,
      0xFC, 0xE6, 0x30, 0xDF, 0x91, 0x41, 0xBE, 0x28 },
    { 0xC1, 0xCF, 0x48, 0xA8, 0x9F, 0x2F, 0xFD, 0xD9,
      0xCF, 0x46, 0x52, 0xE9, 0xEF, 0xDB, 0x72, 0xD7,
      0x45, 0x40, 0xA4, 0x2B, 0xDE, 0x6D, 0x78, 0x36,
      0xD5, 0x9A, 0x5C, 0xEA, 0xAE, 0xF3, 0x10, 0x53,
      0x25, 0xB2, 0x07, 0x2F }
};

/*
 * Number of meaningful bytes in row u of aes_test_ctr_pt / aes_test_ctr_ct
 * (the rows are declared 48 bytes wide); aes_self_test() copies, processes
 * and compares exactly this many bytes.
 */
static const int aes_test_ctr_len[3] =
    { 16, 32, 36 };
#endif /* POLARSSL_CIPHER_MODE_CTR */

/*
 * Checkup routine
 *
 * Runs the built-in known-answer tests: ECB always, then CBC, CFB128 and
 * CTR when the matching POLARSSL_CIPHER_MODE_* macro is defined.  Each
 * mode is exercised six times: three vector sets, each in the decrypt
 * and the encrypt direction.
 *
 * When verbose is non-zero a line per test is printed through
 * polarssl_printf.  Returns 0 if every test passes and 1 at the first
 * mismatch; the context is released with aes_free() either way.
 */
int aes_self_test( int verbose )
{
    int ret = 0, pass, iter, idx, dir, keybits;
    const char *dir_name;
    const unsigned char *expected;
    unsigned char key[32];
    unsigned char buf[64];
    unsigned char iv[16];
#if defined(POLARSSL_CIPHER_MODE_CBC)
    unsigned char prv[16];
    unsigned char swap[16];
#endif
#if defined(POLARSSL_CIPHER_MODE_CTR) || defined(POLARSSL_CIPHER_MODE_CFB)
    size_t offset;
    const unsigned char *source;
#endif
#if defined(POLARSSL_CIPHER_MODE_CTR)
    int len;
    unsigned char nonce_counter[16];
    unsigned char stream_block[16];
#endif
    aes_context ctx;

    memset( key, 0, 32 );
    aes_init( &ctx );

    /*
     * ECB mode: 10000 chained single-block operations on a zero block
     */
    for( pass = 0; pass < 6; pass++ )
    {
        idx     = pass >> 1;            /* vector set / key size index */
        dir     = pass  & 1;            /* AES_DECRYPT or AES_ENCRYPT  */
        keybits = 128 + idx * 64;
        dir_name = ( dir == AES_DECRYPT ) ? "dec" : "enc";

        if( verbose != 0 )
            polarssl_printf( "  AES-ECB-%3d (%s): ", keybits, dir_name );

        memset( buf, 0, 16 );

        if( dir == AES_DECRYPT )
        {
            aes_setkey_dec( &ctx, key, keybits );
            expected = aes_test_ecb_dec[idx];
        }
        else
        {
            aes_setkey_enc( &ctx, key, keybits );
            expected = aes_test_ecb_enc[idx];
        }

        for( iter = 0; iter < 10000; iter++ )
            aes_crypt_ecb( &ctx, dir, buf, buf );

        if( memcmp( buf, expected, 16 ) != 0 )
            goto fail;

        if( verbose != 0 )
            polarssl_printf( "passed\n" );
    }

    if( verbose != 0 )
        polarssl_printf( "\n" );

#if defined(POLARSSL_CIPHER_MODE_CBC)
    /*
     * CBC mode
     */
    for( pass = 0; pass < 6; pass++ )
    {
        idx     = pass >> 1;
        dir     = pass  & 1;
        keybits = 128 + idx * 64;
        dir_name = ( dir == AES_DECRYPT ) ? "dec" : "enc";

        if( verbose != 0 )
            polarssl_printf( "  AES-CBC-%3d (%s): ", keybits, dir_name );

        memset( iv , 0, 16 );
        memset( prv, 0, 16 );
        memset( buf, 0, 16 );

        if( dir == AES_DECRYPT )
        {
            aes_setkey_dec( &ctx, key, keybits );

            for( iter = 0; iter < 10000; iter++ )
                aes_crypt_cbc( &ctx, dir, 16, iv, buf, buf );

            if( memcmp( buf, aes_test_cbc_dec[idx], 16 ) != 0 )
                goto fail;
        }
        else
        {
            aes_setkey_enc( &ctx, key, keybits );

            for( iter = 0; iter < 10000; iter++ )
            {
                aes_crypt_cbc( &ctx, dir, 16, iv, buf, buf );

                /* Exchange buf and prv: the next input is the previous
                 * ciphertext, and prv keeps the newest one. */
                memcpy( swap, prv, 16 );
                memcpy( prv, buf, 16 );
                memcpy( buf, swap, 16 );
            }

            if( memcmp( prv, aes_test_cbc_enc[idx], 16 ) != 0 )
                goto fail;
        }

        if( verbose != 0 )
            polarssl_printf( "passed\n" );
    }

    if( verbose != 0 )
        polarssl_printf( "\n" );
#endif /* POLARSSL_CIPHER_MODE_CBC */

#if defined(POLARSSL_CIPHER_MODE_CFB)
    /*
     * CFB128 mode: one 64-byte message per key size, both directions
     * using the encryption key schedule
     */
    for( pass = 0; pass < 6; pass++ )
    {
        idx     = pass >> 1;
        dir     = pass  & 1;
        keybits = 128 + idx * 64;
        dir_name = ( dir == AES_DECRYPT ) ? "dec" : "enc";

        if( verbose != 0 )
            polarssl_printf( "  AES-CFB128-%3d (%s): ", keybits, dir_name );

        memcpy( iv,  aes_test_cfb128_iv, 16 );
        memcpy( key, aes_test_cfb128_key[idx], 16 + idx * 8 );

        offset = 0;
        aes_setkey_enc( &ctx, key, keybits );

        if( dir == AES_DECRYPT )
        {
            source   = aes_test_cfb128_ct[idx];
            expected = aes_test_cfb128_pt;
        }
        else
        {
            source   = aes_test_cfb128_pt;
            expected = aes_test_cfb128_ct[idx];
        }

        memcpy( buf, source, 64 );
        aes_crypt_cfb128( &ctx, dir, 64, &offset, iv, buf, buf );

        if( memcmp( buf, expected, 64 ) != 0 )
            goto fail;

        if( verbose != 0 )
            polarssl_printf( "passed\n" );
    }

    if( verbose != 0 )
        polarssl_printf( "\n" );
#endif /* POLARSSL_CIPHER_MODE_CFB */

#if defined(POLARSSL_CIPHER_MODE_CTR)
    /*
     * CTR mode: three messages of different length, always a 128-bit key
     */
    for( pass = 0; pass < 6; pass++ )
    {
        idx = pass >> 1;
        dir = pass  & 1;
        dir_name = ( dir == AES_DECRYPT ) ? "dec" : "enc";

        if( verbose != 0 )
            polarssl_printf( "  AES-CTR-128 (%s): ", dir_name );

        memcpy( nonce_counter, aes_test_ctr_nonce_counter[idx], 16 );
        memcpy( key, aes_test_ctr_key[idx], 16 );

        offset = 0;
        aes_setkey_enc( &ctx, key, 128 );

        len = aes_test_ctr_len[idx];

        if( dir == AES_DECRYPT )
        {
            source   = aes_test_ctr_ct[idx];
            expected = aes_test_ctr_pt[idx];
        }
        else
        {
            source   = aes_test_ctr_pt[idx];
            expected = aes_test_ctr_ct[idx];
        }

        memcpy( buf, source, len );

        aes_crypt_ctr( &ctx, len, &offset, nonce_counter, stream_block,
                       buf, buf );

        if( memcmp( buf, expected, len ) != 0 )
            goto fail;

        if( verbose != 0 )
            polarssl_printf( "passed\n" );
    }

    if( verbose != 0 )
        polarssl_printf( "\n" );
#endif /* POLARSSL_CIPHER_MODE_CTR */

    ret = 0;
    goto done;

fail:
    /* Shared failure path for every known-answer mismatch above. */
    if( verbose != 0 )
        polarssl_printf( "failed\n" );

    ret = 1;

done:
    aes_free( &ctx );

    return( ret );
}

#endif /* POLARSSL_SELF_TEST */

//#endif /* POLARSSL_AES_C */


================================================
FILE: lib/aes_faster_c/aes.h
================================================
/**
 * \file aes.h
 *
 * \brief AES block cipher
 *
 *  Copyright (C) 2006-2014, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifndef POLARSSL_AES_H
#define POLARSSL_AES_H
/*
#if !defined(POLARSSL_CONFIG_FILE)
#include "config.h"
#else
#include POLARSSL_CONFIG_FILE
#endif
*/

////////modification begin
#define POLARSSL_AES_ROM_TABLES
#define POLARSSL_CIPHER_MODE_CBC
#define POLARSSL_CIPHER_MODE_CFB
//#define POLARSSL_SELF_TEST
#define polarssl_printf printf
////////modification end


#include <string.h>

#if defined(_MSC_VER) && !defined(EFIX64) && !defined(EFI32)
#include <basetsd.h>
typedef UINT32 uint32_t;
#else
#include <inttypes.h>
#endif

/* padlock.c and aesni.c rely on these values! */
#define AES_ENCRYPT     1
#define AES_DECRYPT     0

#define POLARSSL_ERR_AES_INVALID_KEY_LENGTH                -0x0020  /**< Invalid key length. */
#define POLARSSL_ERR_AES_INVALID_INPUT_LENGTH              -0x0022  /**< Invalid data input length. */

#if !defined(POLARSSL_AES_ALT)
// Regular implementation
//

#ifdef __cplusplus
extern "C" {
#endif

/**
 * \brief          AES context structure
 *
 * \note           buf is able to hold 32 extra bytes, which can be used:
 *                 - for alignment purposes if VIA padlock is used, and/or
 *                 - to simplify key expansion in the 256-bit case by
 *                 generating an extra round key
 *
 * \note           rk is not a separate allocation: the key-schedule
 *                 functions point it at buf, or at an aligned address
 *                 inside buf when the padlock path is used. A schedule
 *                 occupies 4 * (nr + 1) words starting at rk.
 */
typedef struct
{
    int nr;                     /*!<  number of rounds  */
    uint32_t *rk;               /*!<  AES round keys    */
    uint32_t buf[68];           /*!<  unaligned data    */
}
aes_context;

/**
 * \brief          Initialize AES context
 *
 * \param ctx      AES context to be initialized
 */
void aes_init( aes_context *ctx );

/**
 * \brief          Clear AES context
 *
 * \param ctx      AES context to be cleared
 */
void aes_free( aes_context *ctx );

/**
 * \brief          AES key schedule (encryption)
 *
 * \param ctx      AES context to be initialized
 * \param key      encryption key
 * \param keysize  must be 128, 192 or 256
 *
 * \return         0 if successful, or POLARSSL_ERR_AES_INVALID_KEY_LENGTH
 */
int aes_setkey_enc( aes_context *ctx, const unsigned char *key,
                    unsigned int keysize );

/**
 * \brief          AES key schedule (decryption)
 *
 * \param ctx      AES context to be initialized
 * \param key      decryption key
 * \param keysize  must be 128, 192 or 256
 *
 * \return         0 if successful, or POLARSSL_ERR_AES_INVALID_KEY_LENGTH
 */
int aes_setkey_dec( aes_context *ctx, const unsigned char *key,
                    unsigned int keysize );

/**
 * \brief          AES-ECB block encryption/decryption
 *
 * \param ctx      AES context
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param input    16-byte input block
 * \param output   16-byte output block
 *
 * \return         0 if successful
 */
int aes_crypt_ecb( aes_context *ctx,
                    int mode,
                    const unsigned char input[16],
                    unsigned char output[16] );

#if defined(POLARSSL_CIPHER_MODE_CBC)
/**
 * \brief          AES-CBC buffer encryption/decryption
 *                 Length should be a multiple of the block
 *                 size (16 bytes)
 *
 * \param ctx      AES context
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param length   length of the input data
 * \param iv       initialization vector (updated after use)
 * \param input    buffer holding the input data
 * \param output   buffer holding the output data
 *
 * \return         0 if successful, or POLARSSL_ERR_AES_INVALID_INPUT_LENGTH
 */
int aes_crypt_cbc( aes_context *ctx,
                    int mode,
                    size_t length,
                    unsigned char iv[16],
                    const unsigned char *input,
                    unsigned char *output );
#endif /* POLARSSL_CIPHER_MODE_CBC */

#if defined(POLARSSL_CIPHER_MODE_CFB)
/**
 * \brief          AES-CFB128 buffer encryption/decryption.
 *
 * Note: Due to the nature of CFB you should use the same key schedule for
 * both encryption and decryption, i.e. use a context initialized with
 * aes_setkey_enc() for both AES_ENCRYPT and AES_DECRYPT.
 *
 * \param ctx      AES context
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param length   length of the input data
 * \param iv_off   offset in IV (updated after use)
 * \param iv       initialization vector (updated after use)
 * \param input    buffer holding the input data
 * \param output   buffer holding the output data
 *
 * \return         0 if successful
 */
int aes_crypt_cfb128( aes_context *ctx,
                       int mode,
                       size_t length,
                       size_t *iv_off,
                       unsigned char iv[16],
                       const unsigned char *input,
                       unsigned char *output );

/**
 * \brief          AES-CFB8 buffer encryption/decryption.
 *
 * Note: Due to the nature of CFB you should use the same key schedule for
 * both encryption and decryption, i.e. use a context initialized with
 * aes_setkey_enc() for both AES_ENCRYPT and AES_DECRYPT.
 *
 * \param ctx      AES context
 * \param mode     AES_ENCRYPT or AES_DECRYPT
 * \param length   length of the input data
 * \param iv       initialization vector (updated after use)
 * \param input    buffer holding the input data
 * \param output   buffer holding the output data
 *
 * \return         0 if successful
 */
int aes_crypt_cfb8( aes_context *ctx,
                    int mode,
                    size_t length,
                    unsigned char iv[16],
                    const unsigned char *input,
                    unsigned char *output );
#endif /*POLARSSL_CIPHER_MODE_CFB */

#if defined(POLARSSL_CIPHER_MODE_CTR)
/**
 * \brief               AES-CTR buffer encryption/decryption
 *
 * Warning: You have to keep the maximum use of your counter in mind!
 *
 * Note: Due to the nature of CTR you should use the same key schedule for
 * both encryption and decryption, i.e. a context initialized with
 * aes_setkey_enc() for both AES_ENCRYPT and AES_DECRYPT.
 *
 * \param ctx           AES context
 * \param length        The length of the data
 * \param nc_off        The offset in the current stream_block (for resuming
 *                      within the current cipher stream). The value pointed
 *                      to should be 0 at the start of a stream.
 * \param nonce_counter The 128-bit nonce and counter.
 * \param stream_block  The saved stream-block for resuming. Is overwritten
 *                      by the function.
 * \param input         The input data stream
 * \param output        The output data stream
 *
 * \return         0 if successful
 */
int aes_crypt_ctr( aes_context *ctx,
                       size_t length,
                       size_t *nc_off,
                       unsigned char nonce_counter[16],
                       unsigned char stream_block[16],
                       const unsigned char *input,
                       unsigned char *output );
#endif /* POLARSSL_CIPHER_MODE_CTR */

#ifdef __cplusplus
}
#endif

#else  /* POLARSSL_AES_ALT */
#include "aes_alt.h"
#endif /* POLARSSL_AES_ALT */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * \brief          Checkup routine
 *
 * \param verbose  NOTE(review): presumably non-zero enables diagnostic
 *                 output; the implementation is not visible here - confirm.
 *
 * \return         0 if successful, or 1 if the test failed
 */
int aes_self_test( int verbose );

#ifdef __cplusplus
}
#endif

#endif /* aes.h */


================================================
FILE: lib/aes_faster_c/wrapper.cpp
================================================
#include "aes.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#if defined(AES256) && (AES256 == 1)
#define AES_KEYSIZE 256
#elif defined(AES192) && (AES192 == 1)
#define AES_KEYSIZE 192
#else
#define AES_KEYSIZE 128
#endif


void AES_ECB_encrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t *output)
{
	static aes_context ctx;
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_enc(&ctx,key,AES_KEYSIZE);
	}
	int ret=aes_crypt_ecb( &ctx, AES_ENCRYPT, (const unsigned char*)input,(unsigned char*) output );
	assert(ret==0);
	return ;
}
void AES_ECB_decrypt_buffer(const uint8_t* input, const uint8_t* key, uint8_t *output)
{
	static aes_context ctx;
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_dec(&ctx,key,AES_KEYSIZE);
	}
	int ret=aes_crypt_ecb( &ctx, AES_DECRYPT, (const unsigned char*)input,(unsigned char*) output );
	assert(ret==0);
    return ;
}

void AES_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
	static aes_context ctx;

	char tmp_iv[16];
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_enc(&ctx,key,AES_KEYSIZE);
	}
	memcpy(tmp_iv,iv,16);
	int ret=aes_crypt_cbc( &ctx, AES_ENCRYPT, length, (unsigned char* )tmp_iv, (const unsigned char*)input,(unsigned char*) output );
	assert(ret==0);
	return ;
}
void AES_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
	static aes_context ctx;

	char tmp_iv[16];
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_dec(&ctx,key,AES_KEYSIZE);
	}
	memcpy(tmp_iv,iv,16);
	int ret=aes_crypt_cbc( &ctx,AES_DECRYPT, length, (unsigned char*)tmp_iv, (const unsigned char*)input, (unsigned char*) output );
	assert(ret==0);
}

void AES_CFB_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
	static aes_context ctx;

	char tmp_iv[16];
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_enc(&ctx,key,AES_KEYSIZE);
	}
	memcpy(tmp_iv,iv,16);
	size_t offset=0;
	int ret=aes_crypt_cfb128( &ctx, AES_ENCRYPT, length,&offset, (unsigned char* )tmp_iv, (const unsigned char*)input,(unsigned char*) output );
	assert(ret==0);
	return ;
}
void AES_CFB_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv)
{
	static aes_context ctx;

	char tmp_iv[16];
	if(key!=0)
	{
		aes_init( &ctx);
		aes_setkey_enc(&ctx,key,AES_KEYSIZE);// its aes_setkey_enc again, no typo
	}
	memcpy(tmp_iv,iv,16);
	size_t offset=0;
	int ret=aes_crypt_cfb128( &ctx,AES_DECRYPT, length,&offset, (unsigned char*)tmp_iv, (const unsigned char*)input, (unsigned char*) output );
	assert(ret==0);
	return;
}


================================================
FILE: lib/md5.cpp
================================================
/*
 * This file is adapted from PolarSSL 1.3.19 (GPL)
 */

/*
 *  RFC 1321 compliant MD5 implementation
 *
 *  Copyright (C) 2006-2014, ARM Limited, All Rights Reserved
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 *  The MD5 algorithm was designed by Ron Rivest in 1991.
 *
 *  http://www.ietf.org/rfc/rfc1321.txt
 */

#include <string.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Working state shared by the MD5 and HMAC-MD5 routines in this file.
 * total[] is a 64-bit byte counter split into two 32-bit words
 * (total[0] = low word, total[1] = carry from the low word).
 */
typedef struct
{
    uint32_t total[2];          /*!< number of bytes processed  */
    uint32_t state[4];          /*!< intermediate digest state  */
    unsigned char buffer[64];   /*!< data block being processed */

    unsigned char ipad[64];     /*!< HMAC: inner padding        */
    unsigned char opad[64];     /*!< HMAC: outer padding        */
}
md5_context;

/*
 * Overwrite the n bytes starting at v with zeros.
 * The stores go through a volatile pointer so that the compiler is not
 * allowed to discard them as dead writes.
 */
static void polarssl_zeroize( void *v, size_t n )
{
    volatile unsigned char *bytes = (unsigned char *) v;
    size_t idx;

    for( idx = 0; idx < n; idx++ )
        bytes[idx] = 0;
}

/*
 * 32-bit integer manipulation macros (little endian)
 */
/*
 * GET_UINT32_LE(n,b,i): assemble the four bytes b[i] .. b[i+3] into the
 * 32-bit value n, with b[i] as the least significant byte.
 * Only defined here if no earlier definition exists.
 */
#ifndef GET_UINT32_LE
#define GET_UINT32_LE(n,b,i)                            \
{                                                       \
    (n) = ( (uint32_t) (b)[(i)    ]       )             \
        | ( (uint32_t) (b)[(i) + 1] <<  8 )             \
        | ( (uint32_t) (b)[(i) + 2] << 16 )             \
        | ( (uint32_t) (b)[(i) + 3] << 24 );            \
}
#endif

/*
 * PUT_UINT32_LE(n,b,i): store the 32-bit value n into b[i] .. b[i+3],
 * least significant byte first.
 * Only defined here if no earlier definition exists.
 */
#ifndef PUT_UINT32_LE
#define PUT_UINT32_LE(n,b,i)                                    \
{                                                               \
    (b)[(i)    ] = (unsigned char) ( ( (n)       ) & 0xFF );    \
    (b)[(i) + 1] = (unsigned char) ( ( (n) >>  8 ) & 0xFF );    \
    (b)[(i) + 2] = (unsigned char) ( ( (n) >> 16 ) & 0xFF );    \
    (b)[(i) + 3] = (unsigned char) ( ( (n) >> 24 ) & 0xFF );    \
}
#endif

/*
 * Put a context into a known state by clearing every byte of it.
 */
void md5_init( md5_context *ctx )
{
    memset( ctx, 0, sizeof( *ctx ) );
}

/*
 * Wipe a context with polarssl_zeroize(). A NULL context is ignored.
 * Nothing is deallocated; the caller still owns the storage.
 */
void md5_free( md5_context *ctx )
{
    if( ctx != NULL )
        polarssl_zeroize( ctx, sizeof( *ctx ) );
}

/*
 * MD5 context setup: reset the byte counter and load the four initial
 * state words. buffer, ipad and opad are left untouched.
 */
void md5_starts( md5_context *ctx )
{
    static const uint32_t initial_state[4] =
    {
        0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476
    };
    int word;

    for( word = 0; word < 4; word++ )
        ctx->state[word] = initial_state[word];

    ctx->total[1] = 0;
    ctx->total[0] = 0;
}

/*
 * Mix one 64-byte block into the digest state.
 *
 * The block is read as sixteen little-endian 32-bit words X[0..15], four
 * groups of sixteen P() steps are applied to working copies A..D of
 * ctx->state, and the results are added back into ctx->state.
 * ctx->total and ctx->buffer are not touched here.
 *
 * Note: the helper macros S and P stay defined after this function; only F
 * is #undef'd.
 */
void md5_process( md5_context *ctx, const unsigned char data[64] )
{
    uint32_t X[16], A, B, C, D;

    /* Decode the input block into sixteen little-endian words. */
    GET_UINT32_LE( X[ 0], data,  0 );
    GET_UINT32_LE( X[ 1], data,  4 );
    GET_UINT32_LE( X[ 2], data,  8 );
    GET_UINT32_LE( X[ 3], data, 12 );
    GET_UINT32_LE( X[ 4], data, 16 );
    GET_UINT32_LE( X[ 5], data, 20 );
    GET_UINT32_LE( X[ 6], data, 24 );
    GET_UINT32_LE( X[ 7], data, 28 );
    GET_UINT32_LE( X[ 8], data, 32 );
    GET_UINT32_LE( X[ 9], data, 36 );
    GET_UINT32_LE( X[10], data, 40 );
    GET_UINT32_LE( X[11], data, 44 );
    GET_UINT32_LE( X[12], data, 48 );
    GET_UINT32_LE( X[13], data, 52 );
    GET_UINT32_LE( X[14], data, 56 );
    GET_UINT32_LE( X[15], data, 60 );

/* S(x,n): rotate the 32-bit value x left by n bits. */
#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))

/* P: one step - add F(b,c,d), message word X[k] and constant t to a, rotate by s, add b. */
#define P(a,b,c,d,k,s,t)                                \
{                                                       \
    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
}

    A = ctx->state[0];
    B = ctx->state[1];
    C = ctx->state[2];
    D = ctx->state[3];

/* Round 1 */
#define F(x,y,z) (z ^ (x & (y ^ z)))

    P( A, B, C, D,  0,  7, 0xD76AA478 );
    P( D, A, B, C,  1, 12, 0xE8C7B756 );
    P( C, D, A, B,  2, 17, 0x242070DB );
    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
    P( A, B, C, D,  4,  7, 0xF57C0FAF );
    P( D, A, B, C,  5, 12, 0x4787C62A );
    P( C, D, A, B,  6, 17, 0xA8304613 );
    P( B, C, D, A,  7, 22, 0xFD469501 );
    P( A, B, C, D,  8,  7, 0x698098D8 );
    P( D, A, B, C,  9, 12, 0x8B44F7AF );
    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
    P( B, C, D, A, 11, 22, 0x895CD7BE );
    P( A, B, C, D, 12,  7, 0x6B901122 );
    P( D, A, B, C, 13, 12, 0xFD987193 );
    P( C, D, A, B, 14, 17, 0xA679438E );
    P( B, C, D, A, 15, 22, 0x49B40821 );

#undef F

/* Round 2 */
#define F(x,y,z) (y ^ (z & (x ^ y)))

    P( A, B, C, D,  1,  5, 0xF61E2562 );
    P( D, A, B, C,  6,  9, 0xC040B340 );
    P( C, D, A, B, 11, 14, 0x265E5A51 );
    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
    P( A, B, C, D,  5,  5, 0xD62F105D );
    P( D, A, B, C, 10,  9, 0x02441453 );
    P( C, D, A, B, 15, 14, 0xD8A1E681 );
    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
    P( D, A, B, C, 14,  9, 0xC33707D6 );
    P( C, D, A, B,  3, 14, 0xF4D50D87 );
    P( B, C, D, A,  8, 20, 0x455A14ED );
    P( A, B, C, D, 13,  5, 0xA9E3E905 );
    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
    P( C, D, A, B,  7, 14, 0x676F02D9 );
    P( B, C, D, A, 12, 20, 0x8D2A4C8A );

#undef F

/* Round 3 */
#define F(x,y,z) (x ^ y ^ z)

    P( A, B, C, D,  5,  4, 0xFFFA3942 );
    P( D, A, B, C,  8, 11, 0x8771F681 );
    P( C, D, A, B, 11, 16, 0x6D9D6122 );
    P( B, C, D, A, 14, 23, 0xFDE5380C );
    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
    P( A, B, C, D, 13,  4, 0x289B7EC6 );
    P( D, A, B, C,  0, 11, 0xEAA127FA );
    P( C, D, A, B,  3, 16, 0xD4EF3085 );
    P( B, C, D, A,  6, 23, 0x04881D05 );
    P( A, B, C, D,  9,  4, 0xD9D4D039 );
    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
    P( B, C, D, A,  2, 23, 0xC4AC5665 );

#undef F

/* Round 4 */
#define F(x,y,z) (y ^ (x | ~z))

    P( A, B, C, D,  0,  6, 0xF4292244 );
    P( D, A, B, C,  7, 10, 0x432AFF97 );
    P( C, D, A, B, 14, 15, 0xAB9423A7 );
    P( B, C, D, A,  5, 21, 0xFC93A039 );
    P( A, B, C, D, 12,  6, 0x655B59C3 );
    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
    P( C, D, A, B, 10, 15, 0xFFEFF47D );
    P( B, C, D, A,  1, 21, 0x85845DD1 );
    P( A, B, C, D,  8,  6, 0x6FA87E4F );
    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
    P( C, D, A, B,  6, 15, 0xA3014314 );
    P( B, C, D, A, 13, 21, 0x4E0811A1 );
    P( A, B, C, D,  4,  6, 0xF7537E82 );
    P( D, A, B, C, 11, 10, 0xBD3AF235 );
    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
    P( B, C, D, A,  9, 21, 0xEB86D391 );

#undef F

    /* Fold the working variables back into the running state. */
    ctx->state[0] += A;
    ctx->state[1] += B;
    ctx->state[2] += C;
    ctx->state[3] += D;
}

/*
 * MD5 process buffer: absorb ilen bytes of input into the context.
 *
 * Complete 64-byte blocks are passed to md5_process(); a trailing partial
 * block is kept in ctx->buffer for the next call. The byte counter in
 * ctx->total is advanced by ilen (with carry into total[1]).
 * A zero-length input is a no-op.
 */
void md5_update( md5_context *ctx, const unsigned char *input, size_t ilen )
{
    uint32_t buffered;
    size_t room;

    if( ilen == 0 )
        return;

    /* Bytes already waiting in ctx->buffer, and the space left in it. */
    buffered = ctx->total[0] & 0x3F;
    room = 64 - buffered;

    ctx->total[0] += (uint32_t) ilen;
    ctx->total[0] &= 0xFFFFFFFF;

    /* Low word wrapped around: carry into the high word. */
    if( ctx->total[0] < (uint32_t) ilen )
        ctx->total[1]++;

    /* Top up a partially filled buffer first and process it. */
    if( buffered != 0 && ilen >= room )
    {
        memcpy( ctx->buffer + buffered, input, room );
        md5_process( ctx, ctx->buffer );
        input += room;
        ilen  -= room;
        buffered = 0;
    }

    /* Whole blocks are consumed straight from the caller's memory. */
    for( ; ilen >= 64; input += 64, ilen -= 64 )
        md5_process( ctx, input );

    /* Stash whatever is left for the next call. */
    if( ilen != 0 )
        memcpy( ctx->buffer + buffered, input, ilen );
}

/*
 * Padding source used by md5_finish(): a single 0x80 byte followed by
 * zeros. md5_finish() feeds the first padn bytes of it to md5_update().
 */
static const unsigned char md5_padding[64] =
{
 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * MD5 final digest: pad the message, append its length in bits and write
 * the 16-byte digest to output (state words stored little-endian).
 */
void md5_finish( md5_context *ctx, unsigned char output[16] )
{
    unsigned char bit_count[8];
    uint32_t bits_hi, bits_lo;
    uint32_t used, pad_len;
    int word;

    /* Convert the 64-bit byte counter into a 64-bit bit counter. */
    bits_hi = ( ctx->total[0] >> 29 ) | ( ctx->total[1] << 3 );
    bits_lo = ctx->total[0] << 3;

    PUT_UINT32_LE( bits_lo, bit_count, 0 );
    PUT_UINT32_LE( bits_hi, bit_count, 4 );

    /* Pad so that exactly 8 bytes remain free in the last block. */
    used = ctx->total[0] & 0x3F;
    if( used < 56 )
        pad_len = 56 - used;
    else
        pad_len = 120 - used;

    md5_update( ctx, md5_padding, pad_len );
    md5_update( ctx, bit_count, 8 );

    for( word = 0; word < 4; word++ )
    {
        PUT_UINT32_LE( ctx->state[word], output, 4 * word );
    }
}

/*
 * output = MD5( input buffer )
 */
void md5( const unsigned char *input, size_t ilen, unsigned char output[16] )
{
    /*static md5_context ctx;
    static int done=0;
    if(done==0)
    {
        md5_init( &ctx );
    	done=1;
    }*/
    md5_context ctx;
    md5_init( &ctx );
    md5_starts( &ctx );
    md5_update( &ctx, input, ilen );
    md5_finish( &ctx, output );
    md5_free( &ctx );
}


/*
 * MD5 HMAC context setup.
 *
 * Builds the inner and outer pads from the key (0x36 / 0x5C XORed with the
 * key bytes), restarts the hash and feeds it the inner pad. A key longer
 * than 64 bytes is first replaced by its 16-byte MD5 digest.
 */
void md5_hmac_starts( md5_context *ctx, const unsigned char *key,
                      size_t keylen )
{
    unsigned char hashed_key[16];
    size_t pos;

    if( keylen > 64 )
    {
        md5( key, keylen, hashed_key );
        key = hashed_key;
        keylen = 16;
    }

    memset( ctx->ipad, 0x36, 64 );
    memset( ctx->opad, 0x5C, 64 );

    for( pos = 0; pos < keylen; pos++ )
    {
        ctx->ipad[pos] ^= key[pos];
        ctx->opad[pos] ^= key[pos];
    }

    md5_starts( ctx );
    md5_update( ctx, ctx->ipad, 64 );

    /* Do not leave key-derived bytes behind on the stack. */
    polarssl_zeroize( hashed_key, sizeof( hashed_key ) );
}

/*
 * MD5 HMAC process buffer: message bytes go straight into the running
 * (inner) hash via md5_update().
 */
void md5_hmac_update( md5_context *ctx, const unsigned char *input,
                      size_t ilen )
{
    md5_update( ctx, input, ilen );
    return;
}

/*
 * MD5 HMAC final digest.
 *
 * Finishes the inner hash, then hashes the outer pad followed by the inner
 * digest and writes that result to output. The inner digest is wiped from
 * the stack before returning.
 */
void md5_hmac_finish( md5_context *ctx, unsigned char output[16] )
{
    unsigned char inner_digest[16];

    md5_finish( ctx, inner_digest );

    /* Outer hash: MD5( opad || inner digest ). */
    md5_starts( ctx );
    md5_update( ctx, ctx->opad, 64 );
    md5_update( ctx, inner_digest, sizeof( inner_digest ) );
    md5_finish( ctx, output );

    polarssl_zeroize( inner_digest, sizeof( inner_digest ) );
}

/*
 * MD5 HMAC context reset: restart the hash and feed it the inner pad that
 * is already stored in the context, so the existing key is reused.
 */
void md5_hmac_reset( md5_context *ctx )
{
    const unsigned char *inner_pad = ctx->ipad;

    md5_starts( ctx );
    md5_update( ctx, inner_pad, 64 );
}

/*
 * output = HMAC-MD5( hmac key, input buffer )
 */
void md5_hmac( const unsigned char *key, size_t keylen,
               const unsigned char *input, size_t ilen,
               unsigned char output[16] )
{
    md5_context ctx;

    md5_init( &ctx );
    md5_hmac_starts( &ctx, key, keylen );
    md5_hmac_update( &ctx, input, ilen );
    md5_hmac_finish( &ctx, output );
    md5_free( &ctx );
}


================================================
FILE: lib/md5.h
================================================
#ifndef UDP2RAW_MD5_H_
#define UDP2RAW_MD5_H_
#include <stdint.h>
#include <stddef.h>

/*
 * Compute the MD5 digest of a buffer.
 *
 * initial_msg  bytes to hash
 * initial_len  number of bytes in initial_msg
 * digest       receives the 16-byte digest
 */
void md5(const uint8_t *initial_msg, size_t initial_len, uint8_t *digest);

#endif


================================================
FILE: lib/pbkdf2-sha1.cpp
================================================
/*
   this file is from https://github.com/kholia/PKCS5_PBKDF2

*
 *  FIPS-180-1 compliant SHA-1 implementation
 *
 *  Copyright (C) 2006-2010, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 *  The SHA-1 standard was published by NIST in 1993.
 *
 *  http://www.itl.nist.gov/fipspubs/fip180-1.htm
 *
 *  Copyright 2012 Mathias Olsson mathias@kompetensum.com
 *
 *  This file is dual licensed as either GPL version 2 or Apache License 2.0 at your choice
 *  http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 *  http://www.apache.org/licenses/
 *
 *  Note that PolarSSL uses GPL with a FOSS License Exception */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * If the build defines TEST or DEBUG, both are removed here (with a
 * compile-time warning). As a consequence the "#if defined(TEST)" self-test
 * section and the "#ifdef DEBUG" helper further down in this file are
 * always compiled out.
 */
#if defined(TEST) ||defined(DEBUG)
#undef TEST 
#undef DEBUG
#warning "undefined TEST/DEBUG"
#endif

/*
 * Working state shared by the SHA-1 and HMAC-SHA-1 routines in this file.
 *
 * NOTE(review): the counters and state words are `unsigned long`, which may
 * be wider than 32 bits. The code in this file appears to rely only on the
 * low 32 bits (rotations mask with 0xFFFFFFFF, total[0] is masked after each
 * update, output bytes are taken via casts to unsigned char) - confirm
 * before changing these types.
 */
typedef struct {
	unsigned long total[2];	/*!< number of bytes processed  */
	unsigned long state[5];	/*!< intermediate digest state  */
	unsigned char buffer[64];	/*!< data block being processed */

	unsigned char ipad[64];	/*!< HMAC: inner padding        */
	unsigned char opad[64];	/*!< HMAC: outer padding        */
} sha1_context;

/*
 * 32-bit integer manipulation macros (big endian)
 */
/*
 * GET_ULONG_BE(n,b,i): assemble the four bytes b[i] .. b[i+3] into n, with
 * b[i] as the most significant of the four bytes.
 * Only defined here if no earlier definition exists.
 */
#ifndef GET_ULONG_BE
#define GET_ULONG_BE(n,b,i)                             \
{                                                       \
    (n) = ( (unsigned long) (b)[(i)    ] << 24 )        \
        | ( (unsigned long) (b)[(i) + 1] << 16 )        \
        | ( (unsigned long) (b)[(i) + 2] <<  8 )        \
        | ( (unsigned long) (b)[(i) + 3]       );       \
}
#endif

/*
 * PUT_ULONG_BE(n,b,i): store bits 31..0 of n into b[i] .. b[i+3], most
 * significant byte first. Each byte is obtained by a cast to unsigned char.
 * Only defined here if no earlier definition exists.
 */
#ifndef PUT_ULONG_BE
#define PUT_ULONG_BE(n,b,i)                             \
{                                                       \
    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
}
#endif

/*
 * SHA-1 context setup: reset the byte counter and load the five initial
 * state words. buffer, ipad and opad are left untouched.
 */
void sha1_starts(sha1_context * ctx)
{
	static const unsigned long initial_state[5] = {
		0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
	};
	int word;

	for (word = 0; word < 5; word++)
		ctx->state[word] = initial_state[word];

	ctx->total[1] = 0;
	ctx->total[0] = 0;
}

/*
 * Mix one 64-byte block into the digest state.
 *
 * The block is read as sixteen big-endian words W[0..15]; W is then used as
 * a 16-entry circular buffer that R(t) extends in place for steps 16..79.
 * Four groups of twenty P() steps, each with its own F and K, are applied
 * to working copies A..E of ctx->state, and the results are added back into
 * ctx->state. ctx->total and ctx->buffer are not touched here.
 *
 * Note: the helper macros S, R and P stay defined after this function; only
 * F and K are #undef'd.
 */
static void sha1_process(sha1_context * ctx, const unsigned char data[64])
{
	unsigned long temp, W[16], A, B, C, D, E;

	/* Decode the input block into sixteen big-endian words. */
	GET_ULONG_BE(W[0], data, 0);
	GET_ULONG_BE(W[1], data, 4);
	GET_ULONG_BE(W[2], data, 8);
	GET_ULONG_BE(W[3], data, 12);
	GET_ULONG_BE(W[4], data, 16);
	GET_ULONG_BE(W[5], data, 20);
	GET_ULONG_BE(W[6], data, 24);
	GET_ULONG_BE(W[7], data, 28);
	GET_ULONG_BE(W[8], data, 32);
	GET_ULONG_BE(W[9], data, 36);
	GET_ULONG_BE(W[10], data, 40);
	GET_ULONG_BE(W[11], data, 44);
	GET_ULONG_BE(W[12], data, 48);
	GET_ULONG_BE(W[13], data, 52);
	GET_ULONG_BE(W[14], data, 56);
	GET_ULONG_BE(W[15], data, 60);

/* S(x,n): rotate left by n bits; the low 32 bits of x are masked before the right shift. */
#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))

/* R(t): compute message-schedule word t from four earlier words and store it in slot t mod 16. */
#define R(t)                                            \
(                                                       \
    temp = W[(t -  3) & 0x0F] ^ W[(t - 8) & 0x0F] ^     \
           W[(t - 14) & 0x0F] ^ W[ t      & 0x0F],      \
    ( W[t & 0x0F] = S(temp,1) )                         \
)

/* P: one step - update e from a, F(b,c,d), K and schedule word x, then rotate b by 30. */
#define P(a,b,c,d,e,x)                                  \
{                                                       \
    e += S(a,5) + F(b,c,d) + K + x; b = S(b,30);        \
}

	A = ctx->state[0];
	B = ctx->state[1];
	C = ctx->state[2];
	D = ctx->state[3];
	E = ctx->state[4];

/* Steps 0..19 */
#define F(x,y,z) (z ^ (x & (y ^ z)))
#define K 0x5A827999

	P(A, B, C, D, E, W[0]);
	P(E, A, B, C, D, W[1]);
	P(D, E, A, B, C, W[2]);
	P(C, D, E, A, B, W[3]);
	P(B, C, D, E, A, W[4]);
	P(A, B, C, D, E, W[5]);
	P(E, A, B, C, D, W[6]);
	P(D, E, A, B, C, W[7]);
	P(C, D, E, A, B, W[8]);
	P(B, C, D, E, A, W[9]);
	P(A, B, C, D, E, W[10]);
	P(E, A, B, C, D, W[11]);
	P(D, E, A, B, C, W[12]);
	P(C, D, E, A, B, W[13]);
	P(B, C, D, E, A, W[14]);
	P(A, B, C, D, E, W[15]);
	P(E, A, B, C, D, R(16));
	P(D, E, A, B, C, R(17));
	P(C, D, E, A, B, R(18));
	P(B, C, D, E, A, R(19));

#undef K
#undef F

/* Steps 20..39 */
#define F(x,y,z) (x ^ y ^ z)
#define K 0x6ED9EBA1

	P(A, B, C, D, E, R(20));
	P(E, A, B, C, D, R(21));
	P(D, E, A, B, C, R(22));
	P(C, D, E, A, B, R(23));
	P(B, C, D, E, A, R(24));
	P(A, B, C, D, E, R(25));
	P(E, A, B, C, D, R(26));
	P(D, E, A, B, C, R(27));
	P(C, D, E, A, B, R(28));
	P(B, C, D, E, A, R(29));
	P(A, B, C, D, E, R(30));
	P(E, A, B, C, D, R(31));
	P(D, E, A, B, C, R(32));
	P(C, D, E, A, B, R(33));
	P(B, C, D, E, A, R(34));
	P(A, B, C, D, E, R(35));
	P(E, A, B, C, D, R(36));
	P(D, E, A, B, C, R(37));
	P(C, D, E, A, B, R(38));
	P(B, C, D, E, A, R(39));

#undef K
#undef F

/* Steps 40..59 */
#define F(x,y,z) ((x & y) | (z & (x | y)))
#define K 0x8F1BBCDC

	P(A, B, C, D, E, R(40));
	P(E, A, B, C, D, R(41));
	P(D, E, A, B, C, R(42));
	P(C, D, E, A, B, R(43));
	P(B, C, D, E, A, R(44));
	P(A, B, C, D, E, R(45));
	P(E, A, B, C, D, R(46));
	P(D, E, A, B, C, R(47));
	P(C, D, E, A, B, R(48));
	P(B, C, D, E, A, R(49));
	P(A, B, C, D, E, R(50));
	P(E, A, B, C, D, R(51));
	P(D, E, A, B, C, R(52));
	P(C, D, E, A, B, R(53));
	P(B, C, D, E, A, R(54));
	P(A, B, C, D, E, R(55));
	P(E, A, B, C, D, R(56));
	P(D, E, A, B, C, R(57));
	P(C, D, E, A, B, R(58));
	P(B, C, D, E, A, R(59));

#undef K
#undef F

/* Steps 60..79 */
#define F(x,y,z) (x ^ y ^ z)
#define K 0xCA62C1D6

	P(A, B, C, D, E, R(60));
	P(E, A, B, C, D, R(61));
	P(D, E, A, B, C, R(62));
	P(C, D, E, A, B, R(63));
	P(B, C, D, E, A, R(64));
	P(A, B, C, D, E, R(65));
	P(E, A, B, C, D, R(66));
	P(D, E, A, B, C, R(67));
	P(C, D, E, A, B, R(68));
	P(B, C, D, E, A, R(69));
	P(A, B, C, D, E, R(70));
	P(E, A, B, C, D, R(71));
	P(D, E, A, B, C, R(72));
	P(C, D, E, A, B, R(73));
	P(B, C, D, E, A, R(74));
	P(A, B, C, D, E, R(75));
	P(E, A, B, C, D, R(76));
	P(D, E, A, B, C, R(77));
	P(C, D, E, A, B, R(78));
	P(B, C, D, E, A, R(79));

#undef K
#undef F

	/* Fold the working variables back into the running state. */
	ctx->state[0] += A;
	ctx->state[1] += B;
	ctx->state[2] += C;
	ctx->state[3] += D;
	ctx->state[4] += E;
}

/*
 * SHA-1 process buffer: absorb ilen bytes of input into the context.
 *
 * Complete 64-byte blocks are passed to sha1_process(); a trailing partial
 * block is kept in ctx->buffer for the next call. The byte counter in
 * ctx->total is advanced by ilen (with carry into total[1]).
 * A zero or negative ilen is a no-op.
 */
void sha1_update(sha1_context * ctx, const unsigned char *input, int ilen)
{
	unsigned long buffered;
	int room;

	if (ilen <= 0)
		return;

	/* Bytes already waiting in ctx->buffer, and the space left in it. */
	buffered = ctx->total[0] & 0x3F;
	room = 64 - buffered;

	ctx->total[0] += (unsigned long) ilen;
	ctx->total[0] &= 0xFFFFFFFF;

	/* Low word wrapped around: carry into the high word. */
	if (ctx->total[0] < (unsigned long) ilen)
		ctx->total[1]++;

	/* Top up a partially filled buffer first and process it. */
	if (buffered != 0 && ilen >= room) {
		memcpy(ctx->buffer + buffered, input, room);
		sha1_process(ctx, ctx->buffer);
		input += room;
		ilen -= room;
		buffered = 0;
	}

	/* Whole blocks are consumed straight from the caller's memory. */
	for (; ilen >= 64; input += 64, ilen -= 64)
		sha1_process(ctx, input);

	/* Stash whatever is left for the next call. */
	if (ilen > 0)
		memcpy(ctx->buffer + buffered, input, ilen);
}

/*
 * Padding source used by sha1_finish(): a single 0x80 byte followed by
 * zeros. sha1_finish() feeds the first padn bytes of it to sha1_update().
 */
static const unsigned char sha1_padding[64] = {
	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * SHA-1 final digest: pad the message, append its length in bits
 * (big-endian, high word first) and write the 20-byte digest to output
 * (state words stored big-endian).
 */
void sha1_finish(sha1_context * ctx, unsigned char output[20])
{
	unsigned char bit_count[8];
	unsigned long bits_hi, bits_lo;
	unsigned long used, pad_len;
	int word;

	/* Convert the byte counter into a bit counter split over two words. */
	bits_hi = (ctx->total[0] >> 29) | (ctx->total[1] << 3);
	bits_lo = ctx->total[0] << 3;

	PUT_ULONG_BE(bits_hi, bit_count, 0);
	PUT_ULONG_BE(bits_lo, bit_count, 4);

	/* Pad so that exactly 8 bytes remain free in the last block. */
	used = ctx->total[0] & 0x3F;
	if (used < 56)
		pad_len = 56 - used;
	else
		pad_len = 120 - used;

	sha1_update(ctx, (unsigned char *) sha1_padding, pad_len);
	sha1_update(ctx, bit_count, 8);

	for (word = 0; word < 5; word++) {
		PUT_ULONG_BE(ctx->state[word], output, 4 * word);
	}
}

/*
 * output = SHA-1( input buffer )
 */
void sha1(const unsigned char *input, int ilen, unsigned char output[20])
{
	sha1_context ctx;

	sha1_starts(&ctx);
	sha1_update(&ctx, input, ilen);
	sha1_finish(&ctx, output);

}


/*
 * SHA-1 HMAC context setup.
 *
 * Builds the inner and outer pads from the key (0x36 / 0x5C XORed with the
 * key bytes), restarts the hash and feeds it the inner pad. A key longer
 * than 64 bytes is first replaced by its 20-byte SHA-1 digest.
 */
void sha1_hmac_starts(sha1_context * ctx, const unsigned char *key, int keylen)
{
	unsigned char hashed_key[20];
	int pos;

	if (keylen > 64) {
		sha1(key, keylen, hashed_key);
		key = hashed_key;
		keylen = 20;
	}

	memset(ctx->ipad, 0x36, 64);
	memset(ctx->opad, 0x5C, 64);

	for (pos = 0; pos < keylen; pos++) {
		ctx->ipad[pos] ^= key[pos];
		ctx->opad[pos] ^= key[pos];
	}

	sha1_starts(ctx);
	sha1_update(ctx, ctx->ipad, 64);
}

/*
 * SHA-1 HMAC process buffer: message bytes go straight into the running
 * (inner) hash via sha1_update().
 */
void sha1_hmac_update(sha1_context * ctx, const unsigned char *input, int ilen)
{
	sha1_update(ctx, input, ilen);
	return;
}

/*
 * SHA-1 HMAC final digest.
 *
 * Finishes the inner hash, then hashes the outer pad followed by the inner
 * digest and writes that result to output.
 */
void sha1_hmac_finish(sha1_context * ctx, unsigned char output[20])
{
	unsigned char inner_digest[20];

	sha1_finish(ctx, inner_digest);

	/* Outer hash: SHA-1( opad || inner digest ). */
	sha1_starts(ctx);
	sha1_update(ctx, ctx->opad, 64);
	sha1_update(ctx, inner_digest, 20);
	sha1_finish(ctx, output);
}

/*
 * SHA1 HMAC context reset: restart the hash and feed it the inner pad that
 * is already stored in the context, so the existing key is reused.
 */
void sha1_hmac_reset(sha1_context * ctx)
{
	const unsigned char *inner_pad = ctx->ipad;

	sha1_starts(ctx);
	sha1_update(ctx, inner_pad, 64);
}

/*
 * output = HMAC-SHA-1( hmac key, input buffer )
 */
void sha1_hmac(const unsigned char *key, int keylen,
    const unsigned char *input, int ilen, unsigned char output[20])
{
	sha1_context ctx;

	sha1_hmac_starts(&ctx, key, keylen);
	sha1_hmac_update(&ctx, input, ilen);
	sha1_hmac_finish(&ctx, output);

}


/*
 * min(a, b): the smaller of the two arguments. Being a macro, the selected
 * argument is evaluated twice, so avoid arguments with side effects.
 * Only defined here if no earlier definition exists.
 */
#ifndef min
#define min( a, b ) ( ((a) < (b)) ? (a) : (b) )
#endif

void PKCS5_PBKDF2_HMAC_SHA1(const unsigned char *password, size_t plen,
    const unsigned char *salt, size_t slen,
    const unsigned long iteration_count, const unsigned long key_length,
    unsigned char *output)
{
	sha1_context ctx;
	sha1_starts(&ctx);

	// Size of the generated digest
	unsigned char md_size = 20;
	unsigned char md1[20];
	unsigned char work[20];

	unsigned long counter = 1;
	unsigned long generated_key_length = 0;
	while (generated_key_length < key_length) {
		// U1 ends up in md1 and work
		unsigned char c[4];
		c[0] = (counter >> 24) & 0xff;
		c[1] = (counter >> 16) & 0xff;
		c[2] = (counter >> 8) & 0xff;
		c[3] = (counter >> 0) & 0xff;

		sha1_hmac_starts(&ctx, password, plen);
		sha1_hmac_update(&ctx, salt, slen);
		sha1_hmac_update(&ctx, c, 4);
		sha1_hmac_finish(&ctx, md1);
		memcpy(work, md1, md_size);

		unsigned long ic = 1;
		for (ic = 1; ic < iteration_count; ic++) {
			// U2 ends up in md1
			sha1_hmac_starts(&ctx, password, plen);
			sha1_hmac_update(&ctx, md1, md_size);
			sha1_hmac_finish(&ctx, md1);
			// U1 xor U2
			unsigned long i = 0;
			for (i = 0; i < md_size; i++) {
				work[i] ^= md1[i];
			}
			// and so on until iteration_count
		}

		// Copy the generated bytes to the key
		unsigned long bytes_to_write =
		    min((key_length - generated_key_length), md_size);
		memcpy(output + generated_key_length, work, bytes_to_write);
		generated_key_length += bytes_to_write;
		++counter;
	}
}


#if defined(TEST)
/*
 * FIPS-180-1 test vectors
 *
 * Entry i of each table belongs to SHA-1 test #(i+1). The third input is
 * left empty here: the self-test builds it at run time by feeding a
 * 1000-byte buffer of 'a' to sha1_update() 1000 times.
 */
static unsigned char sha1_test_buf[3][57] = {
	{"abc"},
	{"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"},
	{""}
};

/* Input lengths in bytes for the vectors above. */
static const int sha1_test_buflen[3] = {
	3, 56, 1000
};

/* Expected 20-byte digests. */
static const unsigned char sha1_test_sum[3][20] = {
	{0xA9, 0x99, 0x3E, 0x36, 0x47, 0x06, 0x81, 0x6A, 0xBA, 0x3E,
	    0x25, 0x71, 0x78, 0x50, 0xC2, 0x6C, 0x9C, 0xD0, 0xD8, 0x9D},
	{0x84, 0x98, 0x3E, 0x44, 0x1C, 0x3B, 0xD2, 0x6E, 0xBA, 0xAE,
	    0x4A, 0xA1, 0xF9, 0x51, 0x29, 0xE5, 0xE5, 0x46, 0x70, 0xF1},
	{0x34, 0xAA, 0x97, 0x3C, 0xD4, 0xC4, 0xDA, 0xA4, 0xF6, 0x1E,
	    0xEB, 0x2B, 0xDB, 0xAD, 0x27, 0x31, 0x65, 0x34, 0x01, 0x6F}
};

/*
 * RFC 2202 test vectors
 *
 * Entry i of each table belongs to HMAC-SHA-1 test #(i+1). The keys for the
 * last two tests are left empty here: the self-test builds an 80-byte key of
 * 0xAA at run time for them.
 */
static unsigned char sha1_hmac_test_key[7][26] = {
	{"\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
		    "\x0B\x0B\x0B\x0B"},
	{"Jefe"},
	{"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
		    "\xAA\xAA\xAA\xAA"},
	{"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
		    "\x11\x12\x13\x14\x15\x16\x17\x18\x19"},
	{"\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C"
		    "\x0C\x0C\x0C\x0C"},
	{""},			/* 0xAA 80 times */
	{""}
};

/* Key lengths in bytes for the keys above. */
static const int sha1_hmac_test_keylen[7] = {
	20, 4, 20, 25, 20, 80, 80
};

/* Messages to authenticate. */
static unsigned char sha1_hmac_test_buf[7][74] = {
	{"Hi There"},
	{"what do ya want for nothing?"},
	{"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
		    "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
		    "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
		    "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
		    "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"},
	{"\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
		    "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
		    "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
		    "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
		    "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"},
	{"Test With Truncation"},
	{"Test Using Larger Than Block-Size Key - Hash Key First"},
	{"Test Using Larger Than Block-Size Key and Larger"
		    " Than One Block-Size Data"}
};

/* Message lengths in bytes for the messages above. */
static const int sha1_hmac_test_buflen[7] = {
	8, 28, 50, 50, 20, 54, 73
};

/*
 * Expected MACs. The fifth entry lists only 12 bytes; the self-test
 * compares just the first 12 bytes for that case.
 */
static const unsigned char sha1_hmac_test_sum[7][20] = {
	{0xB6, 0x17, 0x31, 0x86, 0x55, 0x05, 0x72, 0x64, 0xE2, 0x8B,
	    0xC0, 0xB6, 0xFB, 0x37, 0x8C, 0x8E, 0xF1, 0x46, 0xBE, 0x00},
	{0xEF, 0xFC, 0xDF, 0x6A, 0xE5, 0xEB, 0x2F, 0xA2, 0xD2, 0x74,
	    0x16, 0xD5, 0xF1, 0x84, 0xDF, 0x9C, 0x25, 0x9A, 0x7C, 0x79},
	{0x12, 0x5D, 0x73, 0x42, 0xB9, 0xAC, 0x11, 0xCD, 0x91, 0xA3,
	    0x9A, 0xF4, 0x8A, 0xA1, 0x7B, 0x4F, 0x63, 0xF1, 0x75, 0xD3},
	{0x4C, 0x90, 0x07, 0xF4, 0x02, 0x62, 0x50, 0xC6, 0xBC, 0x84,
	    0x14, 0xF9, 0xBF, 0x50, 0xC8, 0x6C, 0x2D, 0x72, 0x35, 0xDA},
	{0x4C, 0x1A, 0x03, 0x42, 0x4B, 0x55, 0xE0, 0x7F, 0xE7, 0xF2,
	    0x7B, 0xE1},
	{0xAA, 0x4A, 0xE5, 0xE1, 0x52, 0x72, 0xD0, 0x0E, 0x95, 0x70,
	    0x56, 0x37, 0xCE, 0x8A, 0x3B, 0x55, 0xED, 0x40, 0x21, 0x12},
	{0xE8, 0xE9, 0x9D, 0x0F, 0x45, 0x23, 0x7D, 0x78, 0x6D, 0x6B,
	    0xBA, 0xA7, 0x96, 0x5C, 0x78, 0x08, 0xBB, 0xFF, 0x1A, 0x91}
};

/*
 * One PBKDF2 test case, consumed by do_test().
 */
typedef struct {
	char *t;		// test name, printed when the test starts
	char *p;		// password
	int plen;		// password length in bytes
	char *s;		// salt
	int slen;		// salt length in bytes
	int c;			// iteration count
	int dkLen;		// number of derived key bytes to produce and compare
	char dk[1024];		// expected derived key. Remember to set this to max dkLen
} testvector;

/*
 * Run one PBKDF2 test vector.
 *
 * Derives tv->dkLen bytes with PKCS5_PBKDF2_HMAC_SHA1() from the vector's
 * password, salt and iteration count, and compares them with tv->dk.
 *
 * Returns 0 when the derived key matches, -1 on mismatch or when the
 * scratch buffer cannot be allocated.
 */
int do_test(testvector * tv)
{
	unsigned char *key;
	int result;

	printf("Started %s\n", tv->t);
	fflush(stdout);

	// Explicit cast: this file is compiled as C++, where void* does not
	// convert implicitly.
	key = (unsigned char *) malloc(tv->dkLen);
	if (key == 0) {
		return -1;
	}

	// The derivation routine in this file is PKCS5_PBKDF2_HMAC_SHA1 and
	// takes unsigned char buffers.
	PKCS5_PBKDF2_HMAC_SHA1((const unsigned char *) tv->p, tv->plen,
	    (const unsigned char *) tv->s, tv->slen, tv->c,
	    tv->dkLen, key);

	result = (memcmp(tv->dk, key, tv->dkLen) == 0) ? 0 : -1;

	// Release the scratch buffer on both the pass and the fail path.
	free(key);
	return result;
}

#ifdef DEBUG
/*
 * Print len bytes of str to stdout as two-digit lowercase hex, followed by
 * a newline.
 */
static void print_hex(unsigned char *str, int len)
{
	int pos = 0;

	while (pos < len) {
		printf("%02x", str[pos]);
		pos++;
	}
	printf("\n");
}
#endif

/*
 * Checkup routine
 *
 * Self-test driver. Runs, in order:
 *   1. three SHA-1 digests checked against sha1_test_sum,
 *   2. seven HMAC-SHA-1 cases checked against sha1_hmac_test_sum,
 *   3. the PBKDF2 vectors from RFC 6070 via do_test().
 * Returns 0 when everything passes; returns 1 on the first SHA-1/HMAC
 * mismatch, or do_test()'s non-zero result on the first PBKDF2 failure.
 */
int main(int argc,char * argv[])
{
	int verbose = 1;
	int i, j, buflen;
	unsigned char buf[1024];
	unsigned char sha1sum[20];

	sha1_context ctx;

	/*
	 * SHA-1
	 */
	for (i = 0; i < 3; i++) {
		if (verbose != 0)
			printf("  SHA-1 test #%d: ", i + 1);

		sha1_starts(&ctx);

		if (i == 2) {
			/* third case: 1000 updates of a 1000-byte buffer of 'a' */
			memset(buf, 'a', buflen = 1000);

			for (j = 0; j < 1000; j++)
				sha1_update(&ctx, buf, buflen);
		} else
			sha1_update(&ctx, sha1_test_buf[i],
			    sha1_test_buflen[i]);

		sha1_finish(&ctx, sha1sum);

		if (memcmp(sha1sum, sha1_test_sum[i], 20) != 0) {
			if (verbose != 0)
				printf("failed\n");

			return (1);
		}

		if (verbose != 0)
			printf("passed\n");
	}

	if (verbose != 0)
		printf("\n");

	/*
	 * HMAC-SHA-1
	 */
	for (i = 0; i < 7; i++) {
		if (verbose != 0)
			printf("  HMAC-SHA-1 test #%d: ", i + 1);

		if (i == 5 || i == 6) {
			/* last two cases use an 80-byte key of 0xAA built in buf
			 * instead of an entry from sha1_hmac_test_key */
			memset(buf, '\xAA', buflen = 80);
			sha1_hmac_starts(&ctx, buf, buflen);
		} else
			sha1_hmac_starts(&ctx, sha1_hmac_test_key[i],
			    sha1_hmac_test_keylen[i]);

		sha1_hmac_update(&ctx, sha1_hmac_test_buf[i],
		    sha1_hmac_test_buflen[i]);

		sha1_hmac_finish(&ctx, sha1sum);

		/* case 5 (i == 4) is the truncation test: compare 12 bytes only */
		buflen = (i == 4) ? 12 : 20;

		if (memcmp(sha1sum, sha1_hmac_test_sum[i], buflen) != 0) {
			if (verbose != 0)
				printf("failed\n");

			return (1);
		}

		if (verbose != 0)
			printf("passed\n");
	}

	if (verbose != 0)
		printf("\n");

	// Test vectors from RFC 6070

	testvector *tv = 0;
	int res = 0;

/*
    Input:
       P = "password" (8 octets)
       S = "salt" (4 octets)
       c = 1
       dkLen = 20

     Output:
       DK = 0c 60 c8 0f 96 1f 0e 71
            f3 a9 b5 24 af 60 12 06
            2f e0 37 a6             (20 octets)

*/
	testvector t1 = {
		"Test 1",
		"password", 8, "salt", 4, 1, 20,
		.dk = {0x0c, 0x60, 0xc8, 0x0f, 0x96, 0x1f, 0x0e, 0x71,
			    0xf3, 0xa9, 0xb5, 0x24, 0xaf, 0x60, 0x12, 0x06,
		    0x2f, 0xe0, 0x37, 0xa6}
	};

	tv = &t1;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

/*
       Input:
             P = "password" (8 octets)
             S = "salt" (4 octets)
             c = 2
             dkLen = 20

           Output:
             DK = ea 6c 01 4d c7 2d 6f 8c
                  cd 1e d9 2a ce 1d 41 f0
                  d8 de 89 57             (20 octets)

*/

	testvector t2 = {
		"Test 2",
		"password", 8, "salt", 4, 2, 20,
		{0xea, 0x6c, 0x01, 0x4d, 0xc7, 0x2d, 0x6f, 0x8c,
			    0xcd, 0x1e, 0xd9, 0x2a, 0xce, 0x1d, 0x41, 0xf0,
		    0xd8, 0xde, 0x89, 0x57}
	};

	tv = &t2;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

/*
             Input:
                  P = "password" (8 octets)
                  S = "salt" (4 octets)
                  c = 4096
                  dkLen = 20

                Output:
                  DK = 4b 00 79 01 b7 65 48 9a
                       be ad 49 d9 26 f7 21 d0
                       65 a4 29 c1             (20 octets)


*/
	testvector t3 = {
		"Test 3",
		"password", 8, "salt", 4, 4096, 20,
		{0x4b, 0x00, 0x79, 0x01, 0xb7, 0x65, 0x48, 0x9a,
			    0xbe, 0xad, 0x49, 0xd9, 0x26, 0xf7, 0x21, 0xd0,
		    0x65, 0xa4, 0x29, 0xc1}
	};

	tv = &t3;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

/*
                  Input:
                     P = "password" (8 octets)
                     S = "salt" (4 octets)
                     c = 16777216
                     dkLen = 20

                   Output:
                     DK = ee fe 3d 61 cd 4d a4 e4
                          e9 94 5b 3d 6b a2 15 8c
                          26 34 e9 84             (20 octets)

*/
	testvector t4 = {
		"Test 4",
		"password", 8, "salt", 4, 16777216, 20,
		{0xee, 0xfe, 0x3d, 0x61, 0xcd, 0x4d, 0xa4, 0xe4,
			    0xe9, 0x94, 0x5b, 0x3d, 0x6b, 0xa2, 0x15, 0x8c,
		    0x26, 0x34, 0xe9, 0x84}
	};

	tv = &t4;
	/* Test 4 (16777216 iterations) is disabled: the call below is commented
	 * out, so res still holds Test 3's result (0 at this point) and the
	 * following check can never fire. */
	// res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

/*
                     Input:
                        P = "passwordPASSWORDpassword" (24 octets)
                        S = "saltSALTsaltSALTsaltSALTsaltSALTsalt" (36 octets)
                        c = 4096
                        dkLen = 25

                      Output:
                        DK = 3d 2e ec 4f e4 1c 84 9b
                             80 c8 d8 36 62 c0 e4 4a
                             8b 29 1a 96 4c f2 f0 70
                             38                      (25 octets)

*/
	testvector t5 = {
		"Test 5",
		"passwordPASSWORDpassword", 24,
		    "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36, 4096, 25,
		{0x3d, 0x2e, 0xec, 0x4f, 0xe4, 0x1c, 0x84, 0x9b,
			    0x80, 0xc8, 0xd8, 0x36, 0x62, 0xc0, 0xe4, 0x4a,
			    0x8b, 0x29, 0x1a, 0x96, 0x4c, 0xf2, 0xf0, 0x70,
		    0x38}
	};

	tv = &t5;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

/*
                        Input:
                           P = "pass\0word" (9 octets)
                           S = "sa\0lt" (5 octets)
                           c = 4096
                           dkLen = 16

                         Output:
                           DK = 56 fa 6a a7 55 48 09 9d
                                cc 37 d7 f0 34 25 e0 c3 (16 octets)
*/
	testvector t6 = {
		"Test 6",
		"pass\0word", 9, "sa\0lt", 5, 4096, 16,
		{0x56, 0xfa, 0x6a, 0xa7, 0x55, 0x48, 0x09, 0x9d,
			    0xcc, 0x37, 0xd7, 0xf0, 0x34, 0x25, 0xe0, 0xc3,
		    }
	};

	tv = &t6;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	printf("All tests successful\n");
	return 0;
}

#endif
/*
int main()
{
}*/


================================================
FILE: lib/pbkdf2-sha1.h
================================================
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * One-shot SHA-1 (per the name; the implementation is not part of this
 * header): reads ilen bytes from input and writes a 20-byte result to output.
 */
void sha1(const unsigned char *input, int ilen, unsigned char output[20]);

/*
 * One-shot HMAC-SHA-1 (per the name): keyed with keylen bytes of key over
 * ilen bytes of input; writes a 20-byte result to output.
 */
void sha1_hmac(const unsigned char *key, int keylen, const unsigned char *input, int ilen, unsigned char output[20]);

/*
 * PBKDF2 with HMAC-SHA-1 (per the name).
 *   password / plen   password bytes and their length
 *   salt / slen       salt bytes and their length
 *   iteration_count   number of PBKDF2 iterations
 *   key_length        number of derived-key bytes requested
 *   output            destination buffer; presumably must hold at least
 *                     key_length bytes -- confirm against the implementation
 */
void PKCS5_PBKDF2_HMAC_SHA1(const unsigned char *password, size_t plen,
    const unsigned char *salt, size_t slen,
    const unsigned long iteration_count, const unsigned long key_length,
    unsigned char *output);


================================================
FILE: lib/pbkdf2-sha256.cpp
================================================
/*
   this file is from https://github.com/kholia/PKCS5_PBKDF2, with additional code of hkdf_sha256

 *  FIPS-180-2 compliant SHA-256 implementation
 *
 *  Copyright (C) 2006-2010, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 *  The SHA-256 Secure Hash Standard was published by NIST in 2002.
 *
 *  http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>


#if defined(TEST) ||defined(DEBUG)
#undef TEST 
#undef DEBUG
#warning "undefined TEST/DEBUG"
#endif


/*
 * Working state shared by the SHA-224/SHA-256 routines and the HMAC
 * wrappers in this file.
 *
 * total[] holds the running byte count as two 32-bit halves: sha2_update()
 * masks total[0] to 32 bits and carries into total[1]. ipad/opad are filled
 * by sha2_hmac_starts() and reused by sha2_hmac_finish()/sha2_hmac_reset().
 */
typedef struct {
	unsigned long total[2];	/*!< number of bytes processed  */
	unsigned long state[8];	/*!< intermediate digest state  */
	unsigned char buffer[64];	/*!< data block being processed */

	unsigned char ipad[64];	/*!< HMAC: inner padding        */
	unsigned char opad[64];	/*!< HMAC: outer padding        */
	int is224;		/*!< 0 => SHA-256, else SHA-224 */
} sha2_context;


/*
 * 32-bit integer manipulation macros (big endian)
 */
/*
 * GET_ULONG_BE(n,b,i): assemble the four bytes b[i]..b[i+3] into n, with
 * b[i] as the most significant byte. Only defined if not already defined.
 */
#ifndef GET_ULONG_BE
#define GET_ULONG_BE(n,b,i)                             \
{                                                       \
    (n) = ( (unsigned long) (b)[(i)    ] << 24 )        \
        | ( (unsigned long) (b)[(i) + 1] << 16 )        \
        | ( (unsigned long) (b)[(i) + 2] <<  8 )        \
        | ( (unsigned long) (b)[(i) + 3]       );       \
}
#endif

/*
 * PUT_ULONG_BE(n,b,i): store bits 31..0 of n into b[i]..b[i+3], most
 * significant byte first. Each byte is produced by a shift followed by a
 * cast to unsigned char, so any bits of n above bit 31 are discarded.
 */
#ifndef PUT_ULONG_BE
#define PUT_ULONG_BE(n,b,i)                             \
{                                                       \
    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
}
#endif

/*
 * SHA-256 context setup
 *
 * Resets the byte counters, loads the initial chaining values for the
 * selected variant (is224 == 0 selects SHA-256, anything else SHA-224)
 * and records the variant in the context.
 */
void sha2_starts( sha2_context *ctx, int is224 )
{
    static const unsigned long initial_state[2][8] =
    {
        /* SHA-256 */
        { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
          0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 },
        /* SHA-224 */
        { 0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
          0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4 }
    };
    const unsigned long *iv = initial_state[ ( is224 == 0 ) ? 0 : 1 ];
    int word;

    ctx->total[0] = 0;
    ctx->total[1] = 0;

    for( word = 0; word < 8; word++ )
        ctx->state[word] = iv[word];

    ctx->is224 = is224;
}

/*
 * Compression function: mixes one 64-byte block into ctx->state.
 *
 * The block is read as sixteen big-endian 32-bit words W[0..15]; W[16..63]
 * are produced on the fly by R(t) as each round consumes them. The eight
 * working variables start from ctx->state, go through 64 rounds of P()
 * (with the variable roles rotated by one position per round rather than
 * the values being shuffled), and are finally added back into ctx->state.
 *
 * The helper macros below (SHR, ROTR, S0..S3, F0, F1, R, P) are defined
 * inside this function body but, being preprocessor macros with no #undef
 * here, remain defined for the rest of the translation unit.
 */
static void sha2_process( sha2_context *ctx, const unsigned char data[64] )
{
    unsigned long temp1, temp2, W[64];
    unsigned long A, B, C, D, E, F, G, H;

    /* load the 16 message words, big-endian */
    GET_ULONG_BE( W[ 0], data,  0 );
    GET_ULONG_BE( W[ 1], data,  4 );
    GET_ULONG_BE( W[ 2], data,  8 );
    GET_ULONG_BE( W[ 3], data, 12 );
    GET_ULONG_BE( W[ 4], data, 16 );
    GET_ULONG_BE( W[ 5], data, 20 );
    GET_ULONG_BE( W[ 6], data, 24 );
    GET_ULONG_BE( W[ 7], data, 28 );
    GET_ULONG_BE( W[ 8], data, 32 );
    GET_ULONG_BE( W[ 9], data, 36 );
    GET_ULONG_BE( W[10], data, 40 );
    GET_ULONG_BE( W[11], data, 44 );
    GET_ULONG_BE( W[12], data, 48 );
    GET_ULONG_BE( W[13], data, 52 );
    GET_ULONG_BE( W[14], data, 56 );
    GET_ULONG_BE( W[15], data, 60 );

/* SHR masks x to 32 bits before shifting; presumably so results stay
 * correct when unsigned long is wider than 32 bits -- confirm. */
#define  SHR(x,n) ((x & 0xFFFFFFFF) >> n)
#define ROTR(x,n) (SHR(x,n) | (x << (32 - n)))

/* S0/S1 are used by the message schedule R(); S2/S3 by the round P() */
#define S0(x) (ROTR(x, 7) ^ ROTR(x,18) ^  SHR(x, 3))
#define S1(x) (ROTR(x,17) ^ ROTR(x,19) ^  SHR(x,10))

#define S2(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
#define S3(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))

#define F0(x,y,z) ((x & y) | (z & (x | y)))
#define F1(x,y,z) (z ^ (x & (y ^ z)))

/* R(t): compute and store schedule word W[t] from four earlier words */
#define R(t)                                    \
(                                               \
    W[t] = S1(W[t -  2]) + W[t -  7] +          \
           S0(W[t - 15]) + W[t - 16]            \
)

/* P(...): one round; updates only the d and h arguments */
#define P(a,b,c,d,e,f,g,h,x,K)                  \
{                                               \
    temp1 = h + S3(e) + F1(e,f,g) + K + x;      \
    temp2 = S2(a) + F0(a,b,c);                  \
    d += temp1; h = temp1 + temp2;              \
}

	A = ctx->state[0];
	B = ctx->state[1];
	C = ctx->state[2];
	D = ctx->state[3];
	E = ctx->state[4];
	F = ctx->state[5];
	G = ctx->state[6];
	H = ctx->state[7];

    /* rounds 0..15 use the loaded words directly */
    P( A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98 );
    P( H, A, B, C, D, E, F, G, W[ 1], 0x71374491 );
    P( G, H, A, B, C, D, E, F, W[ 2], 0xB5C0FBCF );
    P( F, G, H, A, B, C, D, E, W[ 3], 0xE9B5DBA5 );
    P( E, F, G, H, A, B, C, D, W[ 4], 0x3956C25B );
    P( D, E, F, G, H, A, B, C, W[ 5], 0x59F111F1 );
    P( C, D, E, F, G, H, A, B, W[ 6], 0x923F82A4 );
    P( B, C, D, E, F, G, H, A, W[ 7], 0xAB1C5ED5 );
    P( A, B, C, D, E, F, G, H, W[ 8], 0xD807AA98 );
    P( H, A, B, C, D, E, F, G, W[ 9], 0x12835B01 );
    P( G, H, A, B, C, D, E, F, W[10], 0x243185BE );
    P( F, G, H, A, B, C, D, E, W[11], 0x550C7DC3 );
    P( E, F, G, H, A, B, C, D, W[12], 0x72BE5D74 );
    P( D, E, F, G, H, A, B, C, W[13], 0x80DEB1FE );
    P( C, D, E, F, G, H, A, B, W[14], 0x9BDC06A7 );
    P( B, C, D, E, F, G, H, A, W[15], 0xC19BF174 );
    /* rounds 16..63 extend the schedule with R(t) as they go */
    P( A, B, C, D, E, F, G, H, R(16), 0xE49B69C1 );
    P( H, A, B, C, D, E, F, G, R(17), 0xEFBE4786 );
    P( G, H, A, B, C, D, E, F, R(18), 0x0FC19DC6 );
    P( F, G, H, A, B, C, D, E, R(19), 0x240CA1CC );
    P( E, F, G, H, A, B, C, D, R(20), 0x2DE92C6F );
    P( D, E, F, G, H, A, B, C, R(21), 0x4A7484AA );
    P( C, D, E, F, G, H, A, B, R(22), 0x5CB0A9DC );
    P( B, C, D, E, F, G, H, A, R(23), 0x76F988DA );
    P( A, B, C, D, E, F, G, H, R(24), 0x983E5152 );
    P( H, A, B, C, D, E, F, G, R(25), 0xA831C66D );
    P( G, H, A, B, C, D, E, F, R(26), 0xB00327C8 );
    P( F, G, H, A, B, C, D, E, R(27), 0xBF597FC7 );
    P( E, F, G, H, A, B, C, D, R(28), 0xC6E00BF3 );
    P( D, E, F, G, H, A, B, C, R(29), 0xD5A79147 );
    P( C, D, E, F, G, H, A, B, R(30), 0x06CA6351 );
    P( B, C, D, E, F, G, H, A, R(31), 0x14292967 );
    P( A, B, C, D, E, F, G, H, R(32), 0x27B70A85 );
    P( H, A, B, C, D, E, F, G, R(33), 0x2E1B2138 );
    P( G, H, A, B, C, D, E, F, R(34), 0x4D2C6DFC );
    P( F, G, H, A, B, C, D, E, R(35), 0x53380D13 );
    P( E, F, G, H, A, B, C, D, R(36), 0x650A7354 );
    P( D, E, F, G, H, A, B, C, R(37), 0x766A0ABB );
    P( C, D, E, F, G, H, A, B, R(38), 0x81C2C92E );
    P( B, C, D, E, F, G, H, A, R(39), 0x92722C85 );
    P( A, B, C, D, E, F, G, H, R(40), 0xA2BFE8A1 );
    P( H, A, B, C, D, E, F, G, R(41), 0xA81A664B );
    P( G, H, A, B, C, D, E, F, R(42), 0xC24B8B70 );
    P( F, G, H, A, B, C, D, E, R(43), 0xC76C51A3 );
    P( E, F, G, H, A, B, C, D, R(44), 0xD192E819 );
    P( D, E, F, G, H, A, B, C, R(45), 0xD6990624 );
    P( C, D, E, F, G, H, A, B, R(46), 0xF40E3585 );
    P( B, C, D, E, F, G, H, A, R(47), 0x106AA070 );
    P( A, B, C, D, E, F, G, H, R(48), 0x19A4C116 );
    P( H, A, B, C, D, E, F, G, R(49), 0x1E376C08 );
    P( G, H, A, B, C, D, E, F, R(50), 0x2748774C );
    P( F, G, H, A, B, C, D, E, R(51), 0x34B0BCB5 );
    P( E, F, G, H, A, B, C, D, R(52), 0x391C0CB3 );
    P( D, E, F, G, H, A, B, C, R(53), 0x4ED8AA4A );
    P( C, D, E, F, G, H, A, B, R(54), 0x5B9CCA4F );
    P( B, C, D, E, F, G, H, A, R(55), 0x682E6FF3 );
    P( A, B, C, D, E, F, G, H, R(56), 0x748F82EE );
    P( H, A, B, C, D, E, F, G, R(57), 0x78A5636F );
    P( G, H, A, B, C, D, E, F, R(58), 0x84C87814 );
    P( F, G, H, A, B, C, D, E, R(59), 0x8CC70208 );
    P( E, F, G, H, A, B, C, D, R(60), 0x90BEFFFA );
    P( D, E, F, G, H, A, B, C, R(61), 0xA4506CEB );
    P( C, D, E, F, G, H, A, B, R(62), 0xBEF9A3F7 );
    P( B, C, D, E, F, G, H, A, R(63), 0xC67178F2 );

    /* fold the working variables back into the chaining state */
    ctx->state[0] += A;
    ctx->state[1] += B;
    ctx->state[2] += C;
    ctx->state[3] += D;
    ctx->state[4] += E;
    ctx->state[5] += F;
    ctx->state[6] += G;
    ctx->state[7] += H;
}

/*
 * SHA-256 process buffer
 *
 * Absorbs ilen bytes from input into the running hash. Bytes that do not
 * complete a 64-byte block are kept in ctx->buffer until a later call (or
 * sha2_finish) supplies the rest. A zero-length call is a no-op.
 *
 * The byte counter is maintained as two 32-bit halves in ctx->total[].
 * The low 32 bits of ilen are added to total[0] with carry, and any bits of
 * ilen above 2^32 (possible when size_t is 64 bits wide) are added to
 * total[1] so the length encoded by sha2_finish stays correct for very
 * large single updates.
 */
void sha2_update( sha2_context *ctx, const unsigned char *input, size_t ilen )
{
    size_t fill;
    unsigned long left;
    unsigned long low;

    if( ilen == 0 )
        return;

    left = ctx->total[0] & 0x3F;
    fill = 64 - left;

    low = (unsigned long) ( ilen & 0xFFFFFFFF );

    ctx->total[0] = ( ctx->total[0] + low ) & 0xFFFFFFFF;

    /* wrapped past 2^32: carry into the high half */
    if( ctx->total[0] < low )
        ctx->total[1]++;

    /* high part of ilen; shifted in two steps so the expression is well
     * defined (and zero) when size_t is only 32 bits wide */
    ctx->total[1] += (unsigned long) ( ( ilen >> 16 ) >> 16 );

    /* complete and process a previously buffered partial block first */
    if( left && ilen >= fill )
    {
        memcpy( ctx->buffer + left, input, fill );
        sha2_process( ctx, ctx->buffer );
        input += fill;
        ilen  -= fill;
        left = 0;
    }

    /* process whole blocks straight from the caller's buffer */
    while( ilen >= 64 )
    {
        sha2_process( ctx, input );
        input += 64;
        ilen  -= 64;
    }

    /* stash the remaining tail for the next call */
    if( ilen > 0 )
    {
        memcpy( ctx->buffer + left, input, ilen );
    }
}

/*
 * Padding source used by sha2_finish(): a single 0x80 byte followed by
 * zeros. sha2_finish() feeds a prefix of this array (between 1 and 64
 * bytes, by its padn computation) into sha2_update().
 */
static const unsigned char sha2_padding[64] =
{
 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * SHA-256 final digest
 *
 * Appends the padding and the 64-bit message bit length, then writes the
 * chaining state to output in big-endian order: all eight words (32 bytes)
 * for SHA-256, only the first seven (28 bytes) when ctx->is224 is set.
 */
void sha2_finish( sha2_context *ctx, unsigned char output[32] )
{
    unsigned char bitlen[8];
    unsigned long bits_hi, bits_lo;
    unsigned long tail, pad_len;
    int word, words_out;

    /* byte count -> bit count, split across two 32-bit words */
    bits_hi = ( ctx->total[0] >> 29 )
            | ( ctx->total[1] <<  3 );
    bits_lo = ( ctx->total[0] <<  3 );

    PUT_ULONG_BE( bits_hi, bitlen, 0 );
    PUT_ULONG_BE( bits_lo, bitlen, 4 );

    /* pad so that exactly 8 bytes remain in the final block */
    tail = ctx->total[0] & 0x3F;
    if( tail < 56 )
        pad_len = 56 - tail;
    else
        pad_len = 120 - tail;

    sha2_update( ctx, (unsigned char *) sha2_padding, pad_len );
    sha2_update( ctx, bitlen, 8 );

    words_out = ( ctx->is224 == 0 ) ? 8 : 7;

    for( word = 0; word < words_out; word++ )
    {
        PUT_ULONG_BE( ctx->state[word], output, 4 * word );
    }
}

/*
 * output = SHA-256( input buffer )
 */
void sha2( const unsigned char *input, size_t ilen,
           unsigned char output[32], int is224 )
{
    sha2_context ctx;

    sha2_starts( &ctx, is224 );
    sha2_update( &ctx, input, ilen );
    sha2_finish( &ctx, output );

    memset( &ctx, 0, sizeof( sha2_context ) );
}

/*
 * SHA-256 HMAC context setup
 *
 * Builds the inner (0x36) and outer (0x5C) pads XORed with the key, stores
 * them in ctx, and starts the inner hash by absorbing the inner pad.
 * Keys longer than 64 bytes are first replaced by their hash (28 bytes for
 * SHA-224, 32 for SHA-256). The temporary hashed key is cleared on exit.
 */
void sha2_hmac_starts( sha2_context *ctx, const unsigned char *key, size_t keylen,
                       int is224 )
{
    unsigned char hashed_key[32];
    size_t pos = 0;

    if( keylen > 64 )
    {
        sha2( key, keylen, hashed_key, is224 );
        key = hashed_key;
        keylen = is224 ? 28 : 32;
    }

    memset( ctx->ipad, 0x36, 64 );
    memset( ctx->opad, 0x5C, 64 );

    while( pos < keylen )
    {
        ctx->ipad[pos] ^= key[pos];
        ctx->opad[pos] ^= key[pos];
        pos++;
    }

    sha2_starts( ctx, is224 );
    sha2_update( ctx, ctx->ipad, 64 );

    memset( hashed_key, 0, sizeof( hashed_key ) );
}

/*
 * SHA-256 HMAC process buffer
 *
 * Forwards ilen bytes of input unchanged to sha2_update(); the HMAC
 * framing itself is done in sha2_hmac_starts() and sha2_hmac_finish().
 */
void sha2_hmac_update( sha2_context *ctx, const unsigned char *input, size_t ilen )
{
    sha2_update( ctx, input, ilen );
}

/*
 * SHA-256 HMAC final digest
 *
 * Finishes the inner hash, then computes the outer hash over
 * opad || inner digest and writes it to output. The inner digest length
 * is 32 bytes for SHA-256 and 28 for SHA-224. The temporary inner digest
 * is cleared before returning; ctx->ipad/opad are left intact.
 */
void sha2_hmac_finish( sha2_context *ctx, unsigned char output[32] )
{
    unsigned char inner[32];
    const int variant = ctx->is224;
    int inner_len;

    if( variant == 0 )
        inner_len = 32;
    else
        inner_len = 28;

    sha2_finish( ctx, inner );

    sha2_starts( ctx, variant );
    sha2_update( ctx, ctx->opad, 64 );
    sha2_update( ctx, inner, inner_len );
    sha2_finish( ctx, output );

    memset( inner, 0, sizeof( inner ) );
}

/*
 * SHA-256 HMAC context reset
 *
 * Restarts the inner hash with the variant and inner pad already stored
 * in ctx, so the same key can be used for another message.
 */
void sha2_hmac_reset( sha2_context *ctx )
{
    const int variant = ctx->is224;

    sha2_starts( ctx, variant );
    sha2_update( ctx, ctx->ipad, 64 );
}

/*
 * output = HMAC-SHA-256( hmac key, input buffer )
 */
void sha2_hmac( const unsigned char *key, size_t keylen,
                const unsigned char *input, size_t ilen,
                unsigned char output[32], int is224 )
{
    sha2_context ctx;

    sha2_hmac_starts( &ctx, key, keylen, is224 );
    sha2_hmac_update( &ctx, input, ilen );
    sha2_hmac_finish( &ctx, output );

    memset( &ctx, 0, sizeof( sha2_context ) );
}


/*
 * Smaller of two values; only defined when no min macro exists already.
 * Each argument may be evaluated twice, so pass side-effect-free expressions.
 */
#ifndef min
#define min( a, b ) ( ((a) < (b)) ? (a) : (b) )
#endif

/*
 * PBKDF2 using HMAC-SHA-256 as the pseudo-random function.
 *
 *   password / plen   HMAC key for every PRF call
 *   salt / slen       salt; followed by a 4-byte big-endian block counter
 *                     for the first PRF call of each output block
 *   iteration_count   number of chained PRF calls per block; values of 0
 *                     and 1 both yield a single call
 *   key_length        number of bytes written to output
 *   output            destination, must hold at least key_length bytes
 *
 * Output is produced in 32-byte blocks; the last block is truncated to fit
 * key_length. All key-derived temporaries are cleared before returning,
 * in line with sha2() and sha2_hmac() in this file.
 */
void PKCS5_PBKDF2_HMAC_SHA256(unsigned char *password, size_t plen,
    unsigned char *salt, size_t slen,
    const unsigned long iteration_count, const unsigned long key_length,
    unsigned char *output)
{
	// sha2_hmac_starts() fully (re)initialises ctx before each use
	sha2_context ctx;

	// Size of the generated digest
	unsigned char md_size = 32;
	unsigned char md1[32];
	unsigned char work[32];

	unsigned long counter = 1;
	unsigned long generated_key_length = 0;
	while (generated_key_length < key_length) {
		// U1 ends up in md1 and work
		unsigned char c[4];
		c[0] = (counter >> 24) & 0xff;
		c[1] = (counter >> 16) & 0xff;
		c[2] = (counter >> 8) & 0xff;
		c[3] = (counter >> 0) & 0xff;

		sha2_hmac_starts(&ctx, password, plen, 0);
		sha2_hmac_update(&ctx, salt, slen);
		sha2_hmac_update(&ctx, c, 4);
		sha2_hmac_finish(&ctx, md1);
		memcpy(work, md1, md_size);

		unsigned long ic = 1;
		for (ic = 1; ic < iteration_count; ic++) {
			// U2 ends up in md1
			sha2_hmac_starts(&ctx, password, plen, 0);
			sha2_hmac_update(&ctx, md1, md_size);
			sha2_hmac_finish(&ctx, md1);
			// U1 xor U2
			unsigned long i = 0;
			for (i = 0; i < md_size; i++) {
				work[i] ^= md1[i];
			}
			// and so on until iteration_count
		}

		// Copy the generated bytes to the key
		unsigned long bytes_to_write =
		    min((key_length - generated_key_length), md_size);
		memcpy(output + generated_key_length, work, bytes_to_write);
		generated_key_length += bytes_to_write;
		++counter;
	}

	// Do not leave password-derived pads or key material on the stack
	memset(&ctx, 0, sizeof(ctx));
	memset(md1, 0, sizeof(md1));
	memset(work, 0, sizeof(work));
}


#ifdef TEST
/*
 * FIPS-180-2 test vectors
 */
/*
 * Input messages for the SHA-224/SHA-256 self-test. The third entry is
 * empty because main() builds that case's input itself (repeated updates
 * of a 1000-byte buffer of 'a') instead of reading it from here.
 */
static unsigned char sha2_test_buf[3][57] = 
{
    { "abc" },
    { "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" },
    { "" }
};

/*
 * Byte lengths matching sha2_test_buf. main() reads entries 0 and 1 only;
 * for the third case it sets its own buffer length of 1000.
 */
static const int sha2_test_buflen[3] =
{
    3, 56, 1000
};

/*
 * Expected digests for the three test messages: rows 0-2 are SHA-224
 * (28 bytes each, remainder of the row zero-filled), rows 3-5 are SHA-256.
 * main() indexes this table with its outer loop counter and compares
 * 28 or 32 bytes accordingly.
 */
static const unsigned char sha2_test_sum[6][32] =
{
    /*
     * SHA-224 test vectors
     */
    { 0x23, 0x09, 0x7D, 0x22, 0x34, 0x05, 0xD8, 0x22,
      0x86, 0x42, 0xA4, 0x77, 0xBD, 0xA2, 0x55, 0xB3,
      0x2A, 0xAD, 0xBC, 0xE4, 0xBD, 0xA0, 0xB3, 0xF7,
      0xE3, 0x6C, 0x9D, 0xA7 },
    { 0x75, 0x38, 0x8B, 0x16, 0x51, 0x27, 0x76, 0xCC,
      0x5D, 0xBA, 0x5D, 0xA1, 0xFD, 0x89, 0x01, 0x50,
      0xB0, 0xC6, 0x45, 0x5C, 0xB4, 0xF5, 0x8B, 0x19,
      0x52, 0x52, 0x25, 0x25 },
    { 0x20, 0x79, 0x46, 0x55, 0x98, 0x0C, 0x91, 0xD8,
      0xBB, 0xB4, 0xC1, 0xEA, 0x97, 0x61, 0x8A, 0x4B,
      0xF0, 0x3F, 0x42, 0x58, 0x19, 0x48, 0xB2, 0xEE,
      0x4E, 0xE7, 0xAD, 0x67 },

    /*
     * SHA-256 test vectors
     */
    { 0xBA, 0x78, 0x16, 0xBF, 0x8F, 0x01, 0xCF, 0xEA,
      0x41, 0x41, 0x40, 0xDE, 0x5D, 0xAE, 0x22, 0x23,
      0xB0, 0x03, 0x61, 0xA3, 0x96, 0x17, 0x7A, 0x9C,
      0xB4, 0x10, 0xFF, 0x61, 0xF2, 0x00, 0x15, 0xAD },
    { 0x24, 0x8D, 0x6A, 0x61, 0xD2, 0x06, 0x38, 0xB8,
      0xE5, 0xC0, 0x26, 0x93, 0x0C, 0x3E, 0x60, 0x39,
      0xA3, 0x3C, 0xE4, 0x59, 0x64, 0xFF, 0x21, 0x67,
      0xF6, 0xEC, 0xED, 0xD4, 0x19, 0xDB, 0x06, 0xC1 },
    { 0xCD, 0xC7, 0x6E, 0x5C, 0x99, 0x14, 0xFB, 0x92,
      0x81, 0xA1, 0xC7, 0xE2, 0x84, 0xD7, 0x3E, 0x67,
      0xF1, 0x80, 0x9A, 0x48, 0xA4, 0x97, 0x20, 0x0E,
      0x04, 0x6D, 0x39, 0xCC, 0xC7, 0x11, 0x2C, 0xD0 }
};

/*
 * RFC 4231 test vectors
 */
/*
 * HMAC keys for the seven test cases. The last two rows are empty
 * placeholders: for those cases main() uses a 131-byte key of 0xAA that it
 * builds in a local buffer.
 */
static unsigned char sha2_hmac_test_key[7][26] = {
	{"\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
		    "\x0B\x0B\x0B\x0B"},
	{"Jefe"},
	{"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
		    "\xAA\xAA\xAA\xAA"},
	{"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
		    "\x11\x12\x13\x14\x15\x16\x17\x18\x19"},
	{"\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C"
		    "\x0C\x0C\x0C\x0C"},
	{""},			/* 0xAA 131 times */
	{""}
};

/*
 * Key lengths in bytes, parallel to sha2_hmac_test_key. main() reads
 * entries 0-4 only; the 131 of the last two cases is set there directly.
 */
static const int sha2_hmac_test_keylen[7] = {
	20, 4, 20, 25, 20, 131, 131
};

/*
 * Messages authenticated in the seven HMAC test cases; main() passes row j
 * together with sha2_hmac_test_buflen[j] to sha2_hmac_update().
 */
static unsigned char sha2_hmac_test_buf[7][153] =
{
    { "Hi There" },
    { "what do ya want for nothing?" },
    { "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
      "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
      "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
      "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
      "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD" },
    { "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
      "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
      "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
      "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD"
      "\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD\xCD" },
    { "Test With Truncation" },
    { "Test Using Larger Than Block-Size Key - Hash Key First" },
    { "This is a test using a larger than block-size key "
      "and a larger than block-size data. The key needs to "
      "be hashed before being used by the HMAC algorithm." }
};

/* Message lengths in bytes, parallel to sha2_hmac_test_buf. */
static const int sha2_hmac_test_buflen[7] =
{
    8, 28, 50, 50, 20, 54, 152
};

/*
 * Expected HMAC outputs: rows 0-6 are HMAC-SHA-224, rows 7-13 are
 * HMAC-SHA-256, in the same case order as the key/message tables.
 * The fifth row of each group lists 16 bytes only; main() compares just
 * 16 bytes for that (truncation) case.
 */
static const unsigned char sha2_hmac_test_sum[14][32] =
{
    /*
     * HMAC-SHA-224 test vectors
     */
    { 0x89, 0x6F, 0xB1, 0x12, 0x8A, 0xBB, 0xDF, 0x19,
      0x68, 0x32, 0x10, 0x7C, 0xD4, 0x9D, 0xF3, 0x3F,
      0x47, 0xB4, 0xB1, 0x16, 0x99, 0x12, 0xBA, 0x4F,
      0x53, 0x68, 0x4B, 0x22 },
    { 0xA3, 0x0E, 0x01, 0x09, 0x8B, 0xC6, 0xDB, 0xBF,
      0x45, 0x69, 0x0F, 0x3A, 0x7E, 0x9E, 0x6D, 0x0F,
      0x8B, 0xBE, 0xA2, 0xA3, 0x9E, 0x61, 0x48, 0x00,
      0x8F, 0xD0, 0x5E, 0x44 },
    { 0x7F, 0xB3, 0xCB, 0x35, 0x88, 0xC6, 0xC1, 0xF6,
      0xFF, 0xA9, 0x69, 0x4D, 0x7D, 0x6A, 0xD2, 0x64,
      0x93, 0x65, 0xB0, 0xC1, 0xF6, 0x5D, 0x69, 0xD1,
      0xEC, 0x83, 0x33, 0xEA },
    { 0x6C, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3C, 0xAC,
      0x6A, 0x2A, 0xBC, 0x1B, 0xB3, 0x82, 0x62, 0x7C,
      0xEC, 0x6A, 0x90, 0xD8, 0x6E, 0xFC, 0x01, 0x2D,
      0xE7, 0xAF, 0xEC, 0x5A },
    { 0x0E, 0x2A, 0xEA, 0x68, 0xA9, 0x0C, 0x8D, 0x37,
      0xC9, 0x88, 0xBC, 0xDB, 0x9F, 0xCA, 0x6F, 0xA8 },
    { 0x95, 0xE9, 0xA0, 0xDB, 0x96, 0x20, 0x95, 0xAD,
      0xAE, 0xBE, 0x9B, 0x2D, 0x6F, 0x0D, 0xBC, 0xE2,
      0xD4, 0x99, 0xF1, 0x12, 0xF2, 0xD2, 0xB7, 0x27,
      0x3F, 0xA6, 0x87, 0x0E },
    { 0x3A, 0x85, 0x41, 0x66, 0xAC, 0x5D, 0x9F, 0x02,
      0x3F, 0x54, 0xD5, 0x17, 0xD0, 0xB3, 0x9D, 0xBD,
      0x94, 0x67, 0x70, 0xDB, 0x9C, 0x2B, 0x95, 0xC9,
      0xF6, 0xF5, 0x65, 0xD1 },

    /*
     * HMAC-SHA-256 test vectors
     */
    { 0xB0, 0x34, 0x4C, 0x61, 0xD8, 0xDB, 0x38, 0x53,
      0x5C, 0xA8, 0xAF, 0xCE, 0xAF, 0x0B, 0xF1, 0x2B,
      0x88, 0x1D, 0xC2, 0x00, 0xC9, 0x83, 0x3D, 0xA7,
      0x26, 0xE9, 0x37, 0x6C, 0x2E, 0x32, 0xCF, 0xF7 },
    { 0x5B, 0xDC, 0xC1, 0x46, 0xBF, 0x60, 0x75, 0x4E,
      0x6A, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xC7,
      0x5A, 0x00, 0x3F, 0x08, 0x9D, 0x27, 0x39, 0x83,
      0x9D, 0xEC, 0x58, 0xB9, 0x64, 0xEC, 0x38, 0x43 },
    { 0x77, 0x3E, 0xA9, 0x1E, 0x36, 0x80, 0x0E, 0x46,
      0x85, 0x4D, 0xB8, 0xEB, 0xD0, 0x91, 0x81, 0xA7,
      0x29, 0x59, 0x09, 0x8B, 0x3E, 0xF8, 0xC1, 0x22,
      0xD9, 0x63, 0x55, 0x14, 0xCE, 0xD5, 0x65, 0xFE },
    { 0x82, 0x55, 0x8A, 0x38, 0x9A, 0x44, 0x3C, 0x0E,
      0xA4, 0xCC, 0x81, 0x98, 0x99, 0xF2, 0x08, 0x3A,
      0x85, 0xF0, 0xFA, 0xA3, 0xE5, 0x78, 0xF8, 0x07,
      0x7A, 0x2E, 0x3F, 0xF4, 0x67, 0x29, 0x66, 0x5B },
    { 0xA3, 0xB6, 0x16, 0x74, 0x73, 0x10, 0x0E, 0xE0,
      0x6E, 0x0C, 0x79, 0x6C, 0x29, 0x55, 0x55, 0x2B },
    { 0x60, 0xE4, 0x31, 0x59, 0x1E, 0xE0, 0xB6, 0x7F,
      0x0D, 0x8A, 0x26, 0xAA, 0xCB, 0xF5, 0xB7, 0x7F,
      0x8E, 0x0B, 0xC6, 0x21, 0x37, 0x28, 0xC5, 0x14,
      0x05, 0x46, 0x04, 0x0F, 0x0E, 0xE3, 0x7F, 0x54 },
    { 0x9B, 0x09, 0xFF, 0xA7, 0x1B, 0x94, 0x2F, 0xCB,
      0x27, 0x63, 0x5F, 0xBC, 0xD5, 0xB0, 0xE9, 0x44,
      0xBF, 0xDC, 0x63, 0x64, 0x4F, 0x07, 0x13, 0x93,
      0x8A, 0x7F, 0x51, 0x53, 0x5C, 0x3A, 0x35, 0xE2 }
};
/*
 * One PBKDF2 known-answer test case, consumed by do_test().
 * The fields are passed to the KDF in the order p, plen, s, slen, c, dkLen
 * and the result is compared against the first dkLen bytes of dk.
 */
typedef struct {
	char *t;		// label printed when the test starts or fails
	char *p;		// password bytes (may contain embedded NULs)
	int plen;		// password length in bytes
	char *s;		// salt bytes (may contain embedded NULs)
	int slen;		// salt length in bytes
	int c;			// iteration count
	int dkLen;		// number of derived-key bytes to generate and compare
	char dk[1024];		// expected derived key; must be at least the largest dkLen used
} testvector;

/*
 * Run a single PBKDF2 known-answer test.
 *
 * Prints the test label, derives tv->dkLen bytes from the vector's
 * password/salt/iteration count and compares them with tv->dk.
 *
 * Returns 0 when the derived key matches, -1 on allocation failure or
 * mismatch. The scratch key buffer is released on every path.
 *
 * NOTE(review): the KDF defined in this file is named
 * PKCS5_PBKDF2_HMAC_SHA256; confirm that PKCS5_PBKDF2_HMAC resolves when
 * this test code is enabled.
 */
int do_test(testvector * tv)
{
	char *key;
	int result;

	printf("Started %s\n", tv->t);
	fflush(stdout);

	/* explicit cast: this file is built as C++, where void* does not convert implicitly */
	key = (char *) malloc(tv->dkLen);
	if (key == 0) {
		return -1;
	}

	PKCS5_PBKDF2_HMAC((unsigned char*)tv->p, tv->plen,
			(unsigned char*)tv->s, tv->slen, tv->c,
			tv->dkLen, (unsigned char*)key);

	result = (memcmp(tv->dk, key, tv->dkLen) != 0) ? -1 : 0;

	/* previously leaked on both the success and the failure path */
	free(key);

	return result;
}

/*
 * Checkup routine
 *
 * Self-test driver. Runs, in order:
 *   1. six digest checks (three messages under SHA-224, then SHA-256),
 *   2. fourteen HMAC checks (seven cases under each variant),
 *   3. six PBKDF2 vectors via do_test() (the fourth is disabled).
 * Returns 0 when everything passes; returns 1 on the first digest/HMAC
 * mismatch, or do_test()'s non-zero result on the first PBKDF2 failure.
 */
int main()
{
	int verbose = 1;
	int i, j, k, buflen;
	unsigned char buf[1024];
	unsigned char sha2sum[32];
	sha2_context ctx;

	for (i = 0; i < 6; i++) {
		j = i % 3;	/* which message */
		k = i < 3;	/* 1 => SHA-224 (first three passes), 0 => SHA-256 */

		if (verbose != 0)
			printf("  SHA-%d test #%d: ", 256 - k * 32, j + 1);

		sha2_starts(&ctx, k);

		if (j == 2) {
			/* third message: 1000 updates of a 1000-byte buffer of 'a' */
			memset(buf, 'a', buflen = 1000);

			/* reuses j as the loop counter; j is not read again
			 * before it is recomputed at the top of the outer loop */
			for (j = 0; j < 1000; j++)
				sha2_update(&ctx, buf, buflen);
		} else
			sha2_update(&ctx, sha2_test_buf[j],
			    sha2_test_buflen[j]);

		sha2_finish(&ctx, sha2sum);

		/* compare 28 bytes for SHA-224, 32 for SHA-256 */
		if (memcmp(sha2sum, sha2_test_sum[i], 32 - k * 4) != 0) {
			if (verbose != 0)
				printf("failed\n");

			return (1);
		}

		if (verbose != 0)
			printf("passed\n");
	}

	if (verbose != 0)
		printf("\n");

	for (i = 0; i < 14; i++) {
		j = i % 7;	/* which HMAC case */
		k = i < 7;	/* 1 => HMAC-SHA-224 (first seven passes), 0 => HMAC-SHA-256 */

		if (verbose != 0)
			printf("  HMAC-SHA-%d test #%d: ", 256 - k * 32,
			    j + 1);

		if (j == 5 || j == 6) {
			/* last two cases use a 131-byte key of 0xAA built in buf
			 * instead of an entry from sha2_hmac_test_key */
			memset(buf, '\xAA', buflen = 131);
			sha2_hmac_starts(&ctx, buf, buflen, k);
		} else
			sha2_hmac_starts(&ctx, sha2_hmac_test_key[j],
			    sha2_hmac_test_keylen[j], k);

		sha2_hmac_update(&ctx, sha2_hmac_test_buf[j],
		    sha2_hmac_test_buflen[j]);

		sha2_hmac_finish(&ctx, sha2sum);

		/* case 5 (j == 4) is the truncation test: compare 16 bytes only */
		buflen = (j == 4) ? 16 : 32 - k * 4;

		if (memcmp(sha2sum, sha2_hmac_test_sum[i], buflen) != 0) {
			if (verbose != 0)
				printf("failed\n");

			return (1);
		}

		if (verbose != 0)
			printf("passed\n");
	}

	if (verbose != 0)
		printf("\n");

	/* PBKDF2 known-answer tests */
	testvector *tv = 0;
	int res = 0;

	testvector t1 = {
		"Test 1",
		"password", 8, "salt", 4, 1, 32,
		.dk = { 0x12, 0x0f, 0xb6, 0xcf, 0xfc, 0xf8, 0xb3, 0x2c,
			0x43, 0xe7, 0x22, 0x52, 0x56, 0xc4, 0xf8, 0x37,
			0xa8, 0x65, 0x48, 0xc9, 0x2c, 0xcc, 0x35, 0x48,
			0x08, 0x05, 0x98, 0x7c, 0xb7, 0x0b, 0xe1, 0x7b }
	};

	tv = &t1;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	testvector t2 = {
		"Test 2",
		"password", 8, "salt", 4, 2, 32, {
			0xae, 0x4d, 0x0c, 0x95, 0xaf, 0x6b, 0x46, 0xd3,
			0x2d, 0x0a, 0xdf, 0xf9, 0x28, 0xf0, 0x6d, 0xd0,
			0x2a, 0x30, 0x3f, 0x8e, 0xf3, 0xc2, 0x51, 0xdf,
			0xd6, 0xe2, 0xd8, 0x5a, 0x95, 0x47, 0x4c, 0x43 }
	};

	tv = &t2;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	testvector t3 = {
		"Test 3",
		"password", 8, "salt", 4, 4096, 32, {
			0xc5, 0xe4, 0x78, 0xd5, 0x92, 0x88, 0xc8, 0x41,
			0xaa, 0x53, 0x0d, 0xb6, 0x84, 0x5c, 0x4c, 0x8d,
			0x96, 0x28, 0x93, 0xa0, 0x01, 0xce, 0x4e, 0x11,
			0xa4, 0x96, 0x38, 0x73, 0xaa, 0x98, 0x13, 0x4a }
	};

	tv = &t3;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	testvector t4 = {
		"Test 4",
		"password", 8, "salt", 4, 16777216, 32, {
			0xcf, 0x81, 0xc6, 0x6f, 0xe8, 0xcf, 0xc0, 0x4d,
			0x1f, 0x31, 0xec, 0xb6, 0x5d, 0xab, 0x40, 0x89,
			0xf7, 0xf1, 0x79, 0xe8, 0x9b, 0x3b, 0x0b, 0xcb,
			0x17, 0xad, 0x10, 0xe3, 0xac, 0x6e, 0xba, 0x46 }
	};

	tv = &t4;
	/* Test 4 (16777216 iterations) is disabled: the call below is commented
	 * out, so res still holds Test 3's result (0 at this point) and the
	 * following check can never fire. */
	// res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	testvector t5 = {
		"Test 5",
		"passwordPASSWORDpassword", 24,
		"saltSALTsaltSALTsaltSALTsaltSALTsalt", 36, 4096, 40, {
			0x34, 0x8c, 0x89, 0xdb, 0xcb, 0xd3, 0x2b, 0x2f,
			0x32, 0xd8, 0x14, 0xb8, 0x11, 0x6e, 0x84, 0xcf,
			0x2b, 0x17, 0x34, 0x7e, 0xbc, 0x18, 0x00, 0x18,
			0x1c, 0x4e, 0x2a, 0x1f, 0xb8, 0xdd, 0x53, 0xe1,
			0xc6, 0x35, 0x51, 0x8c, 0x7d, 0xac, 0x47, 0xe9 }
	};

	tv = &t5;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	testvector t6 = {
		"Test 6",
		"pass\0word", 9, "sa\0lt", 5, 4096, 16, {
			0x89, 0xb6, 0x9d, 0x05, 0x16, 0xf8, 0x29, 0x89,
			0x3c, 0x69, 0x62, 0x26, 0x65, 0x0a, 0x86, 0x87 }
	};

	tv = &t6;
	res = do_test(tv);
	if (res != 0) {
		printf("%s failed\n", tv->t);
		return res;
	}

	return (0);
}

#endif


/* SHA-256 digest size in bytes; the HKDF routines below use it as their hash length. */
const int sha256_len=32;

/* Size of the scratch buffers in the HKDF routines (null salt, T(n) block). */
#define MBEDTLS_MD_MAX_SIZE  64 
/* Error code the HKDF routines return for invalid arguments. */
#define MBEDTLS_ERR_HKDF_BAD_INPUT_DATA   -0x5F80

/*
 * memset is reached through a const volatile function pointer, presumably
 * so the compiler cannot treat the wipe below as a removable dead store.
 */
static void * (* const volatile memset_func)( void *, int, size_t ) = memset;

/* Overwrite len bytes starting at buf with zeros. */
void mbedtls_platform_zeroize( void *buf, size_t len )
{
    (*memset_func)( buf, 0, len );
}

int hkdf_sha256_extract(
                          const unsigned char *salt, size_t salt_len,
                          const unsigned char *ikm, size_t ikm_len,
                          unsigned char *prk )
{
    unsigned char null_salt[MBEDTLS_MD_MAX_SIZE] = { '\0' };

    if( salt == NULL )
    {
        size_t hash_len;

        hash_len = sha256_len;

        if( hash_len == 0 )
        {
            return MBEDTLS_ERR_HKDF_BAD_INPUT_DATA;
        }

        salt = null_salt;
        salt_len = hash_len;
    }

    sha2_hmac (salt, salt_len, ikm, ikm_len, prk ,0);
    return 0;
}

int hkdf_sha256_expand( const unsigned char *prk,
                         size_t prk_len, const unsigned char *info,
                         size_t info_len, unsigned char *okm, size_t okm_len )
{
    size_t hash_len;
    size_t where = 0;
    size_t n;
    size_t t_len = 0;
    size_t i;
    int ret = 0;
    sha2_context ctx;
    unsigned char t[MBEDTLS_MD_MAX_SIZE];

    if( okm == NULL )
    {
        return( MBEDTLS_ERR_HKDF_BAD_INPUT_DATA );
    }

    hash_len = sha256_len;

    if( prk_len < hash_len || hash_len == 0 )
    {
        return( MBEDTLS_ERR_HKDF_BAD_INPUT_DATA );
    }

    if( info == NULL )
    {
        info = (const unsigned char *) "";
        info_len = 0;
    }

    n = okm_len / hash_len;

    if( (okm_len % hash_len) != 0 )
    {
        n++;
    }

    if( n > 255 )
    {
        return( MBEDTLS_ERR_HKDF_BAD_INPUT_DATA );
    }

    //mbedtls_md_init( &ctx );   //old code
    memset( &ctx, 0, sizeof( ctx) );  //its not necessary

    /*
    if( (ret = mbedtls_md_setup( &ctx, md, 1) ) != 0 )
    {
        goto exit;
    }*/


    /* RFC 5869 Section 2.3. */
    for( i = 1; i <= n; i++ )
    {
        size_t num_to_copy;
        unsigned char c = i & 0xff;

        sha2_hmac_starts( &ctx, prk, prk_len,0 );

        sha2_hmac_update( &ctx, t, t_len );

        sha2_hmac_update( &ctx, info, info_len );

        /* The constant concatenated to the end of each t(n) is a single octet.
         * */
        sha2_hmac_update( &ctx, &c, 1 );

        sha2_hmac_finish( &ctx, t );
        num_to_copy = i != n ? hash_len : okm_len - where;
        memcpy( okm + where, t, num_to_copy );
        where += hash_len;
        t_len = hash_len;
    }

//exit:
    //mbedtls_md_free( &ctx );  //old code
    mbedtls_platform_zeroize( &ctx, sizeof( ctx ) ); //not necessary too

    mbedtls_platform_zeroize( t, sizeof( t ) );

    return( ret );
}

int hkdf_sha256( const unsigned char *salt,
                  size_t salt_len, const unsigned char *ikm, size_t ikm_len,
                  const unsigned char *info, size_t info_len,
                  unsigned char *okm, size_t okm_len )
{
    int ret;
    unsigned char prk[MBEDTLS_MD_MAX_SIZE];

    ret = hkdf_sha256_extract( salt, salt_len, ikm, ikm_len, prk );

    if( ret == 0 )
    {
        ret = hkdf_sha256_expand( prk, sha256_len,
                                   info, info_len, okm, okm_len );
    }

    mbedtls_platform_zeroize( prk, sizeof( prk ) );

    return( ret );
}

#ifdef HKDF_SHA256_TEST

#include <assert.h>
/*
 * Convert a single hexadecimal digit character to its value (0..15).
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Any other character trips the
 * assertion; when assertions are compiled out (NDEBUG) -1 is returned.
 */
int hex_to_number(char a)
{
	if(a>='0' && a<='9')
		return a-'0';
	if(a>='a' && a<='f')
		return a-'a'+10;
	if(a>='A' && a<='F')
		return a-'A'+10;

	assert(0==1);
	return -1;
}
/*
 * Decode the NUL-terminated hex string `a` into raw bytes stored in `buf`.
 *
 * Each pair of characters becomes one byte, high nibble first, using
 * hex_to_number for the individual digits. The string length is asserted
 * to be even. `buf` must be large enough for strlen(a)/2 bytes; no bound
 * is checked here.
 *
 * Returns the number of bytes implied by the string length (strlen(a)/2).
 */
int base16_decode(const char *a,unsigned char *buf)
{
	const int hex_len=strlen(a);
	assert(hex_len%2==0);

	unsigned char *out=buf;
	for(int pos=0;pos<hex_len;pos+=2)
	{
		const int high=hex_to_number(a[pos]);
		const int low=hex_to_number(a[pos+1]);
		*out++=(unsigned char)(high*16+low);
	}
	return hex_len/2;
}
/*
 * HKDF-SHA256 self-test driver (built only when HKDF_SHA256_TEST is defined).
 *
 * Runs hkdf_sha256 on the hex-encoded vectors below (labelled A.1-A.3, taken
 * from the RFC 5869 appendix), prints the input keying material, the computed
 * output and the expected output for each, and compares computed against
 * expected.
 *
 * Returns 0 when every vector matches, 1 otherwise.
 */
int main()
{
    const struct {
    const char *ikm16, *salt16, *info16;
    int L;
    const char *okm16;
  } vecs[] = {
    { /* from A.1 */
      "0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b",
      "000102030405060708090a0b0c",
      "f0f1f2f3f4f5f6f7f8f9",
      42,
      "3cb25f25faacd57a90434f64d0362f2a2d2d0a90cf1a5a4c5db02d56ecc4c5bf"
        "34007208d5b887185865"
    },
    { /* from A.2 */
      "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"
        "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f"
        "404142434445464748494a4b4c4d4e4f",
      "606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f"
        "808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9f"
        "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf",
      "b0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
        "d0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeef"
        "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff",
      82,
      "b11e398dc80327a1c8e7f78c596a49344f012eda2d4efad8a050cc4c19afa97c"
      "59045a99cac7827271cb41c65e590e09da3275600c2f09b8367793a9aca3db71"
      "cc30c58179ec3e87c14c01d5c1f3434f1d87"
    },
    { /* from A.3 */
      "0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b",
      "",
      "",
      42,
      "8da4e775a563c18f715f802a063c5a31b8a11f5c5ee1879ec3454e5f3c738d2d"
        "9d201395faa4b61a96c8",
    },
    { NULL, NULL, NULL, -1, NULL }
  };

    int failures=0;

    /* Walk the table up to the all-NULL sentinel entry. */
    for(int i=0;vecs[i].ikm16!=NULL;i++)
    {
	    unsigned char ikm[200]; int ikm_len;
	    unsigned char salt[200];int salt_len;
	    unsigned char info[200]; int info_len;
	    unsigned char okm[200]; int okm_len;

	    ikm_len=base16_decode(vecs[i].ikm16,ikm);
	    salt_len=base16_decode(vecs[i].salt16,salt);
	    info_len=base16_decode(vecs[i].info16,info);

	    okm_len=base16_decode(vecs[i].okm16,okm);

	    int outlen=vecs[i].L;
	    unsigned char output[200];

	    /* The expected-output string must describe exactly L bytes. */
	    assert(okm_len==outlen);

	    int ret=hkdf_sha256(salt,
			    salt_len,ikm, ikm_len,
			    info, info_len,
			    output, outlen );
	    assert(ret==0);
	    for(int j=0;j<ikm_len;j++)
		    printf("<%02x>",(int)(ikm[j]));
	    printf("\n---------------------------\n");
	    for(int j=0;j<outlen;j++)
		    printf("<%02x>",(int)(output[j]));
	    printf("\n---------------------------\n");
	    for(int j=0;j<outlen;j++)
		    printf("<%02x>",(int)(okm[j]));
	    printf("\n===========================\n");

	    /* Actually verify the result instead of relying on eyeballing. */
	    if(ret!=0 || memcmp(output,okm,outlen)!=0)
	    {
		    printf("HKDF test vector %d failed\n",i+1);
		    failures++;
	    }
    }

    return failures==0?0:1;
}

#endif


================================================
FILE: lib/pbkdf2-sha256.h
================================================
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// PBKDF2 key derivation using HMAC-SHA256.
// NOTE(review): the definition is not visible from this header; presumably
// `password`/`plen` and `salt`/`slen` are the inputs, `iteration_count` is the
// PBKDF2 iteration count and `key_length` bytes are written to `output` -
// confirm against the implementation.
void PKCS5_PBKDF2_HMAC_SHA256(unsigned char *password, size_t plen,
    unsigned char *salt, size_t slen,
    const unsigned long iteration_count, const unsigned long key_length,
    unsigned char *output);

//void sha2( const unsigned char *input, size_t ilen,unsigned char output[32], int is224 );

// HKDF extract step: writes the HMAC-SHA256 of `ikm`, keyed with `salt`, to
// `prk`. A NULL `salt` is replaced by a zero-filled key of the SHA-256 digest
// length. Returns 0 on success.
int hkdf_sha256_extract(
                          const unsigned char *salt, size_t salt_len,
                          const unsigned char *ikm, size_t ikm_len,
                          unsigned char *prk );

// HKDF expand step: derives `okm_len` bytes into `okm` from the pseudorandom
// key `prk` and the optional context `info` (NULL is treated as empty).
// Returns 0 on success, or a negative error code when `okm` is NULL,
// `prk_len` is shorter than the SHA-256 digest length, or more than 255
// digest-sized blocks would be required.
int hkdf_sha256_expand( const unsigned char *prk,
                         size_t prk_len, const unsigned char *info,
                         size_t info_len, unsigned char *okm, size_t okm_len );

// One-shot HKDF-SHA256: extract from (`salt`, `ikm`), then expand with `info`
// into `okm_len` bytes at `okm`. Returns 0 on success, otherwise the error
// code of the failing step.
int hkdf_sha256( const unsigned char *salt,
                  size_t salt_len, const unsigned char *ikm, size_t ikm_len,
                  const unsigned char *info, size_t info_len,
                  unsigned char *okm, size_t okm_len );


================================================
FILE: libev/CVS/Entries
================================================
/Changes/1.315/Wed Jun 21 14:42:30 2017//
/LICENSE/1.11/Thu Jan 16 11:51:05 2014//
/Makefile.am/1.9/Wed Dec 21 18:16:08 2011//
/README/1.21/Fri Mar 30 17:43:55 2012//
/README.embed/1.29/Sat Nov 24 10:10:26 2007//
/Symbols.ev/1.14/Tue Jan 11 13:45:28 2011//
/Symbols.event/1.4/Tue May  8 15:52:13 2012//
/autogen.sh/1.3/Mon May 30 15:28:54 2011//
/configure.ac/1.42/Wed Dec 28 04:22:06 2016//
/ev++.h/1.63/Fri Dec  1 06:37:30 2017//
/ev.3/1.107/Wed Jun 21 14:42:30 2017//
/ev.c/1.481/Thu Jun  1 20:25:50 2017//
/ev.h/1.187/Wed Dec 28 04:22:06 2016//
/ev.pod/1.441/Thu Jul 13 10:46:52 2017//
/ev_epoll.c/1.72/Wed Jun 21 14:42:30 2017//
/ev_kqueue.c/1.56/Thu Feb 18 04:48:05 2016//
/ev_poll.c/1.40/Thu Feb 18 04:48:05 2016//
/ev_port.c/1.29/Thu Feb 18 04:48:05 2016//
/ev_select.c/1.56/Thu Feb 18 04:48:05 2016//
/ev_vars.h/1.58/Tue Sep  9 21:51:35 2014//
/ev_win32.c/1.18/Thu Nov 12 07:02:37 2015//
/ev_wrap.h/1.38/Tue Nov  6 20:56:50 2012//
/event.c/1.52/Mon Apr  2 23:14:41 2012//
/event.h/1.26/Mon Apr  2 23:15:27 2012//
/event_compat.h/1.8/Wed Feb 16 08:02:51 2011//
/import_libevent/1.29/Tue Apr 15 04:34:07 2008//
/libev.m4/1.16/Mon Oct 28 12:36:44 2013//
/update_ev_c/1.2/Wed Jan 18 12:13:14 2012//
/update_ev_wrap/1.6/Sun May  6 13:09:29 2012//
/update_symbols/1.1/Wed Dec 19 01:59:29 2007//
D


================================================
FILE: libev/CVS/Repository
================================================
libev


================================================
FILE: libev/CVS/Root
================================================
:pserver:anonymous@cvs.schmorp.de/schmorpforge


================================================
FILE: libev/Changes
================================================
Revision history for libev, a high-performance and full-featured event loop.

	- ANDROID => __ANDROID__ (reported by enh@google.com).
        - disable epoll_create1 on android because it has broken header files
          and google is unwilling to fix them (reported by enh@google.com).

4.24 Wed Dec 28 05:19:55 CET 2016
	- bump version to 4.24, as the release tarball inexplicably
          didn't have the right version in ev.h, even though the cvs-tagged
          version did have the right one (reported by Ales Teska).

4.23 Wed Nov 16 18:23:41 CET 2016
	- move some declarations at the beginning to help certain retarded
          microsoft compilers, even though their documentation claims
          otherwise (reported by Ruslan Osmanov).

4.22 Sun Dec 20 22:11:50 CET 2015
	- when epoll detects unremovable fds in the fd set, rebuild
          only the epoll descriptor, not the signal pipe, to avoid
          SIGPIPE in ev_async_send. This doesn't solve it on fork,
          so document what needs to be done in ev_loop_fork
          (analyzed by Benjamin Mahler).
	- remove superfluous sys/timeb.h include on win32
          (analyzed by Jason Madden).
        - updated libecb.

4.20 Sat Jun 20 13:01:43 CEST 2015
	- prefer noexcept over throw () with C++ 11.
        - update ecb.h due to incompatibilities with c11.
        - fix a potential aliasing issue when reading and writing
          watcher callbacks.

4.19 Thu Sep 25 08:18:25 CEST 2014
	- ev.h wasn't valid C++ anymore, which tripped compilers other than
          clang, msvc or gcc (analyzed by Raphael 'kena' Poss). Unfortunately,
          C++ doesn't support typedefs for function pointers fully, so the affected
          declarations have to spell out the types each time.
	- when not using autoconf, tighten the check for clock_gettime and related
          functionality.

4.18 Fri Sep  5 17:55:26 CEST 2014
	- events on files were not always generated properly with the
          epoll backend (testcase by Assaf Inbal).
	- mark event pipe fd as cloexec after a fork (analyzed by Sami Farin).
        - (ecb) support m68k, m88k and sh (patch by Miod Vallat).
        - use a reasonable fallback for EV_NSIG instead of erroring out
          when we can't detect the signal set size.
        - in the absence of autoconf, do not use the clock syscall
          on glibc >= 2.17 (avoids the syscall AND -lrt on systems
          doing clock_gettime in userspace).
        - ensure extern "C" function pointers are used for externally-visible
          loop callbacks (not watcher callbacks yet).
        - (ecb) work around memory barriers and volatile apparently both being
          broken in visual studio 2008 and later (analysed and patch by Nicolas Noble).

4.15 Fri Mar  1 12:04:50 CET 2013
        - destroying a non-default loop would stop the global waitpid
          watcher (Denis Bilenko).
	- queueing pending watchers of higher priority from a watcher now invokes
          them in a timely fashion (reported by Denis Bilenko).
	- add throw() to all libev functions that cannot throw exceptions, for
          further code size decrease when compiling for C++.
        - add throw () to callbacks that must not throw exceptions (allocator,
          syserr, loop acquire/release, periodic reschedule cbs).
	- fix event_base_loop return code, add event_get_callback, event_base_new,
          event_base_get_method calls to improve libevent 1.x emulation and add
          some libevent 2.x functionality (based on a patch by Jeff Davey).
        - add more memory fences to fix a bug reported by Jeff Davey. Better
          be overfenced than underprotected.
	- ev_run now returns a boolean status (true meaning watchers are
          still active).
	- ev_once: undef EV_ERROR in ev_kqueue.c, to avoid clashing with
          libev's EV_ERROR (reported by 191919).
	- (ecb) add memory fence support for xlC (Darin McBride).
	- (ecb) add memory fence support for gcc-mips (Anton Kirilov).
	- (ecb) add memory fence support for gcc-alpha (Christian Weisgerber).
        - work around some kernels losing file descriptors by leaking
          the kqueue descriptor in the child.
        - work around linux inotify not reporting IN_ATTRIB changes for directories
          in many cases.
        - include sys/syscall.h instead of plain syscall.h.
        - check for io watcher loops in ev_verify, check for the most
          common reported usage bug in ev_io_start.
        - choose socket vs. WSASocket at compiletime using EV_USE_WSASOCKET.
        - always use WSASend/WSARecv directly on windows, hoping that this
          works in all cases (unlike read/write/send/recv...).
        - try to detect signals around a fork faster (test program by
          Denis Bilenko).
        - work around recent glibc versions that leak memory in realloc.
        - rename ev::embed::set to ev::embed::set_embed to avoid clashing
          with the watcher base set (loop) method.
        - rewrite the async/signal pipe logic to always keep a valid fd, which
          simplifies (and hopefully correctifies :) the race checking
          on fork, at the cost of one extra fd.
        - add fat, msdos, jffs2, ramfs, ntfs and btrfs to the list of
          inotify-supporting filesystems.
        - move orig_CFLAGS assignment to after AC_INIT, as newer autoconf
          versions ignore it before
          (https://bugzilla.redhat.com/show_bug.cgi?id=908096).
        - add some untested android support.
        - enum expressions must be of type int (reported by Juan Pablo L).

4.11 Sat Feb  4 19:52:39 CET 2012
	- INCOMPATIBLE CHANGE: ev_timer_again now clears the pending status, as
          was documented already, but not implemented in the repeating case.
        - new compiletime symbols: EV_NO_SMP and EV_NO_THREADS.
	- fix a race where the workaround against the epoll fork bugs
          caused signals to not be handled anymore.
	- correct backend_fudge for most backends, and implement a windows
          specific workaround to avoid looping because we call both
          select and Sleep, both with different time resolutions.
        - document range and guarantees of ev_sleep.
        - document reasonable ranges for periodics interval and offset.
        - rename backend_fudge to backend_mintime to avoid future confusion :)
	- change the default periodic reschedule function to hopefully be more
          exact and correct even in corner cases or in the far future.
        - do not rely on -lm anymore: use it when available but use our
          own floor () if it is missing. This should make it easier to embed,
          as no external libraries are required.
        - strategically import macros from libecb and mark rarely-used functions
          as cache-cold (saving almost 2k code size on typical amd64 setups).
        - add Symbols.ev and Symbols.event files, that were missing.
        - fix backend_mintime value for epoll (was 1/1024, is 1/1000 now).
        - fix #3 "be smart about timeouts" to not "deadlock" when
          timeout == now, also improve the section overall.
        - avoid "AVOIDING FINISHING BEFORE RETURNING" idiom.
        - support new EV_API_STATIC mode to make all libev symbols
          static.
        - supply default CFLAGS of -g -O3 with gcc when original CFLAGS
          were empty.

4.04 Wed Feb 16 09:01:51 CET 2011
	- fix two problems in the native win32 backend, where reuse of fd's
          with different underlying handles caused handles not to be removed
          or added to the select set (analyzed and tested by Bert Belder).
	- do not rely on ceil() in ev_e?poll.c.
        - backport libev to HP-UX versions before 11 v3.
        - configure did not detect nanosleep and clock_gettime properly when
          they are available in the libc (as opposed to -lrt).

4.03 Tue Jan 11 14:37:25 CET 2011
	- officially support polling files with all backends.
	- support files, /dev/zero etc. the same way as select in the epoll
          backend, by generating events on our own.
        - ports backend: work around solaris bug 6874410 and many related ones
          (EINTR, maybe more), with no performance loss (note that the solaris
          bug report is actually wrong, reality is far more bizarre and broken
          than that).
	- define EV_READ/EV_WRITE as macros in event.h, as some programs use
          #ifdef to test for them.
        - new (experimental) function: ev_feed_signal.
        - new (to become default) EVFLAG_NOSIGMASK flag.
        - new EVBACKEND_MASK symbol.
        - updated COMMON IDIOMS SECTION.

4.01 Fri Nov  5 21:51:29 CET 2010
        - automake fucked it up, apparently, --add-missing -f is not quite enough
          to make it update its files, so 4.00 didn't install ev++.h and
          event.h on make install. grrr.
        - ev_loop(count|depth) didn't return anything (Robin Haberkorn).
        - change EV_UNDEF to 0xffffffff to silence some overzealous compilers.
        - use "(libev) " prefix for all libev error messages now.

4.00 Mon Oct 25 12:32:12 CEST 2010
	- "PORTING FROM LIBEV 3.X TO 4.X" (in ev.pod) is recommended reading.
	- ev_embed_stop did not correctly stop the watcher (very good
          testcase by Vladimir Timofeev).
        - ev_run will now always update the current loop time - it erroneously
          didn't when idle watchers were active, causing timers not to fire.
        - fix a bug where a timeout of zero caused the timer not to fire
          in the libevent emulation (testcase by Péter Szabó).
	- applied win32 fixes by Michael Lenaghan (also James Mansion).
	- replace EV_MINIMAL by EV_FEATURES.
        - prefer EPOLL_CTL_ADD over EPOLL_CTL_MOD in some more cases, as it
          seems the former is *much* faster than the latter.
        - linux kernel version detection (for inotify bug workarounds)
          did not work properly.
        - reduce the number of spurious wake-ups with the ports backend.
        - remove dependency on sys/queue.h on freebsd (patch by Vanilla Hsu).
        - do async init within ev_async_start, not ev_async_set, which avoids
          an API quirk where the set function must be called in the C++ API
          even when there is nothing to set.
        - add (undocumented) EV_ENABLE when adding events with kqueue,
          this might help with OS X, which seems to need it despite documenting
          not to need it (helpfully pointed out by Tilghman Lesher).
        - do not use poll by default on freebsd, it's broken (what isn't
          on freebsd...).
        - allow to embed epoll on kernels >= 2.6.32.
        - configure now prepends -O3, not appends it, so one can still
          override it.
        - ev.pod: greatly expanded the portability section, added a porting
          section, a description of watcher states and made lots of minor fixes.
        - disable poll backend on AIX, the poll header spams the namespace
          and it's not worth working around dead platforms (reported
          and analyzed by Aivars Kalvans).
        - improve header file compatibility of the standalone eventfd code
          in an obscure case.
        - implement EV_AVOID_STDIO option.
        - do not use sscanf to parse linux version number (smaller, faster,
          no sscanf dependency).
        - new EV_CHILD_ENABLE and EV_SIGNAL_ENABLE configurable settings.
        - update libev.m4 HAVE_CLOCK_SYSCALL test for newer glibcs.
        - add section on accept() problems to the manpage.
        - rename EV_TIMEOUT to EV_TIMER.
        - rename ev_loop_count/depth/verify/loop/unloop.
        - remove ev_default_destroy and ev_default_fork.
        - switch to two-digit minor version.
        - work around an apparent gentoo compiler bug.
        - define _DARWIN_UNLIMITED_SELECT. just so.
        - use enum instead of #define for most constants.
        - improve compatibility to older C++ compilers.
        - (experimental) ev_run/ev_default_loop/ev_break/ev_loop_new have now
          default arguments when compiled as C++.
        - enable automake dependency tracking.
        - ev_loop_new no longer leaks memory when loop creation failed.
        - new ev_cleanup watcher type.

3.9  Thu Dec 31 07:59:59 CET 2009
	- signalfd is no longer used by default and has to be requested
          explicitly - this means that easy to catch bugs become hard to
          catch race conditions, but the users have spoken.
        - point out the unspecified signal mask in the documentation, and
          that this is a race condition regardless of EV_SIGNALFD.
	- backport inotify code to C89.
        - inotify file descriptors could leak into child processes.
        - ev_stat watchers could keep an erroneous extra ref on the loop,
          preventing exit when unregistering all watchers (testcases
          provided by ry@tinyclouds.org).
        - implement EV_WIN32_HANDLE_TO_FD and EV_WIN32_CLOSE_FD configuration
          symbols to make it easier for apps to do their own fd management.
        - support EV_IDLE_ENABLE being disabled in ev++.h
          (patch by Didier Spezia).
        - take advantage of inotify_init1, if available, to set cloexec/nonblock
          on fd creation, to avoid races.
        - the signal handling pipe wasn't always initialised under windows
          (analysed by lekma).
        - changed minimum glibc requirement from glibc 2.9 to 2.7, for
          signalfd.
        - add missing string.h include (Denis F. Latypoff).
        - only replace ev_stat.prev when we detect an actual difference,
          so prev is (almost) always different to attr. this might
          have caused the problems with 04_stat.t.
        - add ev::timer->remaining () method to C++ API.

3.8  Sun Aug  9 14:30:45 CEST 2009
	- incompatible change: do not necessarily reset signal handler
          to SIG_DFL when a sighandler is stopped.
        - ev_default_destroy did not properly free or zero some members,
          potentially causing crashes and memory corruption on repeated
          ev_default_destroy/ev_default_loop calls.
	- take advantage of signalfd on GNU/Linux systems.
	- document that the signal mask might be in an unspecified
          state when using libev's signal handling.
        - take advantage of some GNU/Linux calls to set cloexec/nonblock
          on fd creation, to avoid race conditions.

3.7  Fri Jul 17 16:36:32 CEST 2009
	- ev_unloop and ev_loop wrongly used a global variable to exit loops,
          instead of using a per-loop variable (bug caught by accident...).
	- the ev_set_io_collect_interval interpretation has changed.
        - add new functionality: ev_set_userdata, ev_userdata,
          ev_set_invoke_pending_cb, ev_set_loop_release_cb,
          ev_invoke_pending, ev_pending_count, together with a long example
          about thread locking.
        - add ev_timer_remaining (as requested by Denis F. Latypoff).
        - add ev_loop_depth.
        - calling ev_unloop in fork/prepare watchers will no longer poll
          for new events.
	- Denis F. Latypoff corrected many typos in example code snippets.
        - honor autoconf detection of EV_USE_CLOCK_SYSCALL, also double-
          check that the syscall number is available before trying to
          use it (reported by ry@tinyclouds).
        - use GetSystemTimeAsFileTime instead of _timeb on windows, for
          slightly higher accuracy.
        - properly declare ev_loop_verify and ev_now_update even when
          !EV_MULTIPLICITY.
        - do not compile in any priority code when EV_MAXPRI == EV_MINPRI.
        - support EV_MINIMAL==2 for a reduced API.
        - actually 0-initialise struct sigaction when installing signals.
        - add section on hibernate and stopped processes to ev_timer docs.

3.6  Tue Apr 28 02:49:30 CEST 2009
	- multiple timers becoming ready within an event loop iteration
          will be invoked in the "correct" order now.
	- do not leave the event loop early just because we have no active
          watchers, fixing a problem when embedding a kqueue loop
          that has active kernel events but no registered watchers
          (reported by blacksand blacksand).
	- correctly zero the idx values for arrays, so destroying and
          reinitialising the default loop actually works (patch by
          Malek Hadj-Ali).
        - implement ev_suspend and ev_resume.
        - new EV_CUSTOM revents flag for use by applications.
        - add documentation section about priorities.
        - add a glossary to the documentation.
        - extend the ev_fork description slightly.
        - optimize a jump out of call_pending.

3.53 Sun Feb 15 02:38:20 CET 2009
	- fix a bug in event pipe creation on win32 that would cause a
          failed assertion on event loop creation (patch by Malek Hadj-Ali).
	- probe for CLOCK_REALTIME support at runtime as well and fall
          back to gettimeofday if there is an error, to support older
          operating systems with newer header files/libraries.
        - prefer gettimeofday over clock_gettime with USE_CLOCK_SYSCALL
          (default most everywhere), otherwise not.

3.52 Wed Jan  7 21:43:02 CET 2009
	- fix compilation of select backend in fd_set mode when NFDBITS is
          missing (to get it to compile on QNX, reported by Rodrigo Campos).
        - better select-nfds handling when select backend is in fd_set mode.
        - diagnose fd_set overruns when select backend is in fd_set mode.
        - due to a thinko, instead of disabling everything but
          select on the borked OS X platform, everything but select was
          allowed (reported by Emanuele Giaquinta).
        - actually verify that local and remote port are matching in
          libev's socketpair emulation, which makes denial-of-service
          attacks harder (but not impossible - it's windows). Make sure
          it even works under vista, which thinks that getpeer/sockname
          should return fantasy port numbers.
        - include "libev" in all assertion messages for potentially
          clearer diagnostics.
        - event_get_version (libevent compatibility) returned
          a useless string instead of the expected version string
          (patch by W.C.A. Wijngaards).

3.51 Wed Dec 24 23:00:11 CET 2008
        - fix a bug where an inotify watcher was added twice, causing
          freezes on hash collisions (reported and analysed by Graham Leggett).
	- new config symbol, EV_USE_CLOCK_SYSCALL, to make libev use
          a direct syscall - slower, but no dependency on librt et al.
        - assume negative return values != -1 signals success of port_getn
          (http://cvs.epicsol.org/cgi/viewcvs.cgi/epic5/source/newio.c?rev=1.52)
          (no known failure reports, but it doesn't hurt).
        - fork detection in ev_embed now stops and restarts the watcher
          automatically.
        - EXPERIMENTAL: default the method to operator () in ev++.h,
          to make it nicer to use functors (requested by Benedek László).
        - fixed const object callbacks in ev++.h.
        - replaced loop_ref argument of watcher.set (loop) by a direct
          ev_loop * in ev++.h, to avoid clashes with functor patch.
        - do not try to watch the empty string via inotify.
        - inotify watchers could be leaked under certain circumstances.
        - OS X 10.5 is actually even more broken than earlier versions,
          so fall back to select on that piece of garbage.
        - fixed some weirdness in the ev_embed documentation.

3.49 Wed Nov 19 11:26:53 CET 2008
	- ev_stat watchers will now use inotify as a mere hint on
          kernels <2.6.25, or if the filesystem is not in the
          "known to be good" list.
        - better mingw32 compatibility (it's not as borked as native win32)
          (analysed by Roger Pack).
        - include stdio.h in the example program, as too many people are
          confused by the weird C language otherwise. I guess the next thing
          I get told is that the "..." ellipses in the examples don't compile
          with their C compiler.

3.48 Thu Oct 30 09:02:37 CET 2008
	- further optimise away the EPOLL_CTL_ADD/MOD combo in the epoll
          backend by assuming the kernel event mask hasn't changed if
          ADD fails with EEXIST.
        - work around spurious event notification bugs in epoll by using
          a 32-bit generation counter. recreate kernel state if we receive
          spurious notifications or unwanted events. this is very costly,
          but I didn't come up with this horrible design.
        - use memset to initialise most arrays now and do away with the
          init functions.
        - expand time-out strategies into a "Be smart about timeouts" section.
        - drop the "struct" from all ev_watcher declarations in the
          documentation and did other clarifications (yeah, it was a mistake
          to have a struct AND a function called ev_loop).
	- fix a bug where ev_default would not initialise the default
          loop again after it was destroyed with ev_default_destroy.
        - rename syserr to ev_syserr to avoid name clashes when embedding,
          do similar changes for event.c.

3.45 Tue Oct 21 21:59:26 CEST 2008
	- disable inotify usage on linux <2.6.25, as it is broken
          (reported by Yoann Vandoorselaere).
        - ev_stat erroneously would try to add inotify watchers
          even when inotify wasn't available (this should only
          have a performance impact).
	- ev_once now passes both timeout and io to the callback if both
          occur concurrently, instead of giving timeouts precedence.
	- disable EV_USE_INOTIFY when sys/inotify.h is too old.

3.44 Mon Sep 29 05:18:39 CEST 2008
	- embed watchers now automatically invoke ev_loop_fork on the
          embedded loop when the parent loop forks.
	- new function: ev_now_update (loop).
	- verify_watcher was not marked static.
        - improve the "associating..." manpage section.
        - documentation tweaks here and there.

3.43 Sun Jul  6 05:34:41 CEST 2008
	- include more include files on windows to get struct _stati64
          (reported by Chris Hulbert, but doesn't quite fix his issue).
	- add missing #include <io.h> in ev.c on windows (reported by
          Matt Tolton).

3.42 Tue Jun 17 12:12:07 CEST 2008
	- work around yet another windows bug: FD_SET actually adds fd's
          multiple times to the fd_*SET*, despite official MSN docs claiming
          otherwise. Reported and well-analysed by Matt Tolton.
	- define NFDBITS to 0 when EV_SELECT_IS_WINSOCKET to make it compile
          (reported and analysed by Chris Hulbert).
        - fix a bug in ev_ebadf (this function is only used to catch
          programming errors in the libev user). reported by Matt Tolton.
        - fix a bug in fd_intern on win32 (could lead to compile errors
          under some circumstances, but would work correctly if it compiles).
          reported by Matt Tolton.
        - (try to) work around missing lstat on windows.
	- pass in the write fd set as except fd set under windows. windows
          is so uncontrollably lame that it requires this. this means that
          switching off oobinline is not supported (but tcp/ip doesn't
          have oob, so that would be stupid anyways).
        - use posix module symbol to auto-detect monotonic clock presence
          and some other default values.

3.41 Fri May 23 18:42:54 CEST 2008
	- work around an obscure bug in winsocket select: if you
          provide only empty fd sets then select returns WSAEINVAL. how sucky.
        - improve timer scheduling stability and reduce use of time_epsilon.
        - use 1-based 2-heap for EV_MINIMAL, simplifies code, reduces
          codesize and makes for better cache-efficiency.
        - use 3-based 4-heap for !EV_MINIMAL. this makes better use
          of cpu cache lines and gives better growth behaviour than
          2-based heaps.
        - cache timestamp within heap for !EV_MINIMAL, to avoid random
          memory accesses.
        - document/add EV_USE_4HEAP and EV_HEAP_CACHE_AT.
        - fix a potential aliasing issue in ev_timer_again.
        - add/document ev_periodic_at, retract direct access to ->at.
        - improve ev_stat docs.
        - add portability requirements section.
	- fix manpage headers etc.
        - normalise WSA error codes to lower range on windows.
        - add consistency check code that can be called automatically
          or on demand to check for internal structures (ev_loop_verify).

3.31 Wed Apr 16 20:45:04 CEST 2008
	- added last minute fix for ev_poll.c by Brandon Black.

3.3  Wed Apr 16 19:04:10 CEST 2008
        - event_base_loopexit should return 0 on success
          (W.C.A. Wijngaards).
	- added linux eventfd support.
        - try to autodetect epoll and inotify support
          by libc header version if not using autoconf.
        - new symbols: EV_DEFAULT_UC and EV_DEFAULT_UC_.
        - declare functions defined in ev.h as inline if
          C99 or gcc are available.
        - enable inlining with gcc versions 2 and 3.
        - work around broken poll implementations potentially
          not clearing revents field in ev_poll (Brandon Black)
          (no such systems are known at this time).
        - work around a bug in realloc on openbsd and darwin,
          also makes the erroneous valgrind complaints
          go away (noted by various people).
        - fix ev_async_pending, add c++ wrapper for ev_async
          (based on patch sent by Johannes Deisenhofer).
        - add sensible set method to ev::embed.
        - made integer constants type int in ev.h.

3.2  Wed Apr  2 17:11:19 CEST 2008
	- fix a 64 bit overflow issue in the select backend,
          by using fd_mask instead of int for the mask.
        - rename internal sighandler to avoid clash with very old perls.
        - entering ev_loop will not clear the ONESHOT or NONBLOCKING
          flags of any outer loops anymore.
        - add ev_async_pending.

3.1  Thu Mar 13 13:45:22 CET 2008
	- implement ev_async watchers.
        - only initialise signal pipe on demand.
	- make use of sig_atomic_t configurable.
        - improved documentation.

3.0  Mon Jan 28 13:14:47 CET 2008
	- API/ABI bump to version 3.0.
	- ev++.h includes "ev.h" by default now, not <ev.h>.
	- slightly improved documentation.
	- speed up signal detection after a fork.
        - only optionally return trace status changed in ev_child
          watchers.
        - experimental (and undocumented) loop wrappers for ev++.h.

2.01 Tue Dec 25 08:04:41 CET 2007
	- separate Changes file.
	- fix ev_path_set => ev_stat_set typo.
        - remove event_compat.h from the libev tarball.
        - change how include files are found.
        - doc updates.
        - update licenses, explicitly allow for GPL relicensing.

2.0  Sat Dec 22 17:47:03 CET 2007
        - new ev_sleep, ev_set_(io|timeout)_collect_interval.
        - removed epoll from embeddable fd set.
        - fix embed watchers.
	- renamed ev_embed.loop to other.
	- added exported Symbol tables.
        - undefine member wrapper macros at the end of ev.c.
        - respect EV_H in ev++.h.

1.86 Tue Dec 18 02:36:57 CET 2007
	- fix memleak on loop destroy (not relevant for perl).

1.85 Fri Dec 14 20:32:40 CET 2007
        - fix some aliasing issues w.r.t. timers and periodics
          (not relevant for perl).

(for historic versions refer to EV/Changes, found in the Perl interface)

0.1  Wed Oct 31 21:31:48 CET 2007
	- original version; hacked together in <24h.


================================================
FILE: libev/LICENSE
================================================
All files in libev are
Copyright (c)2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Alternatively, the contents of this package may be used under the terms
of the GNU General Public License ("GPL") version 2 or any later version,
in which case the provisions of the GPL are applicable instead of the
above. If you wish to allow the use of your version of this package only
under the terms of the GPL and not to allow others to use your version of
this file under the BSD license, indicate your decision by deleting the
provisions above and replace them with the notice and other provisions
required by the GPL in this and the other files of this package. If you do
not delete the provisions above, a recipient may use your version of this
file under either the BSD or the GPL.


================================================
FILE: libev/Makefile.am
================================================
# Automake input that builds libev as a libtool library (libev.la) and
# installs its headers and man page.

# "foreign" strictness: do not insist on the full set of GNU-standard files.
AUTOMAKE_OPTIONS = foreign

# Handed to libtool via -version-info in libev_la_LDFLAGS below.
VERSION_INFO = 4:0:0

# Files shipped in the distribution tarball without being built directly.
# NOTE(review): the backend sources (ev_epoll.c, ev_select.c, ...) are listed
# here rather than in libev_la_SOURCES, presumably because ev.c #includes
# them - confirm against ev.c.
EXTRA_DIST = LICENSE Changes libev.m4 autogen.sh \
	     ev_vars.h ev_wrap.h \
	     ev_epoll.c ev_select.c ev_poll.c ev_kqueue.c ev_port.c ev_win32.c \
	     ev.3 ev.pod Symbols.ev Symbols.event

# Installed manual page (generated from ev.pod by the rule at the bottom).
man_MANS = ev.3

# Public headers: the C API, the C++ wrapper and the event.h header.
include_HEADERS = ev.h ev++.h event.h

lib_LTLIBRARIES = libev.la

# Only these two translation units are compiled into the library.
libev_la_SOURCES = ev.c event.c
libev_la_LDFLAGS = -version-info $(VERSION_INFO)

# Regenerate the section-3 man page from the POD source with pod2man.
ev.3: ev.pod
	pod2man -n LIBEV -r "libev-$(VERSION)" -c "libev - high performance full featured event loop" -s3 <$< >$@


================================================
FILE: libev/README
================================================
libev is a high-performance event loop/event model with lots of features.
(see benchmark at http://libev.schmorp.de/bench.html)


ABOUT

   Homepage: http://software.schmorp.de/pkg/libev
   Mailinglist: libev@lists.schmorp.de
                http://lists.schmorp.de/cgi-bin/mailman/listinfo/libev
   Library Documentation: http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod

   Libev is modelled (very loosely) after libevent and the Event perl
   module, but is faster, scales better and is more correct, and also more
   featureful. And also smaller. Yay.

   Some of the specialties of libev not commonly found elsewhere are:
   
   - extensive and detailed, readable documentation (not doxygen garbage).
   - fully supports fork, can detect fork in various ways and automatically
     re-arms kernel mechanisms that do not support fork.
   - highly optimised select, poll, epoll, kqueue and event ports backends.
   - filesystem object (path) watching (with optional linux inotify support).
   - wallclock-based times (using absolute time, cron-like).
   - relative timers/timeouts (handle time jumps).
   - fast intra-thread communication between multiple
     event loops (with optional fast linux eventfd backend).
   - extremely easy to embed (fully documented, no dependencies,
     autoconf supported but optional).
   - very small codebase, no bloated library, simple code.
   - fully extensible by being able to plug into the event loop,
     integrate other event loops, integrate other event loop users.
   - very little memory use (small watchers, small event loop data).
   - optional C++ interface allowing method and function callbacks
     at no extra memory or runtime overhead.
   - optional Perl interface with similar characteristics (capable
     of running Glib/Gtk2 on libev).
   - support for other languages (multiple C++ interfaces, D, Ruby,
     Python) available from third-parties.

   Examples of programs that embed libev: the EV perl module, node.js,
   auditd, rxvt-unicode, gvpe (GNU Virtual Private Ethernet), the
   Deliantra MMORPG server (http://www.deliantra.net/), Rubinius (a
   next-generation Ruby VM), the Ebb web server, the Rev event toolkit.


CONTRIBUTORS

   libev was written and designed by Marc Lehmann and Emanuele Giaquinta.

   The following people sent in patches or made other noteworthy
   contributions to the design (for minor patches, see the Changes
   file. If I forgot to include you, please shout at me, it was an
   accident):

   W.C.A. Wijngaards
   Christopher Layne
   Chris Brody


================================================
FILE: libev/README.embed
================================================
This file is now included in the main libev documentation, see

   http://cvs.schmorp.de/libev/ev.html


================================================
FILE: libev/Symbols.ev
================================================
ev_async_send
ev_async_start
ev_async_stop
ev_backend
ev_break
ev_check_start
ev_check_stop
ev_child_start
ev_child_stop
ev_cleanup_start
ev_cleanup_stop
ev_clear_pending
ev_default_loop
ev_default_loop_ptr
ev_depth
ev_embed_start
ev_embed_stop
ev_embed_sweep
ev_embeddable_backends
ev_feed_event
ev_feed_fd_event
ev_feed_signal
ev_feed_signal_event
ev_fork_start
ev_fork_stop
ev_idle_start
ev_idle_stop
ev_invoke
ev_invoke_pending
ev_io_start
ev_io_stop
ev_iteration
ev_loop_destroy
ev_loop_fork
ev_loop_new
ev_now
ev_now_update
ev_once
ev_pending_count
ev_periodic_again
ev_periodic_start
ev_periodic_stop
ev_prepare_start
ev_prepare_stop
ev_recommended_backends
ev_ref
ev_resume
ev_run
ev_set_allocator
ev_set_invoke_pending_cb
ev_set_io_collect_interval
ev_set_loop_release_cb
ev_set_syserr_cb
ev_set_timeout_collect_interval
ev_set_userdata
ev_signal_start
ev_signal_stop
ev_sleep
ev_stat_start
ev_stat_stat
ev_stat_stop
ev_supported_backends
ev_suspend
ev_time
ev_timer_again
ev_timer_remaining
ev_timer_start
ev_timer_stop
ev_unref
ev_userdata
ev_verify
ev_version_major
ev_version_minor


================================================
FILE: libev/Symbols.event
================================================
event_active
event_add
event_base_dispatch
event_base_free
event_base_get_method
event_base_loop
event_base_loopexit
event_base_new
event_base_once
event_base_priority_init
event_base_set
event_del
event_dispatch
event_get_callback
event_get_method
event_get_version
event_init
event_loop
event_loopexit
event_once
event_pending
event_priority_init
event_priority_set
event_set


================================================
FILE: libev/autogen.sh
================================================
#!/bin/sh

# Bootstrap the autotools build: run autoreconf with --install (add missing
# auxiliary files), --symlink (link them instead of copying) and --force
# (regenerate even if the outputs look up to date).
autoreconf --install --symlink --force


================================================
FILE: libev/configure.ac
================================================
dnl Autoconf input for the standalone libev build.
AC_INIT

dnl Remember whether the user supplied CFLAGS before AC_PROG_CC gets a
dnl chance to fill in its own default; used further down.
orig_CFLAGS="$CFLAGS"

dnl Sanity check that configure runs inside the libev source directory.
AC_CONFIG_SRCDIR([ev_epoll.c])

dnl also update ev.h!
AM_INIT_AUTOMAKE(libev,4.24)
AC_CONFIG_HEADERS([config.h])
AM_MAINTAINER_MODE

AC_PROG_CC

dnl Supply default CFLAGS, if not specified
dnl (only when compiling with GCC; other compilers keep whatever
dnl AC_PROG_CC selected).
if test -z "$orig_CFLAGS"; then
  if test x$GCC = xyes; then
    CFLAGS="-g -O3"
  fi
fi

AC_PROG_INSTALL
AC_PROG_LIBTOOL

dnl Pull in the libev-specific checks kept in libev.m4.
m4_include([libev.m4])

AC_CONFIG_FILES([Makefile])
AC_OUTPUT


================================================
FILE: libev/ev++.h
================================================
/*
 * libev simple C++ wrapper classes
 *
 * Copyright (c) 2007,2008,2010 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifndef EVPP_H__
#define EVPP_H__

#ifdef EV_H
# include EV_H
#else
# include "ev.h"
#endif

#ifndef EV_USE_STDEXCEPT
# define EV_USE_STDEXCEPT 1
#endif

#if EV_USE_STDEXCEPT
# include <stdexcept>
#endif

namespace ev {

  // C++ spelling of libev's timestamp type (ev::tstamp == ev_tstamp).
  typedef ev_tstamp tstamp;

  // Event type constants: every enumerator re-exports the EV_* value of the
  // same name from ev.h, so C++ code can write ev::READ instead of EV_READ.
  enum {
    UNDEF    = EV_UNDEF,
    NONE     = EV_NONE,
    READ     = EV_READ,
    WRITE    = EV_WRITE,
#if EV_COMPAT3
    // only provided when EV_COMPAT3 is enabled
    TIMEOUT  = EV_TIMEOUT,
#endif
    TIMER    = EV_TIMER,
    PERIODIC = EV_PERIODIC,
    SIGNAL   = EV_SIGNAL,
    CHILD    = EV_CHILD,
    STAT     = EV_STAT,
    IDLE     = EV_IDLE,
    CHECK    = EV_CHECK,
    PREPARE  = EV_PREPARE,
    FORK     = EV_FORK,
    ASYNC    = EV_ASYNC,
    EMBED    = EV_EMBED,
#   undef ERROR // some systems stupidly #define ERROR
    ERROR    = EV_ERROR
  };

  // Loop construction constants: the EVFLAG_* values followed by the
  // EVBACKEND_* values from ev.h. dynamic_loop and default_loop use AUTO as
  // their default `flags` argument.
  enum
  {
    AUTO      = EVFLAG_AUTO,
    NOENV     = EVFLAG_NOENV,
    FORKCHECK = EVFLAG_FORKCHECK,

    SELECT    = EVBACKEND_SELECT,
    POLL      = EVBACKEND_POLL,
    EPOLL     = EVBACKEND_EPOLL,
    KQUEUE    = EVBACKEND_KQUEUE,
    DEVPOLL   = EVBACKEND_DEVPOLL,
    PORT      = EVBACKEND_PORT
  };

  // Constants accepted as the `flags` argument of loop_ref::run ().
  enum
  {
#if EV_COMPAT3
    // older EVLOOP_* names, only provided when EV_COMPAT3 is enabled
    NONBLOCK = EVLOOP_NONBLOCK,
    ONESHOT  = EVLOOP_ONESHOT,
#endif
    NOWAIT   = EVRUN_NOWAIT,
    ONCE     = EVRUN_ONCE
  };

  // Argument type of loop_ref::break_loop () (and unloop ()); mirrors the
  // EVBREAK_* values from ev.h.
  enum how_t
  {
    ONE = EVBREAK_ONE,
    ALL = EVBREAK_ALL
  };

  // Exception thrown by dynamic_loop / default_loop when libev fails to hand
  // out a loop. With EV_USE_STDEXCEPT it is a std::runtime_error carrying a
  // fixed message; otherwise it is an empty tag type.
#if EV_USE_STDEXCEPT
  struct bad_loop : std::runtime_error
  {
    bad_loop ()
    : std::runtime_error ("libev event loop cannot be initialized, bad value of LIBEV_FLAGS?")
    {
    }
  };
#else
  struct bad_loop
  {
  };
#endif

// EV_AX / EV_AX_ are helper macros used only while defining the loop classes
// below. With EV_MULTIPLICITY they name the `raw_loop` member (EV_AX_ adds a
// trailing comma so it can be placed in front of further call arguments);
// without it both expand to nothing. Any earlier definition is discarded
// first.
#ifdef EV_AX
#  undef EV_AX
#endif

#ifdef EV_AX_
#  undef EV_AX_
#endif

#if EV_MULTIPLICITY
#  define EV_AX  raw_loop
#  define EV_AX_ raw_loop,
#else
#  define EV_AX
#  define EV_AX_
#endif

  // Thin handle around a libev loop. With EV_MULTIPLICITY it stores the raw
  // `struct ev_loop *` and forwards every call to the matching ev_* function;
  // without EV_MULTIPLICITY it has no data member and the ev_* functions are
  // called without a loop argument. The struct defines no destructor, so it
  // never destroys the loop it refers to.
  struct loop_ref
  {
    // Wrap the given loop.
    loop_ref (EV_P) throw ()
#if EV_MULTIPLICITY
    : EV_AX (EV_A)
#endif
    {
    }

    // Handles compare equal when they refer to the same loop; with a single
    // implicit loop they are always equal.
    bool operator == (const loop_ref &other) const throw ()
    {
#if EV_MULTIPLICITY
      return other.EV_AX == EV_AX;
#else
      return true;
#endif
    }

    // Negation of operator == above (constant false without EV_MULTIPLICITY).
    bool operator != (const loop_ref &other) const throw ()
    {
      return ! (*this == other);
    }

#if EV_MULTIPLICITY
    // Comparison against a raw loop pointer.
    bool operator == (const EV_P) const throw ()
    {
      return EV_A == this->EV_AX;
    }

    bool operator != (const EV_P) const throw ()
    {
      return this->EV_AX != EV_A;
    }

    // Implicit conversions back to the raw pointer, so a loop_ref can be
    // passed wherever the C API expects a loop.
    operator struct ev_loop * () const throw ()
    {
      return EV_AX;
    }

    operator const struct ev_loop * () const throw ()
    {
      return EV_AX;
    }

    // True when this handle refers to the loop returned by ev_default_loop (0).
    bool is_default () const throw ()
    {
      return ev_default_loop (0) == EV_AX;
    }
#endif

#if EV_COMPAT3
    // Older name for run ().
    void loop (int flags = 0)
    {
      run (flags);
    }

    // Older name for break_loop ().
    void unloop (how_t how = ONE) throw ()
    {
      break_loop (how);
    }
#endif

    // Forwards to ev_run.
    void run (int flags = 0)
    {
      ev_run (EV_AX_ flags);
    }

    // Forwards to ev_break.
    void break_loop (how_t how = ONE) throw ()
    {
      ev_break (EV_AX_ how);
    }

    // Forwards to ev_loop_fork.
    void post_fork () throw ()
    {
      ev_loop_fork (EV_AX);
    }

    // Forwards to ev_backend.
    unsigned int backend () const throw ()
    {
      return ev_backend (EV_AX);
    }

    // Forwards to ev_now.
    tstamp now () const throw ()
    {
      return ev_now (EV_AX);
    }

    // Forward to ev_ref / ev_unref.
    void ref () throw ()
    {
      ev_ref (EV_AX);
    }

    void unref () throw ()
    {
      ev_unref (EV_AX);
    }

#if EV_FEATURE_API
    // Forwards to ev_iteration.
    unsigned int iteration () const throw ()
    {
      return ev_iteration (EV_AX);
    }

    // Forwards to ev_depth.
    unsigned int depth () const throw ()
    {
      return ev_depth (EV_AX);
    }

    // Forward to the two ev_set_*_collect_interval functions.
    void set_io_collect_interval (tstamp interval) throw ()
    {
      ev_set_io_collect_interval (EV_AX_ interval);
    }

    void set_timeout_collect_interval (tstamp interval) throw ()
    {
      ev_set_timeout_collect_interval (EV_AX_ interval);
    }
#endif

    // once () with a plain C callback: forwards straight to ev_once.
    void once (int fd, int events, tstamp timeout, void (*cb)(int, void *), void *arg = 0) throw ()
    {
      ev_once (EV_AX_ fd, events, timeout, cb, arg);
    }

    // once () invoking `object->method (revents)`.
    template<class K, void (K::*method)(int)>
    void once (int fd, int events, tstamp timeout, K *object) throw ()
    {
      void (*thunk)(int, void *) = method_thunk<K, method>;
      once (fd, events, timeout, thunk, object);
    }

    // once () invoking `(*object) (revents)`, i.e. K::operator ().
    template<class K>
    void once (int fd, int events, tstamp timeout, K *object) throw ()
    {
      void (*thunk)(int, void *) = method_thunk<K, &K::operator ()>;
      once (fd, events, timeout, thunk, object);
    }

    // C-callable trampoline: `self` is the K instance registered by once ().
    template<class K, void (K::*method)(int)>
    static void method_thunk (int revents, void *self)
    {
      K *target = static_cast<K *>(self);
      (target->*method) (revents);
    }

    // once () invoking `object->method ()`; the event mask is dropped.
    template<class K, void (K::*method)()>
    void once (int fd, int events, tstamp timeout, K *object) throw ()
    {
      void (*thunk)(int, void *) = method_noargs_thunk<K, method>;
      once (fd, events, timeout, thunk, object);
    }

    // Trampoline for the no-argument method form.
    template<class K, void (K::*method)()>
    static void method_noargs_thunk (int, void *self)
    {
      K *target = static_cast<K *>(self);
      (target->*method) ();
    }

    // once () with a compile-time function taking only the event mask.
    template<void (*cb)(int)>
    void once (int fd, int events, tstamp timeout) throw ()
    {
      void (*thunk)(int, void *) = simpler_func_thunk<cb>;
      once (fd, events, timeout, thunk);
    }

    // Trampoline that discards the user-data pointer.
    template<void (*cb)(int)>
    static void simpler_func_thunk (int revents, void *)
    {
      (*cb) (revents);
    }

    // once () with a compile-time function taking no arguments at all.
    template<void (*cb)()>
    void once (int fd, int events, tstamp timeout) throw ()
    {
      void (*thunk)(int, void *) = simplest_func_thunk<cb>;
      once (fd, events, timeout, thunk);
    }

    // Trampoline that discards both the event mask and the user-data pointer.
    template<void (*cb)()>
    static void simplest_func_thunk (int, void *)
    {
      (*cb) ();
    }

    // Forwards to ev_feed_fd_event.
    void feed_fd_event (int fd, int revents) throw ()
    {
      ev_feed_fd_event (EV_AX_ fd, revents);
    }

    // Forwards to ev_feed_signal_event.
    void feed_signal_event (int signum) throw ()
    {
      ev_feed_signal_event (EV_AX_ signum);
    }

#if EV_MULTIPLICITY
    // The wrapped loop (named raw_loop through the EV_AX macro).
    struct ev_loop* EV_AX;
#endif

  };

#if EV_MULTIPLICITY
  // Loop handle that creates its loop with ev_loop_new in the constructor and
  // releases it with ev_loop_destroy in the destructor. Only exists when
  // EV_MULTIPLICITY is enabled. Copying is disabled (declared private, never
  // defined).
  struct dynamic_loop : loop_ref
  {
    // Throws bad_loop when ev_loop_new returns a null loop.
    dynamic_loop (unsigned int flags = AUTO) throw (bad_loop)
    : loop_ref (ev_loop_new (flags))
    {
      if (EV_AX)
        return;

      throw bad_loop ();
    }

    // Destroy the loop and clear the stored pointer.
    ~dynamic_loop () throw ()
    {
      ev_loop_destroy (EV_AX);
      EV_AX = 0;
    }

  private:

    dynamic_loop (const dynamic_loop &);
    dynamic_loop & operator= (const dynamic_loop &);

  };
#endif

  // Handle for libev's default loop, obtained via ev_default_loop (flags).
  // Defines no destructor, so the loop is not destroyed with the handle.
  // Copying is disabled (declared private, never defined).
  struct default_loop : loop_ref
  {
    // Throws bad_loop when ev_default_loop reports failure (a null loop with
    // EV_MULTIPLICITY, a zero return value without it).
    default_loop (unsigned int flags = AUTO) throw (bad_loop)
#if EV_MULTIPLICITY
    : loop_ref (ev_default_loop (flags))
#endif
    {
#if EV_MULTIPLICITY
      const bool failed = !EV_AX;
#else
      const bool failed = !ev_default_loop (flags);
#endif

      if (failed)
        throw bad_loop ();
    }

  private:
    default_loop (const default_loop &);
    default_loop &operator = (const default_loop &);
  };

  // Returns a loop_ref for the default loop, requesting it from libev with
  // flags 0 when EV_MULTIPLICITY is enabled. It is the default argument of
  // every watcher constructor generated by EV_CONSTRUCT.
  inline loop_ref get_default_loop () throw ()
  {
#if EV_MULTIPLICITY
    return loop_ref (ev_default_loop (0));
#else
    return loop_ref ();
#endif
  }

// The loop classes are complete; retire their helper macros.
#undef EV_AX
#undef EV_AX_

// EV_PX / EV_PX_ are the watcher-side counterparts of ev.h's EV_P / EV_P_:
// with EV_MULTIPLICITY they declare a `loop_ref` parameter (or member) named
// by EV_A / EV_A_; without it they expand to nothing.
#undef EV_PX
#undef EV_PX_
#if EV_MULTIPLICITY
#  define EV_PX  loop_ref EV_A
#  define EV_PX_ loop_ref EV_A_
#else
#  define EV_PX
#  define EV_PX_
#endif

  // Common base of all C++ watcher wrappers.
  //
  //   ev_watcher - the C watcher struct being wrapped (e.g. ev_io); base
  //                derives from it, so the object *is* the C watcher.
  //   watcher    - the concrete C++ wrapper type (e.g. ev::io); callbacks
  //                receive the watcher as a reference to this type.
  //
  // With EV_MULTIPLICITY the base also stores the loop the watcher belongs
  // to; the EV_A / EV_A_ macros used below refer to that member.
  template<class ev_watcher, class watcher>
  struct base : ev_watcher
  {
    #if EV_MULTIPLICITY
      EV_PX;

      // loop set: rebind the watcher to another loop
      void set (EV_P) throw ()
      {
        this->EV_A = EV_A;
      }
    #endif

    // Initialise the underlying C watcher with a null callback.
    base (EV_PX) throw ()
    #if EV_MULTIPLICITY
      : EV_A (EV_A)
    #endif
    {
      ev_init (this, 0);
    }

    // Low-level setter: store the user-data pointer and the C callback.
    void set_ (const void *data, void (*cb)(EV_P_ ev_watcher *w, int revents)) throw ()
    {
      this->data = (void *)data;
      ev_set_cb (static_cast<ev_watcher *>(this), cb);
    }

    // function callback
    template<void (*function)(watcher &w, int)>
    void set (void *data = 0) throw ()
    {
      set_ (data, function_thunk<function>);
    }

    // Trampoline: downcast the C watcher and call the free function.
    template<void (*function)(watcher &w, int)>
    static void function_thunk (EV_P_ ev_watcher *w, int revents)
    {
      function
        (*static_cast<watcher *>(w), revents);
    }

    // method callback
    template<class K, void (K::*method)(watcher &w, int)>
    void set (K *object) throw ()
    {
      set_ (object, method_thunk<K, method>);
    }

    // default method == operator ()
    template<class K>
    void set (K *object) throw ()
    {
      set_ (object, method_thunk<K, &K::operator ()>);
    }

    // Trampoline: w->data holds the K instance registered by set ().
    template<class K, void (K::*method)(watcher &w, int)>
    static void method_thunk (EV_P_ ev_watcher *w, int revents)
    {
      (static_cast<K *>(w->data)->*method)
        (*static_cast<watcher *>(w), revents);
    }

    // no-argument callback
    template<class K, void (K::*method)()>
    void set (K *object) throw ()
    {
      set_ (object, method_noargs_thunk<K, method>);
    }

    // Trampoline for the no-argument method form.
    template<class K, void (K::*method)()>
    static void method_noargs_thunk (EV_P_ ev_watcher *w, int revents)
    {
      (static_cast<K *>(w->data)->*method)
        ();
    }

    // Invoke the registered callback directly, as if `events` had occurred.
    // The callback has the signature (EV_P_ ev_watcher *, int) - see set_ ()
    // - so the loop must be passed as well when EV_MULTIPLICITY is enabled
    // (EV_A_ expands to nothing otherwise).
    void operator ()(int events = EV_UNDEF)
    {
      ev_cb (static_cast<ev_watcher *>(this))
        (EV_A_ static_cast<ev_watcher *>(this), events);
    }

    // Forwards to ev_is_active.
    bool is_active () const throw ()
    {
      return ev_is_active (static_cast<const ev_watcher *>(this));
    }

    // Forwards to ev_is_pending.
    bool is_pending () const throw ()
    {
      return ev_is_pending (static_cast<const ev_watcher *>(this));
    }

    // Forwards to ev_feed_event for this watcher.
    void feed_event (int revents) throw ()
    {
      ev_feed_event (EV_A_ static_cast<ev_watcher *>(this), revents);
    }
  };

  // Free-function form of ev_now for the given loop.
  inline tstamp now (EV_P) throw ()
  {
    return ev_now (EV_A);
  }

  // Forwards to ev_sleep with the given interval.
  inline void delay (tstamp interval) throw ()
  {
    ev_sleep (interval);
  }

  // Forwards to ev_version_major.
  inline int version_major () throw ()
  {
    return ev_version_major ();
  }

  // Forwards to ev_version_minor.
  inline int version_minor () throw ()
  {
    return ev_version_minor ();
  }

  // Forwards to ev_supported_backends.
  inline unsigned int supported_backends () throw ()
  {
    return ev_supported_backends ();
  }

  // Forwards to ev_recommended_backends.
  inline unsigned int recommended_backends () throw ()
  {
    return ev_recommended_backends ();
  }

  // Forwards to ev_embeddable_backends.
  inline unsigned int embeddable_backends () throw ()
  {
    return ev_embeddable_backends ();
  }

  // Forwards to ev_set_allocator. `cb` takes a pointer and a size and must be
  // declared non-throwing.
  inline void set_allocator (void *(*cb)(void *ptr, long size) throw ()) throw ()
  {
    ev_set_allocator (cb);
  }

  // Forwards to ev_set_syserr_cb. `cb` receives a message string and must be
  // declared non-throwing.
  inline void set_syserr_cb (void (*cb)(const char *msg) throw ()) throw ()
  {
    ev_set_syserr_cb (cb);
  }

  // EV_CONSTRUCT supplies everything of a watcher constructor except its
  // name, which EV_BEGIN_WATCHER writes in front of it: the parameter list,
  // the exception specification, the base initialiser and an empty body.
  // With EV_MULTIPLICITY the constructor takes a loop_ref that defaults to
  // get_default_loop () and hands it to base<>; otherwise it takes no
  // arguments.
  #if EV_MULTIPLICITY
    #define EV_CONSTRUCT(cppstem,cstem)	                                                \
      (EV_PX = get_default_loop ()) throw ()                                            \
        : base<ev_ ## cstem, cppstem> (EV_A)                                            \
      {                                                                                 \
      }
  #else
    #define EV_CONSTRUCT(cppstem,cstem)                                                 \
      () throw ()                                                                       \
      {                                                                                 \
      }
  #endif

  /* using a template here would require quite a few more lines,
   * so a macro solution was chosen
   *
   * EV_BEGIN_WATCHER (cppstem, cstem) opens `struct cppstem`, derived from
   * base<ev_cstem, cppstem>, and emits the members every watcher shares:
   *   - start () / stop (), forwarding to ev_<cstem>_start / ev_<cstem>_stop
   *   - the constructor (see EV_CONSTRUCT)
   *   - a destructor that stops the watcher
   *   - a using-declaration re-exporting the base class set () overloads
   *   - private, undefined copy constructor and assignment (non-copyable)
   * It ends with `public:` so the type-specific members written between
   * EV_BEGIN_WATCHER and EV_END_WATCHER are public. EV_END_WATCHER closes
   * the struct. */
  #define EV_BEGIN_WATCHER(cppstem,cstem)	                                        \
                                                                                        \
  struct cppstem : base<ev_ ## cstem, cppstem>                                          \
  {                                                                                     \
    void start () throw ()                                                              \
    {                                                                                   \
      ev_ ## cstem ## _start (EV_A_ static_cast<ev_ ## cstem *>(this));                 \
    }                                                                                   \
                                                                                        \
    void stop () throw ()                                                               \
    {                                                                                   \
      ev_ ## cstem ## _stop (EV_A_ static_cast<ev_ ## cstem *>(this));                  \
    }                                                                                   \
                                                                                        \
    cppstem EV_CONSTRUCT(cppstem,cstem)                                                 \
                                                                                        \
    ~cppstem () throw ()                                                                \
    {                                                                                   \
      stop ();                                                                          \
    }                                                                                   \
                                                                                        \
    using base<ev_ ## cstem, cppstem>::set;                                             \
                                                                                        \
  private:                                                                              \
                                                                                        \
    cppstem (const cppstem &o);                                                         \
                                                                                        \
    cppstem &operator =(const cppstem &o);                                              \
                                                                                        \
  public:

  #define EV_END_WATCHER(cppstem,cstem)	                                                \
  };

  // ev::io - C++ wrapper around ev_io.
  EV_BEGIN_WATCHER (io, io)
    // Change descriptor and event mask. An active watcher is stopped for the
    // duration of the change and started again afterwards.
    void set (int fd, int events) throw ()
    {
      const bool restart = is_active ();

      if (restart)
        stop ();

      ev_io_set (static_cast<ev_io *>(this), fd, events);

      if (restart)
        start ();
    }

    // Change only the event mask, keeping the descriptor already stored in
    // the watcher.
    void set (int events) throw ()
    {
      set (fd, events);
    }

    // Configure and start in one call.
    void start (int fd, int events) throw ()
    {
      set (fd, events);
      start ();
    }
  EV_END_WATCHER (io, io)

  EV_BEGIN_WATCHER (timer, timer)
    void set (ev_tstamp after, ev_tstamp repeat = 0.) throw ()
    {
      int active = is_active ();
      if (active) stop ();
      ev_timer_set (static_cast<ev_timer *>(this), after, repeat);
      if (active) start ();
    }

    void start (ev_tstamp after, ev_tstamp repeat = 0.) throw ()
    {
      set (after, repeat);
      start ();
    }

    void again () throw ()
    {
      ev_timer_again (EV_A_ static_cast<ev_timer *>(this));
    }

    ev_tstamp remaining ()
    {
      return ev_timer_remaining (EV_A_ static_cast<ev_timer *>(this));
    }
  EV_END_WATCHER (timer, timer)

  #if EV_PERIODIC_ENABLE
  // ev::periodic - C++ wrapper around ev_periodic (only with
  // EV_PERIODIC_ENABLE).
  EV_BEGIN_WATCHER (periodic, periodic)
    // Change the offset and interval; the reschedule callback is always set
    // to 0. An active watcher is stopped for the duration of the change and
    // started again afterwards.
    void set (ev_tstamp at, ev_tstamp interval = 0.) throw ()
    {
      const bool restart = is_active ();

      if (restart)
        stop ();

      ev_periodic_set (static_cast<ev_periodic *>(this), at, interval, 0);

      if (restart)
        start ();
    }

    // Configure and start in one call.
    void start (ev_tstamp at, ev_tstamp interval = 0.) throw ()
    {
      set (at, interval);
      start ();
    }

    // Forwards to ev_periodic_again.
    void again () throw ()
    {
      ev_periodic *self = static_cast<ev_periodic *>(this);
      ev_periodic_again (EV_A_ self);
    }
  EV_END_WATCHER (periodic, periodic)
  #endif

  #if EV_SIGNAL_ENABLE
  // ev::sig - C++ wrapper around ev_signal (only with EV_SIGNAL_ENABLE).
  EV_BEGIN_WATCHER (sig, signal)
    // Change the watched signal number. An active watcher is stopped for the
    // duration of the change and started again afterwards.
    void set (int signum) throw ()
    {
      const bool restart = is_active ();

      if (restart)
        stop ();

      ev_signal_set (static_cast<ev_signal *>(this), signum);

      if (restart)
        start ();
    }

    // Configure and start in one call.
    void start (int signum) throw ()
    {
      set (signum);
      start ();
    }
  EV_END_WATCHER (sig, signal)
  #endif

  #if EV_CHILD_ENABLE
  // ev::child - C++ wrapper around ev_child (only with EV_CHILD_ENABLE).
  EV_BEGIN_WATCHER (child, child)
    // Change the watched pid and the trace flag. An active watcher is stopped
    // for the duration of the change and started again afterwards.
    void set (int pid, int trace = 0) throw ()
    {
      const bool restart = is_active ();

      if (restart)
        stop ();

      ev_child_set (static_cast<ev_child *>(this), pid, trace);

      if (restart)
        start ();
    }

    // Configure and start in one call.
    void start (int pid, int trace = 0) throw ()
    {
      set (pid, trace);
      start ();
    }
  EV_END_WATCHER (child, child)
  #endif

  #if EV_STAT_ENABLE
  // ev::stat - C++ wrapper around ev_stat (only with EV_STAT_ENABLE).
  EV_BEGIN_WATCHER (stat, stat)
    // Change the watched path and the interval. An active watcher is stopped
    // for the duration of the change and started again afterwards.
    void set (const char *path, ev_tstamp interval = 0.) throw ()
    {
      const bool restart = is_active ();

      if (restart)
        stop ();

      ev_stat_set (static_cast<ev_stat *>(this), path, interval);

      if (restart)
        start ();
    }

    // Stop the watcher, reconfigure it, then start it.
    void start (const char *path, ev_tstamp interval = 0.) throw ()
    {
      stop ();
      set (path, interval);
      start ();
    }

    // Forwards to ev_stat_stat.
    void update () throw ()
    {
      ev_stat *self = static_cast<ev_stat *>(this);
      ev_stat_stat (EV_A_ self);
    }
  EV_END_WATCHER (stat, stat)
  #endif

  #if EV_IDLE_ENABLE
  // ev::idle - C++ wrapper around ev_idle (only with EV_IDLE_ENABLE).
  EV_BEGIN_WATCHER (idle, idle)
    // Nothing to configure: the no-argument set () has an empty body.
    void set () throw () { }
  EV_END_WATCHER (idle, idle)
  #endif

  #if EV_PREPARE_ENABLE
  // ev::prepare - C++ wrapper around ev_prepare (only with
  // EV_PREPARE_ENABLE).
  EV_BEGIN_WATCHER (prepare, prepare)
    // Nothing to configure: the no-argument set () has an empty body.
    void set () throw () { }
  EV_END_WATCHER (prepare, prepare)
  #endif

  #if EV_CHECK_ENABLE
  // ev::check - C++ wrapper around ev_check (only with EV_CHECK_ENABLE).
  EV_BEGIN_WATCHER (check, check)
    // Nothing to configure: the no-argument set () has an empty body.
    void set () throw () { }
  EV_END_WATCHER (check, check)
  #endif

  #if EV_EMBED_ENABLE
  // ev::embed - C++ wrapper around ev_embed (only with EV_EMBED_ENABLE).
  EV_BEGIN_WATCHER (embed, embed)
    // Choose the loop to embed. Named set_embed rather than set because
    // set (struct ev_loop *) is already the inherited "loop set" overload,
    // which rebinds the loop this watcher itself belongs to. An active
    // watcher is stopped for the duration of the change and started again
    // afterwards.
    void set_embed (struct ev_loop *embedded_loop) throw ()
    {
      int active = is_active ();
      if (active) stop ();
      ev_embed_set (static_cast<ev_embed *>(this), embedded_loop);
      if (active) start ();
    }

    // Configure the embedded loop and start in one call. This must go
    // through set_embed (): calling set (embedded_loop) here would select the
    // inherited loop setter and move the watcher onto the embedded loop
    // instead of embedding it.
    void start (struct ev_loop *embedded_loop) throw ()
    {
      set_embed (embedded_loop);
      start ();
    }

    // Forwards to ev_embed_sweep.
    void sweep ()
    {
      ev_embed_sweep (EV_A_ static_cast<ev_embed *>(this));
    }
  EV_END_WATCHER (embed, embed)
  #endif

  #if EV_FORK_ENABLE
  // ev::fork - C++ wrapper around ev_fork (only with EV_FORK_ENABLE).
  EV_BEGIN_WATCHER (fork, fork)
    // Nothing to configure: the no-argument set () has an empty body.
    void set () throw () { }
  EV_END_WATCHER (fork, fork)
  #endif

  #if EV_ASYNC_ENABLE
  // ev::async - C++ wrapper around ev_async (only with EV_ASYNC_ENABLE).
  EV_BEGIN_WATCHER (async, async)
    // Forwards to ev_async_send for this watcher.
    void send () throw ()
    {
      ev_async_send (EV_A_ static_cast<ev_async *>(this));
    }

    // Returns the result of ev_async_pending for this watcher, converted to
    // bool.
    bool async_pending () throw ()
    {
      return ev_async_pending (static_cast<ev_async *>(this));
    }
  EV_END_WATCHER (async, async)
  #endif

  // All watcher classes are defined; remove the helper macros so they do not
  // leak into code that includes this header.
  #undef EV_PX
  #undef EV_PX_
  #undef EV_CONSTRUCT
  #undef EV_BEGIN_WATCHER
  #undef EV_END_WATCHER
}

#endif


================================================
FILE: libev/ev.3
================================================
.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.29)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{
.    if \nF \{
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "LIBEV 3"
.TH LIBEV 3 "2017-06-21" "libev-4.24" "libev - high performance full featured event loop"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
libev \- a high performance full\-featured event loop written in C
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 1
\&   #include <ev.h>
.Ve
.SS "\s-1EXAMPLE PROGRAM\s0"
.IX Subsection "EXAMPLE PROGRAM"
.Vb 2
\&   // a single header file is required
\&   #include <ev.h>
\&
\&   #include <stdio.h> // for puts
\&
\&   // every watcher type has its own typedef\*(Aqd struct
\&   // with the name ev_TYPE
\&   ev_io stdin_watcher;
\&   ev_timer timeout_watcher;
\&
\&   // all watcher callbacks have a similar signature
\&   // this callback is called when data is readable on stdin
\&   static void
\&   stdin_cb (EV_P_ ev_io *w, int revents)
\&   {
\&     puts ("stdin ready");
\&     // for one\-shot events, one must manually stop the watcher
\&     // with its corresponding stop function.
\&     ev_io_stop (EV_A_ w);
\&
\&     // this causes all nested ev_run\*(Aqs to stop iterating
\&     ev_break (EV_A_ EVBREAK_ALL);
\&   }
\&
\&   // another callback, this time for a time\-out
\&   static void
\&   timeout_cb (EV_P_ ev_timer *w, int revents)
\&   {
\&     puts ("timeout");
\&     // this causes the innermost ev_run to stop iterating
\&     ev_break (EV_A_ EVBREAK_ONE);
\&   }
\&
\&   int
\&   main (void)
\&   {
\&     // use the default event loop unless you have special needs
\&     struct ev_loop *loop = EV_DEFAULT;
\&
\&     // initialise an io watcher, then start it
\&     // this one will watch for stdin to become readable
\&     ev_io_init (&stdin_watcher, stdin_cb, /*STDIN_FILENO*/ 0, EV_READ);
\&     ev_io_start (loop, &stdin_watcher);
\&
\&     // initialise a timer watcher, then start it
\&     // simple non\-repeating 5.5 second timeout
\&     ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.);
\&     ev_timer_start (loop, &timeout_watcher);
\&
\&     // now wait for events to arrive
\&     ev_run (loop, 0);
\&
\&     // break was called, so exit
\&     return 0;
\&   }
.Ve
.SH "ABOUT THIS DOCUMENT"
.IX Header "ABOUT THIS DOCUMENT"
This document documents the libev software package.
.PP
The newest version of this document is also available as an html-formatted
web page you might find easier to navigate when reading it for the first
time: <http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod>.
.PP
While this document tries to be as complete as possible in documenting
libev, its usage and the rationale behind its design, it is not a tutorial
on event-based programming, nor will it introduce event-based programming
with libev.
.PP
Familiarity with event based programming techniques in general is assumed
throughout this document.
.SH "WHAT TO READ WHEN IN A HURRY"
.IX Header "WHAT TO READ WHEN IN A HURRY"
This manual tries to be very detailed, but unfortunately, this also makes
it very long. If you just want to know the basics of libev, I suggest
reading \*(L"\s-1ANATOMY OF A WATCHER\*(R"\s0, then the \*(L"\s-1EXAMPLE PROGRAM\*(R"\s0 above and
look up the missing functions in \*(L"\s-1GLOBAL FUNCTIONS\*(R"\s0 and the \f(CW\*(C`ev_io\*(C'\fR and
\&\f(CW\*(C`ev_timer\*(C'\fR sections in \*(L"\s-1WATCHER TYPES\*(R"\s0.
.SH "ABOUT LIBEV"
.IX Header "ABOUT LIBEV"
Libev is an event loop: you register interest in certain events (such as a
file descriptor being readable or a timeout occurring), and it will manage
these event sources and provide your program with events.
.PP
To do this, it must take more or less complete control over your process
(or thread) by executing the \fIevent loop\fR handler, and will then
communicate events via a callback mechanism.
.PP
You register interest in certain events by registering so-called \fIevent
watchers\fR, which are relatively small C structures you initialise with the
details of the event, and then hand it over to libev by \fIstarting\fR the
watcher.
.SS "\s-1FEATURES\s0"
.IX Subsection "FEATURES"
Libev supports \f(CW\*(C`select\*(C'\fR, \f(CW\*(C`poll\*(C'\fR, the Linux-specific \f(CW\*(C`epoll\*(C'\fR, the
BSD-specific \f(CW\*(C`kqueue\*(C'\fR and the Solaris-specific event port mechanisms
for file descriptor events (\f(CW\*(C`ev_io\*(C'\fR), the Linux \f(CW\*(C`inotify\*(C'\fR interface
(for \f(CW\*(C`ev_stat\*(C'\fR), Linux eventfd/signalfd (for faster and cleaner
inter-thread wakeup (\f(CW\*(C`ev_async\*(C'\fR)/signal handling (\f(CW\*(C`ev_signal\*(C'\fR)), relative
timers (\f(CW\*(C`ev_timer\*(C'\fR), absolute timers with customised rescheduling
(\f(CW\*(C`ev_periodic\*(C'\fR), synchronous signals (\f(CW\*(C`ev_signal\*(C'\fR), process status
change events (\f(CW\*(C`ev_child\*(C'\fR), and event watchers dealing with the event
loop mechanism itself (\f(CW\*(C`ev_idle\*(C'\fR, \f(CW\*(C`ev_embed\*(C'\fR, \f(CW\*(C`ev_prepare\*(C'\fR and
\&\f(CW\*(C`ev_check\*(C'\fR watchers) as well as file watchers (\f(CW\*(C`ev_stat\*(C'\fR) and even
limited support for fork events (\f(CW\*(C`ev_fork\*(C'\fR).
.PP
It also is quite fast (see this
benchmark <http://libev.schmorp.de/bench.html> comparing it to libevent
for example).
.SS "\s-1CONVENTIONS\s0"
.IX Subsection "CONVENTIONS"
Libev is very configurable. In this manual the default (and most common)
configuration will be described, which supports multiple event loops. For
more info about various configuration options please have a look at the
\&\fB\s-1EMBED\s0\fR section in this manual. If libev was configured without support
for multiple event loops, then all functions taking an initial argument of
name \f(CW\*(C`loop\*(C'\fR (which is always of type \f(CW\*(C`struct ev_loop *\*(C'\fR) will not have
this argument.
.SS "\s-1TIME REPRESENTATION\s0"
.IX Subsection "TIME REPRESENTATION"
Libev represents time as a single floating point number, representing
the (fractional) number of seconds since the (\s-1POSIX\s0) epoch (in practice
somewhere near the beginning of 1970, details are complicated, don't
ask). This type is called \f(CW\*(C`ev_tstamp\*(C'\fR, which is what you should use
too. It usually aliases to the \f(CW\*(C`double\*(C'\fR type in C. When you need to do
any calculations on it, you should treat it as some floating point value.
.PP
Unlike the name component \f(CW\*(C`stamp\*(C'\fR might indicate, it is also used for
time differences (e.g. delays) throughout libev.
.SH "ERROR HANDLING"
.IX Header "ERROR HANDLING"
Libev knows three classes of errors: operating system errors, usage errors
and internal errors (bugs).
.PP
When libev catches an operating system error it cannot handle (for example
a system call indicating a condition libev cannot fix), it calls the callback
set via \f(CW\*(C`ev_set_syserr_cb\*(C'\fR, which is supposed to fix the problem or
abort. The default is to print a diagnostic message and to call \f(CW\*(C`abort
()\*(C'\fR.
.PP
When libev detects a usage error such as a negative timer interval, then
it will print a diagnostic message and abort (via the \f(CW\*(C`assert\*(C'\fR mechanism,
so \f(CW\*(C`NDEBUG\*(C'\fR will disable this checking): these are programming errors in
the libev caller and need to be fixed there.
.PP
Libev also has a few internal error-checking \f(CW\*(C`assert\*(C'\fRions, and also has
extensive consistency checking code. These do not trigger under normal
circumstances, as they indicate either a bug in libev or worse.
.SH "GLOBAL FUNCTIONS"
.IX Header "GLOBAL FUNCTIONS"
These functions can be called anytime, even before initialising the
library in any way.
.IP "ev_tstamp ev_time ()" 4
.IX Item "ev_tstamp ev_time ()"
Returns the current time as libev would use it. Please note that the
\&\f(CW\*(C`ev_now\*(C'\fR function is usually faster and also often returns the timestamp
you actually want to know. Also interesting is the combination of
\&\f(CW\*(C`ev_now_update\*(C'\fR and \f(CW\*(C`ev_now\*(C'\fR.
.IP "ev_sleep (ev_tstamp interval)" 4
.IX Item "ev_sleep (ev_tstamp interval)"
Sleep for the given interval: The current thread will be blocked
until either it is interrupted or the given time interval has
passed (approximately \- it might return a bit earlier even if not
interrupted). Returns immediately if \f(CW\*(C`interval <= 0\*(C'\fR.
.Sp
Basically this is a sub-second-resolution \f(CW\*(C`sleep ()\*(C'\fR.
.Sp
The range of the \f(CW\*(C`interval\*(C'\fR is limited \- libev only guarantees to work
with sleep times of up to one day (\f(CW\*(C`interval <= 86400\*(C'\fR).
.IP "int ev_version_major ()" 4
.IX Item "int ev_version_major ()"
.PD 0
.IP "int ev_version_minor ()" 4
.IX Item "int ev_version_minor ()"
.PD
You can find out the major and minor \s-1ABI\s0 version numbers of the library
you linked against by calling the functions \f(CW\*(C`ev_version_major\*(C'\fR and
\&\f(CW\*(C`ev_version_minor\*(C'\fR. If you want, you can compare against the global
symbols \f(CW\*(C`EV_VERSION_MAJOR\*(C'\fR and \f(CW\*(C`EV_VERSION_MINOR\*(C'\fR, which specify the
version of the library your program was compiled against.
.Sp
These version numbers refer to the \s-1ABI\s0 version of the library, not the
release version.
.Sp
Usually, it's a good idea to terminate if the major versions mismatch,
as this indicates an incompatible change. Minor versions are usually
compatible to older versions, so a larger minor version alone is usually
not a problem.
.Sp
Example: Make sure we haven't accidentally been linked against the wrong
version (note, however, that this will not detect other \s-1ABI\s0 mismatches,
such as \s-1LFS\s0 or reentrancy).
.Sp
.Vb 3
\&   assert (("libev version mismatch",
\&            ev_version_major () == EV_VERSION_MAJOR
\&            && ev_version_minor () >= EV_VERSION_MINOR));
.Ve
.IP "unsigned int ev_supported_backends ()" 4
.IX Item "unsigned int ev_supported_backends ()"
Return the set of all backends (i.e. their corresponding \f(CW\*(C`EV_BACKEND_*\*(C'\fR
value) compiled into this binary of libev (independent of their
availability on the system you are running on). See \f(CW\*(C`ev_default_loop\*(C'\fR for
a description of the set values.
.Sp
Example: make sure we have the epoll method, because yeah this is cool and
a must have and can we have a torrent of it please!!!11
.Sp
.Vb 2
\&   assert (("sorry, no epoll, no sex",
\&            ev_supported_backends () & EVBACKEND_EPOLL));
.Ve
.IP "unsigned int ev_recommended_backends ()" 4
.IX Item "unsigned int ev_recommended_backends ()"
Return the set of all backends compiled into this binary of libev and
also recommended for this platform, meaning it will work for most file
descriptor types. This set is often smaller than the one returned by
\&\f(CW\*(C`ev_supported_backends\*(C'\fR, as for example kqueue is broken on most BSDs
and will not be auto-detected unless you explicitly request it (assuming
you know what you are doing). This is the set of backends that libev will
probe for if you specify no backends explicitly.
.IP "unsigned int ev_embeddable_backends ()" 4
.IX Item "unsigned int ev_embeddable_backends ()"
Returns the set of backends that are embeddable in other event loops. This
value is platform-specific but can include backends not available on the
current system. To find which embeddable backends might be supported on
the current system, you would need to look at \f(CW\*(C`ev_embeddable_backends ()
& ev_supported_backends ()\*(C'\fR, likewise for recommended ones.
.Sp
See the description of \f(CW\*(C`ev_embed\*(C'\fR watchers for more info.
.IP "ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())" 4
.IX Item "ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())"
Sets the allocation function to use (the prototype is similar \- the
semantics are identical to the \f(CW\*(C`realloc\*(C'\fR C89/SuS/POSIX function). It is
used to allocate and free memory (no surprises here). If it returns zero
when memory needs to be allocated (\f(CW\*(C`size != 0\*(C'\fR), the library might abort
or take some potentially destructive action.
.Sp
Since some systems (at least OpenBSD and Darwin) fail to implement
correct \f(CW\*(C`realloc\*(C'\fR semantics, libev will use a wrapper around the system
\&\f(CW\*(C`realloc\*(C'\fR and \f(CW\*(C`free\*(C'\fR functions by default.
.Sp
You could override this function in high-availability programs to, say,
free some memory if it cannot allocate memory, to use a special allocator,
or even to sleep a while and retry until some memory is available.
.Sp
Example: Replace the libev allocator with one that waits a bit and then
retries (example requires a standards-compliant \f(CW\*(C`realloc\*(C'\fR).
.Sp
.Vb 6
\&   static void *
\&   persistent_realloc (void *ptr, size_t size)
\&   {
\&     for (;;)
\&       {
\&         void *newptr = realloc (ptr, size);
\&
\&         if (newptr)
\&           return newptr;
\&
\&         sleep (60);
\&       }
\&   }
\&
\&   ...
\&   ev_set_allocator (persistent_realloc);
.Ve
.IP "ev_set_syserr_cb (void (*cb)(const char *msg) throw ())" 4
.IX Item "ev_set_syserr_cb (void (*cb)(const char *msg) throw ())"
Set the callback function to call on a retryable system call error (such
as failed select, poll, epoll_wait). The message is a printable string
indicating the system call or subsystem causing the problem. If this
callback is set, then libev will expect it to remedy the situation, no
matter what, when it returns. That is, libev will generally retry the
requested operation, or, if the condition doesn't go away, do bad stuff
(such as abort).
.Sp
Example: This is basically the same thing that libev does internally, too.
.Sp
.Vb 6
\&   static void
\&   fatal_error (const char *msg)
\&   {
\&     perror (msg);
\&     abort ();
\&   }
\&
\&   ...
\&   ev_set_syserr_cb (fatal_error);
.Ve
.IP "ev_feed_signal (int signum)" 4
.IX Item "ev_feed_signal (int signum)"
This function can be used to \*(L"simulate\*(R" a signal receive. It is completely
safe to call this function at any time, from any context, including signal
handlers or random threads.
.Sp
Its main use is to customise signal handling in your process, especially
in the presence of threads. For example, you could block signals
by default in all threads (and specify \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR when
creating any loops), and in one thread, use \f(CW\*(C`sigwait\*(C'\fR or any other
mechanism to wait for signals, then \*(L"deliver\*(R" them to libev by calling
\&\f(CW\*(C`ev_feed_signal\*(C'\fR.
.SH "FUNCTIONS CONTROLLING EVENT LOOPS"
.IX Header "FUNCTIONS CONTROLLING EVENT LOOPS"
An event loop is described by a \f(CW\*(C`struct ev_loop *\*(C'\fR (the \f(CW\*(C`struct\*(C'\fR is
\&\fInot\fR optional in this case unless libev 3 compatibility is disabled, as
libev 3 had an \f(CW\*(C`ev_loop\*(C'\fR function colliding with the struct name).
.PP
The library knows two types of such loops, the \fIdefault\fR loop, which
supports child process events, and dynamically created event loops which
do not.
.IP "struct ev_loop *ev_default_loop (unsigned int flags)" 4
.IX Item "struct ev_loop *ev_default_loop (unsigned int flags)"
This returns the \*(L"default\*(R" event loop object, which is what you should
normally use when you just need \*(L"the event loop\*(R". Event loop objects and
the \f(CW\*(C`flags\*(C'\fR parameter are described in more detail in the entry for
\&\f(CW\*(C`ev_loop_new\*(C'\fR.
.Sp
If the default loop is already initialised then this function simply
returns it (and ignores the flags. If that is troubling you, check
\&\f(CW\*(C`ev_backend ()\*(C'\fR afterwards). Otherwise it will create it with the given
flags, which should almost always be \f(CW0\fR, unless the caller is also the
one calling \f(CW\*(C`ev_run\*(C'\fR or otherwise qualifies as \*(L"the main program\*(R".
.Sp
If you don't know what event loop to use, use the one returned from this
function (or via the \f(CW\*(C`EV_DEFAULT\*(C'\fR macro).
.Sp
Note that this function is \fInot\fR thread-safe, so if you want to use it
from multiple threads, you have to employ some kind of mutex (note also
that this case is unlikely, as loops cannot be shared easily between
threads anyway).
.Sp
The default loop is the only loop that can handle \f(CW\*(C`ev_child\*(C'\fR watchers,
and to do this, it always registers a handler for \f(CW\*(C`SIGCHLD\*(C'\fR. If this is
a problem for your application you can either create a dynamic loop with
\&\f(CW\*(C`ev_loop_new\*(C'\fR which doesn't do that, or you can simply overwrite the
\&\f(CW\*(C`SIGCHLD\*(C'\fR signal handler \fIafter\fR calling \f(CW\*(C`ev_default_init\*(C'\fR.
.Sp
Example: This is the most typical usage.
.Sp
.Vb 2
\&   if (!ev_default_loop (0))
\&     fatal ("could not initialise libev, bad $LIBEV_FLAGS in environment?");
.Ve
.Sp
Example: Restrict libev to the select and poll backends, and do not allow
environment settings to be taken into account:
.Sp
.Vb 1
\&   ev_default_loop (EVBACKEND_POLL | EVBACKEND_SELECT | EVFLAG_NOENV);
.Ve
.IP "struct ev_loop *ev_loop_new (unsigned int flags)" 4
.IX Item "struct ev_loop *ev_loop_new (unsigned int flags)"
This will create and initialise a new event loop object. If the loop
could not be initialised, returns false.
.Sp
This function is thread-safe, and one common way to use libev with
threads is indeed to create one loop per thread, and to use the default
loop in the \*(L"main\*(R" or \*(L"initial\*(R" thread.
.Sp
The flags argument can be used to specify special behaviour or specific
backends to use, and is usually specified as \f(CW0\fR (or \f(CW\*(C`EVFLAG_AUTO\*(C'\fR).
.Sp
The following flags are supported:
.RS 4
.ie n .IP """EVFLAG_AUTO""" 4
.el .IP "\f(CWEVFLAG_AUTO\fR" 4
.IX Item "EVFLAG_AUTO"
The default flags value. Use this if you have no clue (it's the right
thing, believe me).
.ie n .IP """EVFLAG_NOENV""" 4
.el .IP "\f(CWEVFLAG_NOENV\fR" 4
.IX Item "EVFLAG_NOENV"
If this flag bit is or'ed into the flag value (or the program runs setuid
or setgid) then libev will \fInot\fR look at the environment variable
\&\f(CW\*(C`LIBEV_FLAGS\*(C'\fR. Otherwise (the default), this environment variable will
override the flags completely if it is found in the environment. This is
useful to try out specific backends to test their performance, to work
around bugs, or to make libev threadsafe (accessing environment variables
cannot be done in a threadsafe way, but usually it works if no other
thread modifies them).
.ie n .IP """EVFLAG_FORKCHECK""" 4
.el .IP "\f(CWEVFLAG_FORKCHECK\fR" 4
.IX Item "EVFLAG_FORKCHECK"
Instead of calling \f(CW\*(C`ev_loop_fork\*(C'\fR manually after a fork, you can also
make libev check for a fork in each iteration by enabling this flag.
.Sp
This works by calling \f(CW\*(C`getpid ()\*(C'\fR on every iteration of the loop,
and thus this might slow down your event loop if you do a lot of loop
iterations and little real work, but is usually not noticeable (on my
GNU/Linux system for example, \f(CW\*(C`getpid\*(C'\fR is actually a simple 5\-insn sequence
without a system call and thus \fIvery\fR fast, but my GNU/Linux system also has
\&\f(CW\*(C`pthread_atfork\*(C'\fR which is even faster).
.Sp
The big advantage of this flag is that you can forget about fork (and
forget about forgetting to tell libev about forking, although you still
have to ignore \f(CW\*(C`SIGPIPE\*(C'\fR) when you use this flag.
.Sp
This flag setting cannot be overridden or specified in the \f(CW\*(C`LIBEV_FLAGS\*(C'\fR
environment variable.
.ie n .IP """EVFLAG_NOINOTIFY""" 4
.el .IP "\f(CWEVFLAG_NOINOTIFY\fR" 4
.IX Item "EVFLAG_NOINOTIFY"
When this flag is specified, then libev will not attempt to use the
\&\fIinotify\fR \s-1API\s0 for its \f(CW\*(C`ev_stat\*(C'\fR watchers. Apart from debugging and
testing, this flag can be useful to conserve inotify file descriptors, as
otherwise each loop using \f(CW\*(C`ev_stat\*(C'\fR watchers consumes one inotify handle.
.ie n .IP """EVFLAG_SIGNALFD""" 4
.el .IP "\f(CWEVFLAG_SIGNALFD\fR" 4
.IX Item "EVFLAG_SIGNALFD"
When this flag is specified, then libev will attempt to use the
\&\fIsignalfd\fR \s-1API\s0 for its \f(CW\*(C`ev_signal\*(C'\fR (and \f(CW\*(C`ev_child\*(C'\fR) watchers. This \s-1API\s0
delivers signals synchronously, which makes it both faster and might make
it possible to get the queued signal data. It can also simplify signal
handling with threads, as long as you properly block signals in your
threads that are not interested in handling them.
.Sp
Signalfd will not be used by default as this changes your signal mask, and
there are a lot of shoddy libraries and programs (glib's threadpool for
example) that can't properly initialise their signal masks.
.ie n .IP """EVFLAG_NOSIGMASK""" 4
.el .IP "\f(CWEVFLAG_NOSIGMASK\fR" 4
.IX Item "EVFLAG_NOSIGMASK"
When this flag is specified, then libev will avoid modifying the signal
mask. Specifically, this means you have to make sure signals are unblocked
when you want to receive them.
.Sp
This behaviour is useful when you want to do your own signal handling, or
want to handle signals only in specific threads and want to avoid libev
unblocking the signals.
.Sp
It's also required by \s-1POSIX\s0 in a threaded program, as libev calls
\&\f(CW\*(C`sigprocmask\*(C'\fR, whose behaviour is officially unspecified.
.Sp
This flag's behaviour will become the default in future versions of libev.
.ie n .IP """EVBACKEND_SELECT""  (value 1, portable select backend)" 4
.el .IP "\f(CWEVBACKEND_SELECT\fR  (value 1, portable select backend)" 4
.IX Item "EVBACKEND_SELECT (value 1, portable select backend)"
This is your standard \fIselect\fR\|(2) backend. Not \fIcompletely\fR standard, as
libev tries to roll its own fd_set with no limits on the number of fds,
but if that fails, expect a fairly low limit on the number of fds when
using this backend. It doesn't scale too well (O(highest_fd)), but it's
usually the fastest backend for a low number of (low-numbered :) fds.
.Sp
To get good performance out of this backend you need a high amount of
parallelism (most of the file descriptors should be busy). If you are
writing a server, you should \f(CW\*(C`accept ()\*(C'\fR in a loop to accept as many
connections as possible during one iteration. You might also want to have
a look at \f(CW\*(C`ev_set_io_collect_interval ()\*(C'\fR to increase the amount of
readiness notifications you get per iteration.
.Sp
This backend maps \f(CW\*(C`EV_READ\*(C'\fR to the \f(CW\*(C`readfds\*(C'\fR set and \f(CW\*(C`EV_WRITE\*(C'\fR to the
\&\f(CW\*(C`writefds\*(C'\fR set (and to work around Microsoft Windows bugs, also onto the
\&\f(CW\*(C`exceptfds\*(C'\fR set on that platform).
.ie n .IP """EVBACKEND_POLL""    (value 2, poll backend, available everywhere except on windows)" 4
.el .IP "\f(CWEVBACKEND_POLL\fR    (value 2, poll backend, available everywhere except on windows)" 4
.IX Item "EVBACKEND_POLL (value 2, poll backend, available everywhere except on windows)"
And this is your standard \fIpoll\fR\|(2) backend. It's more complicated
than select, but handles sparse fds better and has no artificial
limit on the number of fds you can use (except it will slow down
considerably with a lot of inactive fds). It scales similarly to select,
i.e. O(total_fds). See the entry for \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR, above, for
performance tips.
.Sp
This backend maps \f(CW\*(C`EV_READ\*(C'\fR to \f(CW\*(C`POLLIN | POLLERR | POLLHUP\*(C'\fR, and
\&\f(CW\*(C`EV_WRITE\*(C'\fR to \f(CW\*(C`POLLOUT | POLLERR | POLLHUP\*(C'\fR.
.ie n .IP """EVBACKEND_EPOLL""   (value 4, Linux)" 4
.el .IP "\f(CWEVBACKEND_EPOLL\fR   (value 4, Linux)" 4
.IX Item "EVBACKEND_EPOLL (value 4, Linux)"
Use the linux-specific \fIepoll\fR\|(7) interface (for both pre\- and post\-2.6.9
kernels).
.Sp
For few fds, this backend is a little bit slower than poll and select, but
it scales phenomenally better. While poll and select usually scale like
O(total_fds) where total_fds is the total number of fds (or the highest
fd), epoll scales either O(1) or O(active_fds).
.Sp
The epoll mechanism deserves honorable mention as the most misdesigned
of the more advanced event mechanisms: mere annoyances include silently
dropping file descriptors, requiring a system call per change per file
descriptor (and unnecessary guessing of parameters), problems with dup,
returning before the timeout value, resulting in additional iterations
(and only giving 5ms accuracy while select on the same platform gives
0.1ms) and so on. The biggest issue is fork races, however \- if a program
forks then \fIboth\fR parent and child process have to recreate the epoll
set, which can take considerable time (one syscall per file descriptor)
and is of course hard to detect.
.Sp
Epoll is also notoriously buggy \- embedding epoll fds \fIshould\fR work,
but of course \fIdoesn't\fR, and epoll just loves to report events for
totally \fIdifferent\fR file descriptors (even already closed ones, so
one cannot even remove them from the set) than registered in the set
(especially on \s-1SMP\s0 systems). Libev tries to counter these spurious
notifications by employing an additional generation counter and comparing
that against the events to filter out spurious ones, recreating the set
when required. Epoll also erroneously rounds down timeouts, but gives you
no way to know when and by how much, so sometimes you have to busy-wait
because epoll returns immediately despite a nonzero timeout. And last but
not least, it also refuses to work with some file descriptors which work
perfectly fine with \f(CW\*(C`select\*(C'\fR (files, many character devices...).
.Sp
Epoll is truly the train wreck among event poll mechanisms, a frankenpoll,
cobbled together in a hurry, no thought to design or interaction with
others. Oh, the pain, will it ever stop...
.Sp
While stopping, setting and starting an I/O watcher in the same iteration
will result in some caching, there is still a system call per such
incident (because the same \fIfile descriptor\fR could point to a different
\&\fIfile description\fR now), so it's best to avoid that. Also, \f(CW\*(C`dup ()\*(C'\fR'ed
file descriptors might not work very well if you register events for both
file descriptors.
.Sp
Best performance from this backend is achieved by not unregistering all
watchers for a file descriptor until it has been closed, if possible,
i.e. keep at least one watcher active per fd at all times. Stopping and
starting a watcher (without re-setting it) also usually doesn't cause
extra overhead. A fork can both result in spurious notifications as well
as in libev having to destroy and recreate the epoll object, which can
take considerable time and thus should be avoided.
.Sp
All this means that, in practice, \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR can be as fast as or
faster than epoll for maybe up to a hundred file descriptors, depending on
the usage. So sad.
.Sp
While nominally embeddable in other event loops, this feature is broken in
all kernel versions tested so far.
.Sp
This backend maps \f(CW\*(C`EV_READ\*(C'\fR and \f(CW\*(C`EV_WRITE\*(C'\fR in the same way as
\&\f(CW\*(C`EVBACKEND_POLL\*(C'\fR.
.ie n .IP """EVBACKEND_KQUEUE""  (value 8, most \s-1BSD\s0 clones)" 4
.el .IP "\f(CWEVBACKEND_KQUEUE\fR  (value 8, most \s-1BSD\s0 clones)" 4
.IX Item "EVBACKEND_KQUEUE (value 8, most BSD clones)"
Kqueue deserves special mention, as at the time of this writing, it
was broken on all BSDs except NetBSD (usually it doesn't work reliably
with anything but sockets and pipes, except on Darwin, where of course
it's completely useless). Unlike epoll, however, whose brokenness
is by design, these kqueue bugs can (and eventually will) be fixed
without \s-1API\s0 changes to existing programs. For this reason it's not being
\&\*(L"auto-detected\*(R" unless you explicitly specify it in the flags (i.e. using
\&\f(CW\*(C`EVBACKEND_KQUEUE\*(C'\fR) or libev was compiled on a known-to-be-good (\-enough)
system like NetBSD.
.Sp
You still can embed kqueue into a normal poll or select backend and use it
only for sockets (after having made sure that sockets work with kqueue on
the target platform). See \f(CW\*(C`ev_embed\*(C'\fR watchers for more info.
.Sp
It scales in the same way as the epoll backend, but the interface to the
kernel is more efficient (which says nothing about its actual speed, of
course). While stopping, setting and starting an I/O watcher never
causes an extra system call as with \f(CW\*(C`EVBACKEND_EPOLL\*(C'\fR, it still adds up to
two event changes per incident. Support for \f(CW\*(C`fork ()\*(C'\fR is very bad (you
might have to leak fd's on fork, but it's more sane than epoll) and it
drops fds silently in similarly hard-to-detect cases.
.Sp
This backend usually performs well under most conditions.
.Sp
While nominally embeddable in other event loops, this doesn't work
everywhere, so you might need to test for this. And since it is broken
almost everywhere, you should only use it when you have a lot of sockets
(for which it usually works), by embedding it into another event loop
(e.g. \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR (but \f(CW\*(C`poll\*(C'\fR is of course
also broken on \s-1OS X\s0)) and, did I mention it, using it only for sockets.
.Sp
This backend maps \f(CW\*(C`EV_READ\*(C'\fR into an \f(CW\*(C`EVFILT_READ\*(C'\fR kevent with
\&\f(CW\*(C`NOTE_EOF\*(C'\fR, and \f(CW\*(C`EV_WRITE\*(C'\fR into an \f(CW\*(C`EVFILT_WRITE\*(C'\fR kevent with
\&\f(CW\*(C`NOTE_EOF\*(C'\fR.
.ie n .IP """EVBACKEND_DEVPOLL"" (value 16, Solaris 8)" 4
.el .IP "\f(CWEVBACKEND_DEVPOLL\fR (value 16, Solaris 8)" 4
.IX Item "EVBACKEND_DEVPOLL (value 16, Solaris 8)"
This is not implemented yet (and might never be, unless you send me an
implementation). According to reports, \f(CW\*(C`/dev/poll\*(C'\fR only supports sockets
and is not embeddable, which would limit the usefulness of this backend
immensely.
.ie n .IP """EVBACKEND_PORT""    (value 32, Solaris 10)" 4
.el .IP "\f(CWEVBACKEND_PORT\fR    (value 32, Solaris 10)" 4
.IX Item "EVBACKEND_PORT (value 32, Solaris 10)"
This uses the Solaris 10 event port mechanism. As with everything on Solaris,
it's really slow, but it still scales very well (O(active_fds)).
.Sp
While this backend scales well, it requires one system call per active
file descriptor per loop iteration. For small and medium numbers of file
descriptors a \*(L"slow\*(R" \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR backend
might perform better.
.Sp
On the positive side, this backend actually performed fully to
specification in all tests and is fully embeddable, which is a rare feat
among the OS-specific backends (I vastly prefer correctness over speed
hacks).
.Sp
On the negative side, the interface is \fIbizarre\fR \- so bizarre that
even sun itself gets it wrong in their code examples: The event polling
function sometimes returns events to the caller even though an error
occurred, but with no indication whether it has done so or not (yes, it's
even documented that way) \- deadly for edge-triggered interfaces where you
absolutely have to know whether an event occurred or not because you have
to re-arm the watcher.
.Sp
Fortunately libev seems to be able to work around these idiocies.
.Sp
This backend maps \f(CW\*(C`EV_READ\*(C'\fR and \f(CW\*(C`EV_WRITE\*(C'\fR in the same way as
\&\f(CW\*(C`EVBACKEND_POLL\*(C'\fR.
.ie n .IP """EVBACKEND_ALL""" 4
.el .IP "\f(CWEVBACKEND_ALL\fR" 4
.IX Item "EVBACKEND_ALL"
Try all backends (even potentially broken ones that wouldn't be tried
with \f(CW\*(C`EVFLAG_AUTO\*(C'\fR). Since this is a mask, you can do stuff such as
\&\f(CW\*(C`EVBACKEND_ALL & ~EVBACKEND_KQUEUE\*(C'\fR.
.Sp
It is definitely not recommended to use this flag, use whatever
\&\f(CW\*(C`ev_recommended_backends ()\*(C'\fR returns, or simply do not specify a backend
at all.
.ie n .IP """EVBACKEND_MASK""" 4
.el .IP "\f(CWEVBACKEND_MASK\fR" 4
.IX Item "EVBACKEND_MASK"
Not a backend at all, but a mask to select all backend bits from a
\&\f(CW\*(C`flags\*(C'\fR value, in case you want to mask out any backends from a flags
value (e.g. when modifying the \f(CW\*(C`LIBEV_FLAGS\*(C'\fR environment variable).
.RE
.RS 4
.Sp
If one or more of the backend flags are or'ed into the flags value,
then only these backends will be tried (in the reverse order as listed
here). If none are specified, all backends in \f(CW\*(C`ev_recommended_backends
()\*(C'\fR will be tried.
.Sp
Example: Try to create an event loop that uses epoll and nothing else.
.Sp
.Vb 3
\&   struct ev_loop *epoller = ev_loop_new (EVBACKEND_EPOLL | EVFLAG_NOENV);
\&   if (!epoller)
\&     fatal ("no epoll found here, maybe it hides under your chair");
.Ve
.Sp
Example: Use whatever libev has to offer, but make sure that kqueue is
used if available.
.Sp
.Vb 1
\&   struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_KQUEUE);
.Ve
.RE
.IP "ev_loop_destroy (loop)" 4
.IX Item "ev_loop_destroy (loop)"
Destroys an event loop object (frees all memory and kernel state
etc.). None of the active event watchers will be stopped in the normal
sense, so e.g. \f(CW\*(C`ev_is_active\*(C'\fR might still return true. It is your
responsibility to either stop all watchers cleanly yourself \fIbefore\fR
calling this function, or cope with the fact afterwards (which is usually
the easiest thing, you can just ignore the watchers and/or \f(CW\*(C`free ()\*(C'\fR them
for example).
.Sp
Note that certain global state, such as signal state (and installed signal
handlers), will not be freed by this function, and related watchers (such
as signal and child watchers) would need to be stopped manually.
.Sp
This function is normally used on loop objects allocated by
\&\f(CW\*(C`ev_loop_new\*(C'\fR, but it can also be used on the default loop returned by
\&\f(CW\*(C`ev_default_loop\*(C'\fR, in which case it is not thread-safe.
.Sp
Note that it is not advisable to call this function on the default loop
except in the rare occasion where you really need to free its resources.
If you need dynamically allocated loops it is better to use \f(CW\*(C`ev_loop_new\*(C'\fR
and \f(CW\*(C`ev_loop_destroy\*(C'\fR.
.IP "ev_loop_fork (loop)" 4
.IX Item "ev_loop_fork (loop)"
This function sets a flag that causes subsequent \f(CW\*(C`ev_run\*(C'\fR iterations
to reinitialise the kernel state for backends that have one. Despite
the name, you can call it anytime you are allowed to start or stop
watchers (except inside an \f(CW\*(C`ev_prepare\*(C'\fR callback), but it makes most
sense after forking, in the child process. You \fImust\fR call it (or use
\&\f(CW\*(C`EVFLAG_FORKCHECK\*(C'\fR) in the child before resuming or calling \f(CW\*(C`ev_run\*(C'\fR.
.Sp
In addition, if you want to reuse a loop (via this function or
\&\f(CW\*(C`EVFLAG_FORKCHECK\*(C'\fR), you \fIalso\fR have to ignore \f(CW\*(C`SIGPIPE\*(C'\fR.
.Sp
Again, you \fIhave\fR to call it on \fIany\fR loop that you want to re-use after
a fork, \fIeven if you do not plan to use the loop in the parent\fR. This is
because some kernel interfaces *cough* \fIkqueue\fR *cough* do funny things
during fork.
.Sp
On the other hand, you only need to call this function in the child
process if and only if you want to use the event loop in the child. If
you just fork+exec or create a new loop in the child, you don't have to
call it at all (in fact, \f(CW\*(C`epoll\*(C'\fR is so badly broken that it makes a
difference, but libev will usually detect this case on its own and do a
costly reset of the backend).
.Sp
The function itself is quite fast and it's usually not a problem to call
it just in case after a fork.
.Sp
Example: Automate calling \f(CW\*(C`ev_loop_fork\*(C'\fR on the default loop when
using pthreads.
.Sp
.Vb 5
\&   static void
\&   post_fork_child (void)
\&   {
\&     ev_loop_fork (EV_DEFAULT);
\&   }
\&
\&   ...
\&   pthread_atfork (0, 0, post_fork_child);
.Ve
.IP "int ev_is_default_loop (loop)" 4
.IX Item "int ev_is_default_loop (loop)"
Returns true when the given loop is, in fact, the default loop, and false
otherwise.
.IP "unsigned int ev_iteration (loop)" 4
.IX Item "unsigned int ev_iteration (loop)"
Returns the current iteration count for the event loop, which is identical
to the number of times libev did poll for new events. It starts at \f(CW0\fR
and happily wraps around with enough iterations.
.Sp
This value can sometimes be useful as a generation counter of sorts (it
\&\*(L"ticks\*(R" the number of loop iterations), as it roughly corresponds with
\&\f(CW\*(C`ev_prepare\*(C'\fR and \f(CW\*(C`ev_check\*(C'\fR calls \- and is incremented between the
prepare and check phases.
.IP "unsigned int ev_depth (loop)" 4
.IX Item "unsigned int ev_depth (loop)"
Returns the number of times \f(CW\*(C`ev_run\*(C'\fR was entered minus the number of
times \f(CW\*(C`ev_run\*(C'\fR was exited normally, in other words, the recursion depth.
.Sp
Outside \f(CW\*(C`ev_run\*(C'\fR, this number is zero. In a callback, this number is
\&\f(CW1\fR, unless \f(CW\*(C`ev_run\*(C'\fR was invoked recursively (or from another thread),
in which case it is higher.
.Sp
Leaving \f(CW\*(C`ev_run\*(C'\fR abnormally (setjmp/longjmp, cancelling the thread,
throwing an exception etc.), doesn't count as \*(L"exit\*(R" \- consider this
as a hint to avoid such ungentleman-like behaviour unless it's really
convenient, in which case it is fully supported.
.IP "unsigned int ev_backend (loop)" 4
.IX Item "unsigned int ev_backend (loop)"
Returns one of the \f(CW\*(C`EVBACKEND_*\*(C'\fR flags indicating the event backend in
use.
.IP "ev_tstamp ev_now (loop)" 4
.IX Item "ev_tstamp ev_now (loop)"
Returns the current \*(L"event loop time\*(R", which is the time the event loop
received events and started processing them. This timestamp does not
change as long as callbacks are being processed, and this is also the base
time used for relative timers. You can treat it as the timestamp of the
event occurring (or more correctly, libev finding out about it).
.IP "ev_now_update (loop)" 4
.IX Item "ev_now_update (loop)"
Establishes the current time by querying the kernel, updating the time
returned by \f(CW\*(C`ev_now ()\*(C'\fR in the process. This is a costly operation and
is usually done automatically within \f(CW\*(C`ev_run ()\*(C'\fR.
.Sp
This function is rarely useful, but when some event callback runs for a
very long time without entering the event loop, updating libev's idea of
the current time is a good idea.
.Sp
See also \*(L"The special problem of time updates\*(R" in the \f(CW\*(C`ev_timer\*(C'\fR section.
.IP "ev_suspend (loop)" 4
.IX Item "ev_suspend (loop)"
.PD 0
.IP "ev_resume (loop)" 4
.IX Item "ev_resume (loop)"
.PD
These two functions suspend and resume an event loop, for use when the
loop is not used for a while and timeouts should not be processed.
.Sp
A typical use case would be an interactive program such as a game:  When
the user presses \f(CW\*(C`^Z\*(C'\fR to suspend the game and resumes it an hour later it
would be best to handle timeouts as if no time had actually passed while
the program was suspended. This can be achieved by calling \f(CW\*(C`ev_suspend\*(C'\fR
in your \f(CW\*(C`SIGTSTP\*(C'\fR handler, sending yourself a \f(CW\*(C`SIGSTOP\*(C'\fR and calling
\&\f(CW\*(C`ev_resume\*(C'\fR directly afterwards to resume timer processing.
.Sp
Effectively, all \f(CW\*(C`ev_timer\*(C'\fR watchers will be delayed by the time spent
between \f(CW\*(C`ev_suspend\*(C'\fR and \f(CW\*(C`ev_resume\*(C'\fR, and all \f(CW\*(C`ev_periodic\*(C'\fR watchers
will be rescheduled (that is, they will lose any events that would have
occurred while suspended).
.Sp
After calling \f(CW\*(C`ev_suspend\*(C'\fR you \fBmust not\fR call \fIany\fR function on the
given loop other than \f(CW\*(C`ev_resume\*(C'\fR, and you \fBmust not\fR call \f(CW\*(C`ev_resume\*(C'\fR
without a previous call to \f(CW\*(C`ev_suspend\*(C'\fR.
.Sp
Calling \f(CW\*(C`ev_suspend\*(C'\fR/\f(CW\*(C`ev_resume\*(C'\fR has the side effect of updating the
event loop time (see \f(CW\*(C`ev_now_update\*(C'\fR).
.IP "bool ev_run (loop, int flags)" 4
.IX Item "bool ev_run (loop, int flags)"
Finally, this is it, the event handler. This function usually is called
after you have initialised all your watchers and you want to start
handling events. It will ask the operating system for any new events, call
the watcher callbacks, and then repeat the whole process indefinitely: This
is why event loops are called \fIloops\fR.
.Sp
If the flags argument is specified as \f(CW0\fR, it will keep handling events
until either no event watchers are active anymore or \f(CW\*(C`ev_break\*(C'\fR was
called.
.Sp
The return value is false if there are no more active watchers (which
usually means \*(L"all jobs done\*(R" or \*(L"deadlock\*(R"), and true in all other cases
(which usually means \*(L"you should call \f(CW\*(C`ev_run\*(C'\fR again\*(R").
.Sp
Please note that an explicit \f(CW\*(C`ev_break\*(C'\fR is usually better than
relying on all watchers to be stopped when deciding when a program has
finished (especially in interactive programs), but having a program
that automatically loops as long as it has to and no longer by virtue
of relying on its watchers stopping correctly, that is truly a thing of
beauty.
.Sp
This function is \fImostly\fR exception-safe \- you can break out of a
\&\f(CW\*(C`ev_run\*(C'\fR call by calling \f(CW\*(C`longjmp\*(C'\fR in a callback, throwing a \*(C+
exception and so on. This does not decrement the \f(CW\*(C`ev_depth\*(C'\fR value, nor
will it clear any outstanding \f(CW\*(C`EVBREAK_ONE\*(C'\fR breaks.
.Sp
A flags value of \f(CW\*(C`EVRUN_NOWAIT\*(C'\fR will look for new events, will handle
those events and any already outstanding ones, but will not wait and
block your process in case there are no events and will return after one
iteration of the loop. This is sometimes useful to poll and handle new
events while doing lengthy calculations, to keep the program responsive.
.Sp
A flags value of \f(CW\*(C`EVRUN_ONCE\*(C'\fR will look for new events (waiting if
necessary) and will handle those and any already outstanding ones. It
will block your process until at least one new event arrives (which could
be an event internal to libev itself, so there is no guarantee that a
user-registered callback will be called), and will return after one
iteration of the loop.
.Sp
This is useful if you are waiting for some external event in conjunction
with something not expressible using other libev watchers (i.e. "roll your
own \f(CW\*(C`ev_run\*(C'\fR"). However, a pair of \f(CW\*(C`ev_prepare\*(C'\fR/\f(CW\*(C`ev_check\*(C'\fR watchers is
usually a better approach for this kind of thing.
.Sp
Here are the gory details of what \f(CW\*(C`ev_run\*(C'\fR does (this is for your
understanding, not a guarantee that things will work exactly like this in
future versions):
.Sp
.Vb 10
\&   \- Increment loop depth.
\&   \- Reset the ev_break status.
\&   \- Before the first iteration, call any pending watchers.
\&   LOOP:
\&   \- If EVFLAG_FORKCHECK was used, check for a fork.
\&   \- If a fork was detected (by any means), queue and call all fork watchers.
\&   \- Queue and call all prepare watchers.
\&   \- If ev_break was called, goto FINISH.
\&   \- If we have been forked, detach and recreate the kernel state
\&     as to not disturb the other process.
\&   \- Update the kernel state with all outstanding changes.
\&   \- Update the "event loop time" (ev_now ()).
\&   \- Calculate for how long to sleep or block, if at all
\&     (active idle watchers, EVRUN_NOWAIT or not having
\&     any active watchers at all will result in not sleeping).
\&   \- Sleep if the I/O and timer collect interval say so.
\&   \- Increment loop iteration counter.
\&   \- Block the process, waiting for any events.
\&   \- Queue all outstanding I/O (fd) events.
\&   \- Update the "event loop time" (ev_now ()), and do time jump adjustments.
\&   \- Queue all expired timers.
\&   \- Queue all expired periodics.
\&   \- Queue all idle watchers with priority higher than that of pending events.
\&   \- Queue all check watchers.
\&   \- Call all queued watchers in reverse order (i.e. check watchers first).
\&     Signals and child watchers are implemented as I/O watchers, and will
\&     be handled here by queueing them when their watcher gets executed.
\&   \- If ev_break has been called, or EVRUN_ONCE or EVRUN_NOWAIT
\&     were used, or there are no active watchers, goto FINISH, otherwise
\&     continue with step LOOP.
\&   FINISH:
\&   \- Reset the ev_break status iff it was EVBREAK_ONE.
\&   \- Decrement the loop depth.
\&   \- Return.
.Ve
.Sp
Example: Queue some jobs and then loop until no events are outstanding
anymore.
.Sp
.Vb 4
\&   ... queue jobs here, make sure they register event watchers as long
\&   ... as they still have work to do (even an idle watcher will do..)
\&   ev_run (my_loop, 0);
\&   ... jobs done or somebody called break. yeah!
.Ve
.IP "ev_break (loop, how)" 4
.IX Item "ev_break (loop, how)"
Can be used to make a call to \f(CW\*(C`ev_run\*(C'\fR return early (but only after it
has processed all outstanding events). The \f(CW\*(C`how\*(C'\fR argument must be either
\&\f(CW\*(C`EVBREAK_ONE\*(C'\fR, which will make the innermost \f(CW\*(C`ev_run\*(C'\fR call return, or
\&\f(CW\*(C`EVBREAK_ALL\*(C'\fR, which will make all nested \f(CW\*(C`ev_run\*(C'\fR calls return.
.Sp
This \*(L"break state\*(R" will be cleared on the next call to \f(CW\*(C`ev_run\*(C'\fR.
.Sp
It is safe to call \f(CW\*(C`ev_break\*(C'\fR from outside any \f(CW\*(C`ev_run\*(C'\fR calls, too, in
which case it will have no effect.
.IP "ev_ref (loop)" 4
.IX Item "ev_ref (loop)"
.PD 0
.IP "ev_unref (loop)" 4
.IX Item "ev_unref (loop)"
.PD
Ref/unref can be used to add or remove a reference count on the event
loop: Every watcher keeps one reference, and as long as the reference
count is nonzero, \f(CW\*(C`ev_run\*(C'\fR will not return on its own.
.Sp
This is useful when you have a watcher that you never intend to
unregister, but that nevertheless should not keep \f(CW\*(C`ev_run\*(C'\fR from
returning. In such a case, call \f(CW\*(C`ev_unref\*(C'\fR after starting, and \f(CW\*(C`ev_ref\*(C'\fR
before stopping it.
.Sp
As an example, libev itself uses this for its internal signal pipe: It
is not visible to the libev user and should not keep \f(CW\*(C`ev_run\*(C'\fR from
exiting if no event watchers registered by it are active. It is also an
excellent way to do this for generic recurring timers or from within
third-party libraries. Just remember to \fIunref after start\fR and \fIref
before stop\fR (but only if the watcher wasn't active before, or was active
before, respectively. Note also that libev might stop watchers itself
(e.g. non-repeating timers) in which case you have to \f(CW\*(C`ev_ref\*(C'\fR
in the callback).
.Sp
Example: Create a signal watcher, but keep it from keeping \f(CW\*(C`ev_run\*(C'\fR
running when nothing else is active.
.Sp
.Vb 4
\&   ev_signal exitsig;
\&   ev_signal_init (&exitsig, sig_cb, SIGINT);
\&   ev_signal_start (loop, &exitsig);
\&   ev_unref (loop);
.Ve
.Sp
Example: For some weird reason, unregister the above signal handler again.
.Sp
.Vb 2
\&   ev_ref (loop);
\&   ev_signal_stop (loop, &exitsig);
.Ve
.IP "ev_set_io_collect_interval (loop, ev_tstamp interval)" 4
.IX Item "ev_set_io_collect_interval (loop, ev_tstamp interval)"
.PD 0
.IP "ev_set_timeout_collect_interval (loop, ev_tstamp interval)" 4
.IX Item "ev_set_timeout_collect_interval (loop, ev_tstamp interval)"
.PD
These advanced functions influence the time that libev will spend waiting
for events. Both time intervals are by default \f(CW0\fR, meaning that libev
will try to invoke timer/periodic callbacks and I/O callbacks with minimum
latency.
.Sp
Setting these to a higher value (the \f(CW\*(C`interval\*(C'\fR \fImust\fR be >= \f(CW0\fR)
allows libev to delay invocation of I/O and timer/periodic callbacks
to increase efficiency of loop iterations (or to increase power-saving
opportunities).
.Sp
The idea is that sometimes your program runs just fast enough to handle
one (or very few) event(s) per loop iteration. While this makes the
program responsive, it also wastes a lot of \s-1CPU\s0 time to poll for new
events, especially with backends like \f(CW\*(C`select ()\*(C'\fR which have a high
overhead for the actual polling but can deliver many events at once.
.Sp
By setting a higher \fIio collect interval\fR you allow libev to spend more
time collecting I/O events, so you can handle more events per iteration,
at the cost of increasing latency. Timeouts (both \f(CW\*(C`ev_periodic\*(C'\fR and
\&\f(CW\*(C`ev_timer\*(C'\fR) will not be affected. Setting this to a non-null value will
introduce an additional \f(CW\*(C`ev_sleep ()\*(C'\fR call into most loop iterations. The
sleep time ensures that libev will not poll for I/O events more often than
once per this interval, on average (as long as the host time resolution is
good enough).
.Sp
Likewise, by setting a higher \fItimeout collect interval\fR you allow libev
to spend more time collecting timeouts, at the expense of increased
latency/jitter/inexactness (the watcher callback will be called
later). \f(CW\*(C`ev_io\*(C'\fR watchers will not be affected. Setting this to a non-null
value will not introduce any overhead in libev.
.Sp
Many (busy) programs can usually benefit by setting the I/O collect
interval to a value near \f(CW0.1\fR or so, which is often enough for
interactive servers (of course not for games), likewise for timeouts. It
usually doesn't make much sense to set it to a lower value than \f(CW0.01\fR,
as this approaches the timing granularity of most systems. Note that if
you do transactions with the outside world and you can't increase the
parallelism, then this setting will limit your transaction rate (if you
need to poll once per transaction and the I/O collect interval is 0.01,
then you can't do more than 100 transactions per second).
.Sp
Setting the \fItimeout collect interval\fR can improve the opportunity for
saving power, as the program will \*(L"bundle\*(R" timer callback invocations that
are \*(L"near\*(R" in time together, by delaying some, thus reducing the number of
times the process sleeps and wakes up again. Another useful technique to
reduce iterations/wake\-ups is to use \f(CW\*(C`ev_periodic\*(C'\fR watchers and make sure
they fire on, say, one-second boundaries only.
.Sp
Example: we only need 0.1s timeout granularity, and we wish not to poll
more often than 100 times per second:
.Sp
.Vb 2
\&   ev_set_timeout_collect_interval (EV_DEFAULT_UC_ 0.1);
\&   ev_set_io_collect_interval (EV_DEFAULT_UC_ 0.01);
.Ve
.IP "ev_invoke_pending (loop)" 4
.IX Item "ev_invoke_pending (loop)"
This call will simply invoke all pending watchers while resetting their
pending state. Normally, \f(CW\*(C`ev_run\*(C'\fR does this automatically when required,
but when overriding the invoke callback this call comes in handy. This
function can be invoked from a watcher \- this can be useful for example
when you want to do some lengthy calculation and want to pass further
event handling to another thread (you still have to make sure only one
thread executes within \f(CW\*(C`ev_invoke_pending\*(C'\fR or \f(CW\*(C`ev_run\*(C'\fR of course).
.IP "int ev_pending_count (loop)" 4
.IX Item "int ev_pending_count (loop)"
Returns the number of pending watchers \- zero indicates that no watchers
are pending.
.IP "ev_set_invoke_pending_cb (loop, void (*invoke_pending_cb)(\s-1EV_P\s0))" 4
.IX Item "ev_set_invoke_pending_cb (loop, void (*invoke_pending_cb)(EV_P))"
This overrides the invoke pending functionality of the loop: Instead of
invoking all pending watchers when there are any, \f(CW\*(C`ev_run\*(C'\fR will call
this callback instead. This is useful, for example, when you want to
invoke the actual watchers inside another context (another thread etc.).
.Sp
If you want to reset the callback, use \f(CW\*(C`ev_invoke_pending\*(C'\fR as new
callback.
.IP "ev_set_loop_release_cb (loop, void (*release)(\s-1EV_P\s0) throw (), void (*acquire)(\s-1EV_P\s0) throw ())" 4
.IX Item "ev_set_loop_release_cb (loop, void (*release)(EV_P) throw (), void (*acquire)(EV_P) throw ())"
Sometimes you want to share the same loop between multiple threads. This
can be done relatively simply by putting mutex_lock/unlock calls around
each call to a libev function.
.Sp
However, \f(CW\*(C`ev_run\*(C'\fR can run an indefinite time, so it is not feasible
to wait for it to return. One way around this is to wake up the event
loop via \f(CW\*(C`ev_break\*(C'\fR and \f(CW\*(C`ev_async_send\*(C'\fR, another way is to set these
\&\fIrelease\fR and \fIacquire\fR callbacks on the loop.
.Sp
When set, then \f(CW\*(C`release\*(C'\fR will be called just before the thread is
suspended waiting for new events, and \f(CW\*(C`acquire\*(C'\fR is called just
afterwards.
.Sp
Ideally, \f(CW\*(C`release\*(C'\fR will just call your mutex_unlock function, and
\&\f(CW\*(C`acquire\*(C'\fR will just call the mutex_lock function again.
.Sp
While event loop modifications are allowed between invocations of
\&\f(CW\*(C`release\*(C'\fR and \f(CW\*(C`acquire\*(C'\fR (that's their only purpose after all), no
modifications done will affect the event loop, i.e. adding watchers will
have no effect on the set of file descriptors being watched, or the time
waited. Use an \f(CW\*(C`ev_async\*(C'\fR watcher to wake up \f(CW\*(C`ev_run\*(C'\fR when you want it
to take note of any changes you made.
.Sp
In theory, threads executing \f(CW\*(C`ev_run\*(C'\fR will be async-cancel safe between
invocations of \f(CW\*(C`release\*(C'\fR and \f(CW\*(C`acquire\*(C'\fR.
.Sp
See also the locking example in the \f(CW\*(C`THREADS\*(C'\fR section later in this
document.
.IP "ev_set_userdata (loop, void *data)" 4
.IX Item "ev_set_userdata (loop, void *data)"
.PD 0
.IP "void *ev_userdata (loop)" 4
.IX Item "void *ev_userdata (loop)"
.PD
Set and retrieve a single \f(CW\*(C`void *\*(C'\fR associated with a loop. When
\&\f(CW\*(C`ev_set_userdata\*(C'\fR has never been called, then \f(CW\*(C`ev_userdata\*(C'\fR returns
\&\f(CW0\fR.
.Sp
These two functions can be used to associate arbitrary data with a loop,
and are intended solely for the \f(CW\*(C`invoke_pending_cb\*(C'\fR, \f(CW\*(C`release\*(C'\fR and
\&\f(CW\*(C`acquire\*(C'\fR callbacks described above, but of course can be (ab\-)used for
any other purpose as well.
.IP "ev_verify (loop)" 4
.IX Item "ev_verify (loop)"
This function only does something when \f(CW\*(C`EV_VERIFY\*(C'\fR support has been
compiled in, which is the default for non-minimal builds. It tries to go
through all internal structures and checks them for validity. If anything
is found to be inconsistent, it will print an error message to standard
error and call \f(CW\*(C`abort ()\*(C'\fR.
.Sp
This can be used to catch bugs inside libev itself: under normal
circumstances, this function will never abort as of course libev keeps its
data structures consistent.
.SH "ANATOMY OF A WATCHER"
.IX Header "ANATOMY OF A WATCHER"
In the following description, uppercase \f(CW\*(C`TYPE\*(C'\fR in names stands for the
watcher type, e.g. \f(CW\*(C`ev_TYPE_start\*(C'\fR can mean \f(CW\*(C`ev_timer_start\*(C'\fR for timer
watchers and \f(CW\*(C`ev_io_start\*(C'\fR for I/O watchers.
.PP
A watcher is an opaque structure that you allocate and register to record
your interest in some event. To make a concrete example, imagine you want
to wait for \s-1STDIN\s0 to become readable, you would create an \f(CW\*(C`ev_io\*(C'\fR watcher
for that:
.PP
.Vb 5
\&   static void my_cb (struct ev_loop *loop, ev_io *w, int revents)
\&   {
\&     ev_io_stop (loop, w);
\&     ev_break (loop, EVBREAK_ALL);
\&   }
\&
\&   struct ev_loop *loop = ev_default_loop (0);
\&
\&   ev_io stdin_watcher;
\&
\&   ev_init (&stdin_watcher, my_cb);
\&   ev_io_set (&stdin_watcher, STDIN_FILENO, EV_READ);
\&   ev_io_start (loop, &stdin_watcher);
\&
\&   ev_run (loop, 0);
.Ve
.PP
As you can see, you are responsible for allocating the memory for your
watcher structures (and it is \fIusually\fR a bad idea to do this on the
stack).
.PP
Each watcher has an associated watcher structure (called \f(CW\*(C`struct ev_TYPE\*(C'\fR
or simply \f(CW\*(C`ev_TYPE\*(C'\fR, as typedefs are provided for all watcher structs).
.PP
Each watcher structure must be initialised by a call to \f(CW\*(C`ev_init (watcher
*, callback)\*(C'\fR, which expects a callback to be provided. This callback is
invoked each time the event occurs (or, in the case of I/O watchers, each
time the event loop detects that the file descriptor given is readable
and/or writable).
.PP
Each watcher type further has its own \f(CW\*(C`ev_TYPE_set (watcher *, ...)\*(C'\fR
macro to configure it, with arguments specific to the watcher type. There
is also a macro to combine initialisation and setting in one call: \f(CW\*(C`ev_TYPE_init (watcher *, callback, ...)\*(C'\fR.
.PP
To make the watcher actually watch out for events, you have to start it
with a watcher-specific start function (\f(CW\*(C`ev_TYPE_start (loop, watcher
*)\*(C'\fR), and you can stop watching for events at any time by calling the
corresponding stop function (\f(CW\*(C`ev_TYPE_stop (loop, watcher *)\*(C'\fR).
.PP
As long as your watcher is active (has been started but not stopped) you
must not touch the values stored in it. Most specifically you must never
reinitialise it or call its \f(CW\*(C`ev_TYPE_set\*(C'\fR macro.
.PP
Each and every callback receives the event loop pointer as first, the
registered watcher structure as second, and a bitset of received events as
third argument.
.PP
The received events usually include a single bit per event type received
(you can receive multiple events at the same time). The possible bit masks
are:
.ie n .IP """EV_READ""" 4
.el .IP "\f(CWEV_READ\fR" 4
.IX Item "EV_READ"
.PD 0
.ie n .IP """EV_WRITE""" 4
.el .IP "\f(CWEV_WRITE\fR" 4
.IX Item "EV_WRITE"
.PD
The file descriptor in the \f(CW\*(C`ev_io\*(C'\fR watcher has become readable and/or
writable.
.ie n .IP """EV_TIMER""" 4
.el .IP "\f(CWEV_TIMER\fR" 4
.IX Item "EV_TIMER"
The \f(CW\*(C`ev_timer\*(C'\fR watcher has timed out.
.ie n .IP """EV_PERIODIC""" 4
.el .IP "\f(CWEV_PERIODIC\fR" 4
.IX Item "EV_PERIODIC"
The \f(CW\*(C`ev_periodic\*(C'\fR watcher has timed out.
.ie n .IP """EV_SIGNAL""" 4
.el .IP "\f(CWEV_SIGNAL\fR" 4
.IX Item "EV_SIGNAL"
The signal specified in the \f(CW\*(C`ev_signal\*(C'\fR watcher has been received by a thread.
.ie n .IP """EV_CHILD""" 4
.el .IP "\f(CWEV_CHILD\fR" 4
.IX Item "EV_CHILD"
The pid specified in the \f(CW\*(C`ev_child\*(C'\fR watcher has received a status change.
.ie n .IP """EV_STAT""" 4
.el .IP "\f(CWEV_STAT\fR" 4
.IX Item "EV_STAT"
The path specified in the \f(CW\*(C`ev_stat\*(C'\fR watcher changed its attributes somehow.
.ie n .IP """EV_IDLE""" 4
.el .IP "\f(CWEV_IDLE\fR" 4
.IX Item "EV_IDLE"
The \f(CW\*(C`ev_idle\*(C'\fR watcher has determined that you have nothing better to do.
.ie n .IP """EV_PREPARE""" 4
.el .IP "\f(CWEV_PREPARE\fR" 4
.IX Item "EV_PREPARE"
.PD 0
.ie n .IP """EV_CHECK""" 4
.el .IP "\f(CWEV_CHECK\fR" 4
.IX Item "EV_CHECK"
.PD
All \f(CW\*(C`ev_prepare\*(C'\fR watchers are invoked just \fIbefore\fR \f(CW\*(C`ev_run\*(C'\fR starts to
gather new events, and all \f(CW\*(C`ev_check\*(C'\fR watchers are queued (not invoked)
just after \f(CW\*(C`ev_run\*(C'\fR has gathered them, but before it queues any callbacks
for any received events. That means \f(CW\*(C`ev_prepare\*(C'\fR watchers are the last
watchers invoked before the event loop sleeps or polls for new events, and
\&\f(CW\*(C`ev_check\*(C'\fR watchers will be invoked before any other watchers of the same
or lower priority within an event loop iteration.
.Sp
Callbacks of both watcher types can start and stop as many watchers as
they want, and all of them will be taken into account (for example, a
\&\f(CW\*(C`ev_prepare\*(C'\fR watcher might start an idle watcher to keep \f(CW\*(C`ev_run\*(C'\fR from
blocking).
.ie n .IP """EV_EMBED""" 4
.el .IP "\f(CWEV_EMBED\fR" 4
.IX Item "EV_EMBED"
The embedded event loop specified in the \f(CW\*(C`ev_embed\*(C'\fR watcher needs attention.
.ie n .IP """EV_FORK""" 4
.el .IP "\f(CWEV_FORK\fR" 4
.IX Item "EV_FORK"
The event loop has been resumed in the child process after fork (see
\&\f(CW\*(C`ev_fork\*(C'\fR).
.ie n .IP """EV_CLEANUP""" 4
.el .IP "\f(CWEV_CLEANUP\fR" 4
.IX Item "EV_CLEANUP"
The event loop is about to be destroyed (see \f(CW\*(C`ev_cleanup\*(C'\fR).
.ie n .IP """EV_ASYNC""" 4
.el .IP "\f(CWEV_ASYNC\fR" 4
.IX Item "EV_ASYNC"
The given async watcher has been asynchronously notified (see \f(CW\*(C`ev_async\*(C'\fR).
.ie n .IP """EV_CUSTOM""" 4
.el .IP "\f(CWEV_CUSTOM\fR" 4
.IX Item "EV_CUSTOM"
Not ever sent (or otherwise used) by libev itself, but can be freely used
by libev users to signal watchers (e.g. via \f(CW\*(C`ev_feed_event\*(C'\fR).
.ie n .IP """EV_ERROR""" 4
.el .IP "\f(CWEV_ERROR\fR" 4
.IX Item "EV_ERROR"
An unspecified error has occurred, the watcher has been stopped. This might
happen because the watcher could not be properly started because libev
ran out of memory, a file descriptor was found to be closed or any other
problem. Libev considers these application bugs.
.Sp
You best act on it by reporting the problem and somehow coping with the
watcher being stopped. Note that well-written programs should not receive
an error ever, so when your watcher receives it, this usually indicates a
bug in your program.
.Sp
Libev will usually signal a few \*(L"dummy\*(R" events together with an error, for
example it might indicate that a fd is readable or writable, and if your
callback is well-written it can just attempt the operation and cope with
the error from \fIread()\fR or \fIwrite()\fR. This will not work in multi-threaded
programs, though, as the fd could already be closed and reused for another
thing, so beware.
.SS "\s-1GENERIC WATCHER FUNCTIONS\s0"
.IX Subsection "GENERIC WATCHER FUNCTIONS"
.ie n .IP """ev_init"" (ev_TYPE *watcher, callback)" 4
.el .IP "\f(CWev_init\fR (ev_TYPE *watcher, callback)" 4
.IX Item "ev_init (ev_TYPE *watcher, callback)"
This macro initialises the generic portion of a watcher. The contents
of the watcher object can be arbitrary (so \f(CW\*(C`malloc\*(C'\fR will do). Only
the generic parts of the watcher are initialised, you \fIneed\fR to call
the type-specific \f(CW\*(C`ev_TYPE_set\*(C'\fR macro afterwards to initialise the
type-specific parts. For each type there is also a \f(CW\*(C`ev_TYPE_init\*(C'\fR macro
which rolls both calls into one.
.Sp
You can reinitialise a watcher at any time as long as it has been stopped
(or never started) and there are no pending events outstanding.
.Sp
The callback is always of type \f(CW\*(C`void (*)(struct ev_loop *loop, ev_TYPE *watcher,
int revents)\*(C'\fR.
.Sp
Example: Initialise an \f(CW\*(C`ev_io\*(C'\fR watcher in two steps.
.Sp
.Vb 3
\&   ev_io w;
\&   ev_init (&w, my_cb);
\&   ev_io_set (&w, STDIN_FILENO, EV_READ);
.Ve
.ie n .IP """ev_TYPE_set"" (ev_TYPE *watcher, [args])" 4
.el .IP "\f(CWev_TYPE_set\fR (ev_TYPE *watcher, [args])" 4
.IX Item "ev_TYPE_set (ev_TYPE *watcher, [args])"
This macro initialises the type-specific parts of a watcher. You need to
call \f(CW\*(C`ev_init\*(C'\fR at least once before you call this macro, but you can
call \f(CW\*(C`ev_TYPE_set\*(C'\fR any number of times. You must not, however, call this
macro on a watcher that is active (it can be pending, however, which is a
difference to the \f(CW\*(C`ev_init\*(C'\fR macro).
.Sp
Although some watcher types do not have type-specific arguments
(e.g. \f(CW\*(C`ev_prepare\*(C'\fR) you still need to call its \f(CW\*(C`set\*(C'\fR macro.
.Sp
See \f(CW\*(C`ev_init\*(C'\fR, above, for an example.
.ie n .IP """ev_TYPE_init"" (ev_TYPE *watcher, callback, [args])" 4
.el .IP "\f(CWev_TYPE_init\fR (ev_TYPE *watcher, callback, [args])" 4
.IX Item "ev_TYPE_init (ev_TYPE *watcher, callback, [args])"
This convenience macro rolls both \f(CW\*(C`ev_init\*(C'\fR and \f(CW\*(C`ev_TYPE_set\*(C'\fR macro
calls into a single call. This is the most convenient method to initialise
a watcher. The same limitations apply, of course.
.Sp
Example: Initialise and set an \f(CW\*(C`ev_io\*(C'\fR watcher in one step.
.Sp
.Vb 1
\&   ev_io_init (&w, my_cb, STDIN_FILENO, EV_READ);
.Ve
.ie n .IP """ev_TYPE_start"" (loop, ev_TYPE *watcher)" 4
.el .IP "\f(CWev_TYPE_start\fR (loop, ev_TYPE *watcher)" 4
.IX Item "ev_TYPE_start (loop, ev_TYPE *watcher)"
Starts (activates) the given watcher. Only active watchers will receive
events. If the watcher is already active nothing will happen.
.Sp
Example: Start the \f(CW\*(C`ev_io\*(C'\fR watcher that is being abused as example in this
whole section.
.Sp
.Vb 1
\&   ev_io_start (EV_DEFAULT_UC, &w);
.Ve
.ie n .IP """ev_TYPE_stop"" (loop, ev_TYPE *watcher)" 4
.el .IP "\f(CWev_TYPE_stop\fR (loop, ev_TYPE *watcher)" 4
.IX Item "ev_TYPE_stop (loop, ev_TYPE *watcher)"
Stops the given watcher if active, and clears the pending status (whether
the watcher was active or not).
.Sp
It is possible that stopped watchers are pending \- for example,
non-repeating timers are being stopped when they become pending \- but
calling \f(CW\*(C`ev_TYPE_stop\*(C'\fR ensures that the watcher is neither active nor
pending. If you want to free or reuse the memory used by the watcher it is
therefore a good idea to always call its \f(CW\*(C`ev_TYPE_stop\*(C'\fR function.
.IP "bool ev_is_active (ev_TYPE *watcher)" 4
.IX Item "bool ev_is_active (ev_TYPE *watcher)"
Returns a true value iff the watcher is active (i.e. it has been started
and not yet been stopped). As long as a watcher is active you must not modify
it.
.IP "bool ev_is_pending (ev_TYPE *watcher)" 4
.IX Item "bool ev_is_pending (ev_TYPE *watcher)"
Returns a true value iff the watcher is pending, (i.e. it has outstanding
events but its callback has not yet been invoked). As long as a watcher
is pending (but not active) you must not call an init function on it (but
\&\f(CW\*(C`ev_TYPE_set\*(C'\fR is safe), you must not change its priority, and you must
make sure the watcher is available to libev (e.g. you cannot \f(CW\*(C`free ()\*(C'\fR
it).
.IP "callback ev_cb (ev_TYPE *watcher)" 4
.IX Item "callback ev_cb (ev_TYPE *watcher)"
Returns the callback currently set on the watcher.
.IP "ev_set_cb (ev_TYPE *watcher, callback)" 4
.IX Item "ev_set_cb (ev_TYPE *watcher, callback)"
Change the callback. You can change the callback at virtually any time
(modulo threads).
.IP "ev_set_priority (ev_TYPE *watcher, int priority)" 4
.IX Item "ev_set_priority (ev_TYPE *watcher, int priority)"
.PD 0
.IP "int ev_priority (ev_TYPE *watcher)" 4
.IX Item "int ev_priority (ev_TYPE *watcher)"
.PD
Set and query the priority of the watcher. The priority is a small
integer between \f(CW\*(C`EV_MAXPRI\*(C'\fR (default: \f(CW2\fR) and \f(CW\*(C`EV_MINPRI\*(C'\fR
(default: \f(CW\*(C`\-2\*(C'\fR). Pending watchers with higher priority will be invoked
before watchers with lower priority, but priority will not keep watchers
from being executed (except for \f(CW\*(C`ev_idle\*(C'\fR watchers).
.Sp
If you need to suppress invocation when higher priority events are pending
you need to look at \f(CW\*(C`ev_idle\*(C'\fR watchers, which provide this functionality.
.Sp
You \fImust not\fR change the priority of a watcher as long as it is active or
pending.
.Sp
Setting a priority outside the range of \f(CW\*(C`EV_MINPRI\*(C'\fR to \f(CW\*(C`EV_MAXPRI\*(C'\fR is
fine, as long as you do not mind that the priority value you query might
or might not have been clamped to the valid range.
.Sp
The default priority used by watchers when no priority has been set is
always \f(CW0\fR, which is supposed to not be too high and not be too low :).
.Sp
See \*(L"\s-1WATCHER PRIORITY MODELS\*(R"\s0, below, for a more thorough treatment of
priorities.
.IP "ev_invoke (loop, ev_TYPE *watcher, int revents)" 4
.IX Item "ev_invoke (loop, ev_TYPE *watcher, int revents)"
Invoke the \f(CW\*(C`watcher\*(C'\fR with the given \f(CW\*(C`loop\*(C'\fR and \f(CW\*(C`revents\*(C'\fR. Neither
\&\f(CW\*(C`loop\*(C'\fR nor \f(CW\*(C`revents\*(C'\fR need to be valid as long as the watcher callback
can deal with that fact, as both are simply passed through to the
callback.
.IP "int ev_clear_pending (loop, ev_TYPE *watcher)" 4
.IX Item "int ev_clear_pending (loop, ev_TYPE *watcher)"
If the watcher is pending, this function clears its pending status and
returns its \f(CW\*(C`revents\*(C'\fR bitset (as if its callback was invoked). If the
watcher isn't pending it does nothing and returns \f(CW0\fR.
.Sp
Sometimes it can be useful to \*(L"poll\*(R" a watcher instead of waiting for its
callback to be invoked, which can be accomplished with this function.
.IP "ev_feed_event (loop, ev_TYPE *watcher, int revents)" 4
.IX Item "ev_feed_event (loop, ev_TYPE *watcher, int revents)"
Feeds the given event set into the event loop, as if the specified event
had happened for the specified watcher (which must be a pointer to an
initialised but not necessarily started event watcher). Obviously you must
not free the watcher as long as it has pending events.
.Sp
Stopping the watcher, letting libev invoke it, or calling
\&\f(CW\*(C`ev_clear_pending\*(C'\fR will clear the pending event, even if the watcher was
not started in the first place.
.Sp
See also \f(CW\*(C`ev_feed_fd_event\*(C'\fR and \f(CW\*(C`ev_feed_signal_event\*(C'\fR for related
functions that do not need a watcher.
.PP
See also the \*(L"\s-1ASSOCIATING CUSTOM DATA WITH A WATCHER\*(R"\s0 and \*(L"\s-1BUILDING YOUR
OWN COMPOSITE WATCHERS\*(R"\s0 idioms.
.SS "\s-1WATCHER STATES\s0"
.IX Subsection "WATCHER STATES"
There are various watcher states mentioned throughout this manual \-
active, pending and so on. In this section these states and the rules to
transition between them will be described in more detail \- and while these
rules might look complicated, they usually do \*(L"the right thing\*(R".
.IP "initialised" 4
.IX Item "initialised"
Before a watcher can be registered with the event loop it has to be
initialised. This can be done with a call to \f(CW\*(C`ev_TYPE_init\*(C'\fR, or calls to
\&\f(CW\*(C`ev_init\*(C'\fR followed by the watcher-specific \f(CW\*(C`ev_TYPE_set\*(C'\fR function.
.Sp
In this state it is simply some block of memory that is suitable for
use in an event loop. It can be moved around, freed, reused etc. at
will \- as long as you either keep the memory contents intact, or call
\&\f(CW\*(C`ev_TYPE_init\*(C'\fR again.
.IP "started/running/active" 4
.IX Item "started/running/active"
Once a watcher has been started with a call to \f(CW\*(C`ev_TYPE_start\*(C'\fR it becomes
property of the event loop, and is actively waiting for events. While in
this state it cannot be accessed (except in a few documented ways), moved,
freed or anything else \- the only legal thing is to keep a pointer to it,
and call libev functions on it that are documented to work on active watchers.
.IP "pending" 4
.IX Item "pending"
If a watcher is active and libev determines that an event it is interested
in has occurred (such as a timer expiring), it will become pending. It will
stay in this pending state until either it is stopped or its callback is
about to be invoked, so it is not normally pending inside the watcher
callback.
.Sp
The watcher might or might not be active while it is pending (for example,
an expired non-repeating timer can be pending but no longer active). If it
is stopped, it can be freely accessed (e.g. by calling \f(CW\*(C`ev_TYPE_set\*(C'\fR),
but it is still property of the event loop at this time, so cannot be
moved, freed or reused. And if it is active the rules described in the
previous item still apply.
.Sp
It is also possible to feed an event on a watcher that is not active (e.g.
via \f(CW\*(C`ev_feed_event\*(C'\fR), in which case it becomes pending without being
active.
.IP "stopped" 4
.IX Item "stopped"
A watcher can be stopped implicitly by libev (in which case it might still
be pending), or explicitly by calling its \f(CW\*(C`ev_TYPE_stop\*(C'\fR function. The
latter will clear any pending state the watcher might be in, regardless
of whether it was active or not, so stopping a watcher explicitly before
freeing it is often a good idea.
.Sp
While stopped (and not pending) the watcher is essentially in the
initialised state, that is, it can be reused, moved, modified in any way
you wish (but when you trash the memory block, you need to \f(CW\*(C`ev_TYPE_init\*(C'\fR
it again).
.SS "\s-1WATCHER PRIORITY MODELS\s0"
.IX Subsection "WATCHER PRIORITY MODELS"
Many event loops support \fIwatcher priorities\fR, which are usually small
integers that influence the ordering of event callback invocation
between watchers in some way, all else being equal.
.PP
In libev, watcher priorities can be set using \f(CW\*(C`ev_set_priority\*(C'\fR. See its
description for the more technical details such as the actual priority
range.
.PP
There are two common ways how these priorities are being interpreted
by event loops:
.PP
In the more common lock-out model, higher priorities \*(L"lock out\*(R" invocation
of lower priority watchers, which means as long as higher priority
watchers receive events, lower priority watchers are not being invoked.
.PP
The less common only-for-ordering model uses priorities solely to order
callback invocation within a single event loop iteration: Higher priority
watchers are invoked before lower priority ones, but they all get invoked
before polling for new events.
.PP
Libev uses the second (only-for-ordering) model for all its watchers
except for idle watchers (which use the lock-out model).
.PP
The rationale behind this is that implementing the lock-out model for
watchers is not well supported by most kernel interfaces, and most event
libraries will just poll for the same events again and again as long as
their callbacks have not been executed, which is very inefficient in the
common case of one high-priority watcher locking out a mass of lower
priority ones.
.PP
Static (ordering) priorities are most useful when you have two or more
watchers handling the same resource: a typical usage example is having an
\&\f(CW\*(C`ev_io\*(C'\fR watcher to receive data, and an associated \f(CW\*(C`ev_timer\*(C'\fR to handle
timeouts. Under load, data might be received while the program handles
other jobs, but since timers normally get invoked first, the timeout
handler will be executed before checking for data. In that case, giving
the timer a lower priority than the I/O watcher ensures that I/O will be
handled first even under adverse conditions (which is usually, but not
always, what you want).
.PP
Since idle watchers use the \*(L"lock-out\*(R" model, meaning that idle watchers
will only be executed when no same or higher priority watchers have
received events, they can be used to implement the \*(L"lock-out\*(R" model when
required.
.PP
For example, to emulate how many other event libraries handle priorities,
you can associate an \f(CW\*(C`ev_idle\*(C'\fR watcher to each such watcher, and in
the normal watcher callback, you just start the idle watcher. The real
processing is done in the idle watcher callback. This causes libev to
continuously poll and process kernel event data for the watcher, but when
the lock-out case is known to be rare (which in turn is rare :), this is
workable.
.PP
Usually, however, the lock-out model implemented that way will perform
miserably under the type of load it was designed to handle. In that case,
it might be preferable to stop the real watcher before starting the
idle watcher, so the kernel will not have to process the event in case
the actual processing will be delayed for considerable time.
.PP
Here is an example of an I/O watcher that should run at a strictly lower
priority than the default, and which should only process data when no
other events are pending:
.PP
.Vb 2
\&   ev_idle idle; // actual processing watcher
\&   ev_io io;     // actual event watcher
\&
\&   static void
\&   io_cb (EV_P_ ev_io *w, int revents)
\&   {
\&     // stop the I/O watcher, we received the event, but
\&     // are not yet ready to handle it.
\&     ev_io_stop (EV_A_ w);
\&
\&     // start the idle watcher to handle the actual event.
\&     // it will not be executed as long as other watchers
\&     // with the default priority are receiving events.
\&     ev_idle_start (EV_A_ &idle);
\&   }
\&
\&   static void
\&   idle_cb (EV_P_ ev_idle *w, int revents)
\&   {
\&     // actual processing
\&     read (STDIN_FILENO, ...);
\&
\&     // have to start the I/O watcher again, as
\&     // we have handled the event
\&     ev_io_start (EV_A_ &io);
\&   }
\&
\&   // initialisation
\&   ev_idle_init (&idle, idle_cb);
\&   ev_io_init (&io, io_cb, STDIN_FILENO, EV_READ);
\&   ev_io_start (EV_DEFAULT_ &io);
.Ve
.PP
In the \*(L"real\*(R" world, it might also be beneficial to start a timer, so that
low-priority connections can not be locked out forever under load. This
enables your program to keep a lower latency for important connections
during short periods of high load, while not completely locking out less
important ones.
.SH "WATCHER TYPES"
.IX Header "WATCHER TYPES"
This section describes each watcher in detail, but will not repeat
information given in the last section. Any initialisation/set macros,
functions and members specific to the watcher type are explained.
.PP
Members are additionally marked with either \fI[read\-only]\fR, meaning that,
while the watcher is active, you can look at the member and expect some
sensible content, but you must not modify it (you can modify it while the
watcher is stopped to your hearts content), or \fI[read\-write]\fR, which
means you can expect it to have some sensible content while the watcher
is active, but you can also modify it. Modifying it may not do something
sensible or take immediate effect (or do anything at all), but libev will
not crash or malfunction in any way.
.ie n .SS """ev_io"" \- is this file descriptor readable or writable?"
.el .SS "\f(CWev_io\fP \- is this file descriptor readable or writable?"
.IX Subsection "ev_io - is this file descriptor readable or writable?"
I/O watchers check whether a file descriptor is readable or writable
in each iteration of the event loop, or, more precisely, when reading
would not block the process and writing would at least be able to write
some data. This behaviour is called level-triggering because you keep
receiving events as long as the condition persists. Remember you can stop
the watcher if you don't want to act on the event and neither want to
receive future events.
.PP
In general you can register as many read and/or write event watchers per
fd as you want (as long as you don't confuse yourself). Setting all file
descriptors to non-blocking mode is also usually a good idea (but not
required if you know what you are doing).
.PP
Another thing you have to watch out for is that it is quite easy to
receive \*(L"spurious\*(R" readiness notifications, that is, your callback might
be called with \f(CW\*(C`EV_READ\*(C'\fR but a subsequent \f(CW\*(C`read\*(C'\fR(2) will actually block
because there is no data. It is very easy to get into this situation even
with a relatively standard program structure. Thus it is best to always
use non-blocking I/O: An extra \f(CW\*(C`read\*(C'\fR(2) returning \f(CW\*(C`EAGAIN\*(C'\fR is far
preferable to a program hanging until some data arrives.
.PP
If you cannot run the fd in non-blocking mode (for example you should
not play around with an Xlib connection), then you have to separately
re-test whether a file descriptor is really ready with a known-to-be good
interface such as poll (fortunately in the case of Xlib, it already does
this on its own, so it's quite safe to use). Some people additionally
use \f(CW\*(C`SIGALRM\*(C'\fR and an interval timer, just to be sure you won't block
indefinitely.
.PP
But really, best use non-blocking mode.
.PP
\fIThe special problem of disappearing file descriptors\fR
.IX Subsection "The special problem of disappearing file descriptors"
.PP
Some backends (e.g. kqueue, epoll) need to be told about closing a file
descriptor (either due to calling \f(CW\*(C`close\*(C'\fR explicitly or any other means,
such as \f(CW\*(C`dup2\*(C'\fR). The reason is that you register interest in some file
descriptor, but when it goes away, the operating system will silently drop
this interest. If another file descriptor with the same number then is
registered with libev, there is no efficient way to see that this is, in
fact, a different file descriptor.
.PP
To avoid having to explicitly tell libev about such cases, libev follows
the following policy:  Each time \f(CW\*(C`ev_io_set\*(C'\fR is being called, libev
will assume that this is potentially a new file descriptor, otherwise
it is assumed that the file descriptor stays the same. That means that
you \fIhave\fR to call \f(CW\*(C`ev_io_set\*(C'\fR (or \f(CW\*(C`ev_io_init\*(C'\fR) when you change the
descriptor even if the file descriptor number itself did not change.
.PP
This is how one would do it normally anyway, the important point is that
the libev application should not optimise around libev but should leave
optimisations to libev.
.PP
\fIThe special problem of dup'ed file descriptors\fR
.IX Subsection "The special problem of dup'ed file descriptors"
.PP
Some backends (e.g. epoll), cannot register events for file descriptors,
but only events for the underlying file descriptions. That means when you
have \f(CW\*(C`dup ()\*(C'\fR'ed file descriptors or weirder constellations, and register
events for them, only one file descriptor might actually receive events.
.PP
There is no workaround possible except not registering events
for potentially \f(CW\*(C`dup ()\*(C'\fR'ed file descriptors, or to resort to
\&\f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR.
.PP
\fIThe special problem of files\fR
.IX Subsection "The special problem of files"
.PP
Many people try to use \f(CW\*(C`select\*(C'\fR (or libev) on file descriptors
representing files, and expect it to become ready when their program
doesn't block on disk accesses (which can take a long time on their own).
.PP
However, this cannot ever work in the \*(L"expected\*(R" way \- you get a readiness
notification as soon as the kernel knows whether and how much data is
there, and in the case of open files, that's always the case, so you
always get a readiness notification instantly, and your read (or possibly
write) will still block on the disk I/O.
.PP
Another way to view it is that in the case of sockets, pipes, character
devices and so on, there is another party (the sender) that delivers data
on its own, but in the case of files, there is no such thing: the disk
will not send data on its own, simply because it doesn't know what you
wish to read \- you would first have to request some data.
.PP
Since files are typically not-so-well supported by advanced notification
mechanism, libev tries hard to emulate \s-1POSIX\s0 behaviour with respect
to files, even though you should not use it. The reason for this is
convenience: sometimes you want to watch \s-1STDIN\s0 or \s-1STDOUT,\s0 which is
usually a tty, often a pipe, but also sometimes files or special devices
(for example, \f(CW\*(C`epoll\*(C'\fR on Linux works with \fI/dev/random\fR but not with
\&\fI/dev/urandom\fR), and even though the file might better be served with
asynchronous I/O instead of with non-blocking I/O, it is still useful when
it \*(L"just works\*(R" instead of freezing.
.PP
So avoid file descriptors pointing to files when you know it (e.g. use
libeio), but use them when it is convenient, e.g. for \s-1STDIN/STDOUT,\s0 or
when you rarely read from a file instead of from a socket, and want to
reuse the same code path.
.PP
\fIThe special problem of fork\fR
.IX Subsection "The special problem of fork"
.PP
Some backends (epoll, kqueue) do not support \f(CW\*(C`fork ()\*(C'\fR at all or exhibit
useless behaviour. Libev fully supports fork, but needs to be told about
it in the child if you want to continue to use it in the child.
.PP
To support fork in your child processes, you have to call \f(CW\*(C`ev_loop_fork
()\*(C'\fR after a fork in the child, enable \f(CW\*(C`EVFLAG_FORKCHECK\*(C'\fR, or resort to
\&\f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR.
.PP
\fIThe special problem of \s-1SIGPIPE\s0\fR
.IX Subsection "The special problem of SIGPIPE"
.PP
While not really specific to libev, it is easy to forget about \f(CW\*(C`SIGPIPE\*(C'\fR:
when writing to a pipe whose other end has been closed, your program gets
sent a \s-1SIGPIPE,\s0 which, by default, aborts your program. For most programs
this is sensible behaviour, for daemons, this is usually undesirable.
.PP
So when you encounter spurious, unexplained daemon exits, make sure you
ignore \s-1SIGPIPE \s0(and maybe make sure you log the exit status of your daemon
somewhere, as that would have given you a big clue).
.PP
\fIThe special problem of \fIaccept()\fIing when you can't\fR
.IX Subsection "The special problem of accept()ing when you can't"
.PP
Many implementations of the \s-1POSIX \s0\f(CW\*(C`accept\*(C'\fR function (for example,
found in post\-2004 Linux) have the peculiar behaviour of not removing a
connection from the pending queue in all error cases.
.PP
For example, larger servers often run out of file descriptors (because
of resource limits), causing \f(CW\*(C`accept\*(C'\fR to fail with \f(CW\*(C`ENFILE\*(C'\fR but not
rejecting the connection, leading to libev signalling readiness on
the next iteration again (the connection still exists after all), and
typically causing the program to loop at 100% \s-1CPU\s0 usage.
.PP
Unfortunately, the set of errors that cause this issue differs between
operating systems, there is usually little the app can do to remedy the
situation, and no thread-safe method of removing the connection to
cope with overload is known (to me).
.PP
One of the easiest ways to handle this situation is to just ignore it
\&\- when the program encounters an overload, it will just loop until the
situation is over. While this is a form of busy waiting, no \s-1OS\s0 offers an
event-based way to handle this situation, so it's the best one can do.
.PP
A better way to handle the situation is to log any errors other than
\&\f(CW\*(C`EAGAIN\*(C'\fR and \f(CW\*(C`EWOULDBLOCK\*(C'\fR, making sure not to flood the log with such
messages, and continue as usual, which at least gives the user an idea of
what could be wrong (\*(L"raise the ulimit!\*(R"). For extra points one could stop
the \f(CW\*(C`ev_io\*(C'\fR watcher on the listening fd \*(L"for a while\*(R", which reduces \s-1CPU\s0
usage.
.PP
If your program is single-threaded, then you could also keep a dummy file
descriptor for overload situations (e.g. by opening \fI/dev/null\fR), and
when you run into \f(CW\*(C`ENFILE\*(C'\fR or \f(CW\*(C`EMFILE\*(C'\fR, close it, run \f(CW\*(C`accept\*(C'\fR,
close that fd, and create a new dummy fd. This will gracefully refuse
clients under typical overload conditions.
.PP
The last way to handle it is to simply log the error and \f(CW\*(C`exit\*(C'\fR, as
is often done with \f(CW\*(C`malloc\*(C'\fR failures, but this results in an easy
opportunity for a DoS attack.
.PP
\fIWatcher-Specific Functions\fR
.IX Subsection "Watcher-Specific Functions"
.IP "ev_io_init (ev_io *, callback, int fd, int events)" 4
.IX Item "ev_io_init (ev_io *, callback, int fd, int events)"
.PD 0
.IP "ev_io_set (ev_io *, int fd, int events)" 4
.IX Item "ev_io_set (ev_io *, int fd, int events)"
.PD
Configures an \f(CW\*(C`ev_io\*(C'\fR watcher. The \f(CW\*(C`fd\*(C'\fR is the file descriptor to
receive events for and \f(CW\*(C`events\*(C'\fR is either \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR or
\&\f(CW\*(C`EV_READ | EV_WRITE\*(C'\fR, to express the desire to receive the given events.
.IP "int fd [read\-only]" 4
.IX Item "int fd [read-only]"
The file descriptor being watched.
.IP "int events [read\-only]" 4
.IX Item "int events [read-only]"
The events being watched.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Call \f(CW\*(C`stdin_readable_cb\*(C'\fR when \s-1STDIN_FILENO\s0 has become, well,
readable, but only once. Since it is likely line-buffered, you could
attempt to read a whole line in the callback.
.PP
.Vb 6
\&   static void
\&   stdin_readable_cb (struct ev_loop *loop, ev_io *w, int revents)
\&   {
\&      ev_io_stop (loop, w);
\&     .. read from stdin here (or from w\->fd) and handle any I/O errors
\&   }
\&
\&   ...
\&   struct ev_loop *loop = ev_default_init (0);
\&   ev_io stdin_readable;
\&   ev_io_init (&stdin_readable, stdin_readable_cb, STDIN_FILENO, EV_READ);
\&   ev_io_start (loop, &stdin_readable);
\&   ev_run (loop, 0);
.Ve
.ie n .SS """ev_timer"" \- relative and optionally repeating timeouts"
.el .SS "\f(CWev_timer\fP \- relative and optionally repeating timeouts"
.IX Subsection "ev_timer - relative and optionally repeating timeouts"
Timer watchers are simple relative timers that generate an event after a
given time, and optionally repeating in regular intervals after that.
.PP
The timers are based on real time, that is, if you register an event that
times out after an hour and you reset your system clock to January last
year, it will still time out after (roughly) one hour. \*(L"Roughly\*(R" because
detecting time jumps is hard, and some inaccuracies are unavoidable (the
monotonic clock option helps a lot here).
.PP
The callback is guaranteed to be invoked only \fIafter\fR its timeout has
passed (not \fIat\fR, so on systems with very low-resolution clocks this
might introduce a small delay, see \*(L"the special problem of being too
early\*(R", below). If multiple timers become ready during the same loop
iteration then the ones with earlier time-out values are invoked before
ones of the same priority with later time-out values (but this is no
longer true when a callback calls \f(CW\*(C`ev_run\*(C'\fR recursively).
.PP
\fIBe smart about timeouts\fR
.IX Subsection "Be smart about timeouts"
.PP
Many real-world problems involve some kind of timeout, usually for error
recovery. A typical example is an \s-1HTTP\s0 request \- if the other side hangs,
you want to raise some error after a while.
.PP
What follows are some ways to handle this problem, from obvious and
inefficient to smart and efficient.
.PP
In the following, a 60 second activity timeout is assumed \- a timeout that
gets reset to 60 seconds each time there is activity (e.g. each time some
data or other life sign was received).
.IP "1. Use a timer and stop, reinitialise and start it on activity." 4
.IX Item "1. Use a timer and stop, reinitialise and start it on activity."
This is the most obvious, but not the most simple way: In the beginning,
start the watcher:
.Sp
.Vb 2
\&   ev_timer_init (timer, callback, 60., 0.);
\&   ev_timer_start (loop, timer);
.Ve
.Sp
Then, each time there is some activity, \f(CW\*(C`ev_timer_stop\*(C'\fR it, initialise it
and start it again:
.Sp
.Vb 3
\&   ev_timer_stop (loop, timer);
\&   ev_timer_set (timer, 60., 0.);
\&   ev_timer_start (loop, timer);
.Ve
.Sp
This is relatively simple to implement, but means that each time there is
some activity, libev will first have to remove the timer from its internal
data structure and then add it again. Libev tries to be fast, but it's
still not a constant-time operation.
.ie n .IP "2. Use a timer and re-start it with ""ev_timer_again"" on activity." 4
.el .IP "2. Use a timer and re-start it with \f(CWev_timer_again\fR on activity." 4
.IX Item "2. Use a timer and re-start it with ev_timer_again on activity."
This is the easiest way, and involves using \f(CW\*(C`ev_timer_again\*(C'\fR instead of
\&\f(CW\*(C`ev_timer_start\*(C'\fR.
.Sp
To implement this, configure an \f(CW\*(C`ev_timer\*(C'\fR with a \f(CW\*(C`repeat\*(C'\fR value
of \f(CW60\fR and then call \f(CW\*(C`ev_timer_again\*(C'\fR at start and each time you
successfully read or write some data. If you go into an idle state where
you do not expect data to travel on the socket, you can \f(CW\*(C`ev_timer_stop\*(C'\fR
the timer, and \f(CW\*(C`ev_timer_again\*(C'\fR will automatically restart it if need be.
.Sp
That means you can ignore both the \f(CW\*(C`ev_timer_start\*(C'\fR function and the
\&\f(CW\*(C`after\*(C'\fR argument to \f(CW\*(C`ev_timer_set\*(C'\fR, and only ever use the \f(CW\*(C`repeat\*(C'\fR
member and \f(CW\*(C`ev_timer_again\*(C'\fR.
.Sp
At start:
.Sp
.Vb 3
\&   ev_init (timer, callback);
\&   timer\->repeat = 60.;
\&   ev_timer_again (loop, timer);
.Ve
.Sp
Each time there is some activity:
.Sp
.Vb 1
\&   ev_timer_again (loop, timer);
.Ve
.Sp
It is even possible to change the time-out on the fly, regardless of
whether the watcher is active or not:
.Sp
.Vb 2
\&   timer\->repeat = 30.;
\&   ev_timer_again (loop, timer);
.Ve
.Sp
This is slightly more efficient than stopping/starting the timer each time
you want to modify its timeout value, as libev does not have to completely
remove and re-insert the timer from/into its internal data structure.
.Sp
It is, however, even simpler than the \*(L"obvious\*(R" way to do it.
.IP "3. Let the timer time out, but then re-arm it as required." 4
.IX Item "3. Let the timer time out, but then re-arm it as required."
This method is more tricky, but usually most efficient: Most timeouts are
relatively long compared to the intervals between other activity \- in
our example, within 60 seconds, there are usually many I/O events with
associated activity resets.
.Sp
In this case, it would be more efficient to leave the \f(CW\*(C`ev_timer\*(C'\fR alone,
but remember the time of last activity, and check for a real timeout only
within the callback:
.Sp
.Vb 3
\&   ev_tstamp timeout = 60.;
\&   ev_tstamp last_activity; // time of last activity
\&   ev_timer timer;
\&
\&   static void
\&   callback (EV_P_ ev_timer *w, int revents)
\&   {
\&     // calculate when the timeout would happen
\&     ev_tstamp after = last_activity \- ev_now (EV_A) + timeout;
\&
\&     // if negative, it means the timeout already occurred
\&     if (after < 0.)
\&       {
\&         // timeout occurred, take action
\&       }
\&     else
\&       {
\&         // callback was invoked, but there was some recent 
\&         // activity. simply restart the timer to time out
\&         // after "after" seconds, which is the earliest time
\&         // the timeout can occur.
\&         ev_timer_set (w, after, 0.);
\&         ev_timer_start (EV_A_ w);
\&       }
\&   }
.Ve
.Sp
To summarise the callback: first calculate in how many seconds the
timeout will occur (by calculating the absolute time when it would occur,
\&\f(CW\*(C`last_activity + timeout\*(C'\fR, and subtracting the current time, \f(CW\*(C`ev_now
(EV_A)\*(C'\fR from that).
.Sp
If this value is negative, then we are already past the timeout, i.e. we
timed out, and need to do whatever is needed in this case.
.Sp
Otherwise, we know the earliest time at which the timeout would trigger,
and simply start the timer with this timeout value.
.Sp
In other words, each time the callback is invoked it will check whether
the timeout occurred. If not, it will simply reschedule itself to check
again at the earliest time it could time out. Rinse. Repeat.
.Sp
This scheme causes more callback invocations (about one every 60 seconds
minus half the average time between activity), but virtually no calls to
libev to change the timeout.
.Sp
To start the machinery, simply initialise the watcher and set
\&\f(CW\*(C`last_activity\*(C'\fR to the current time (meaning there was some activity just
now), then call the callback, which will \*(L"do the right thing\*(R" and start
the timer:
.Sp
.Vb 3
\&   last_activity = ev_now (EV_A);
\&   ev_init (&timer, callback);
\&   callback (EV_A_ &timer, 0);
.Ve
.Sp
When there is some activity, simply store the current time in
\&\f(CW\*(C`last_activity\*(C'\fR, no libev calls at all:
.Sp
.Vb 2
\&   if (activity detected)
\&     last_activity = ev_now (EV_A);
.Ve
.Sp
When your timeout value changes, then the timeout can be changed by simply
providing a new value, stopping the timer and calling the callback, which
will again do the right thing (for example, time out immediately :).
.Sp
.Vb 3
\&   timeout = new_value;
\&   ev_timer_stop (EV_A_ &timer);
\&   callback (EV_A_ &timer, 0);
.Ve
.Sp
This technique is slightly more complex, but in most cases where the
time-out is unlikely to be triggered, much more efficient.
.IP "4. Wee, just use a double-linked list for your timeouts." 4
.IX Item "4. Wee, just use a double-linked list for your timeouts."
If there is not one request, but many thousands (millions...), all
employing some kind of timeout with the same timeout value, then one can
do even better:
.Sp
When starting the timeout, calculate the timeout value and put the timeout
at the \fIend\fR of the list.
.Sp
Then use an \f(CW\*(C`ev_timer\*(C'\fR to fire when the timeout at the \fIbeginning\fR of
the list is expected to fire (for example, using the technique #3).
.Sp
When there is some activity, remove the timer from the list, recalculate
the timeout, append it to the end of the list again, and make sure to
update the \f(CW\*(C`ev_timer\*(C'\fR if it was taken from the beginning of the list.
.Sp
This way, one can manage an unlimited number of timeouts in O(1) time for
starting, stopping and updating the timers, at the expense of a major
complication, and having to use a constant timeout. The constant timeout
ensures that the list stays sorted.
.PP
So which method is the best?
.PP
Method #2 is a simple no-brain-required solution that is adequate in most
situations. Method #3 requires a bit more thinking, but handles many cases
better, and isn't very complicated either. In most cases, choosing either
one is fine, with #3 being better in typical situations.
.PP
Method #1 is almost always a bad idea, and buys you nothing. Method #4 is
rather complicated, but extremely efficient, something that really pays
off after the first million or so of active timers, i.e. it's usually
overkill :)
.PP
\fIThe special problem of being too early\fR
.IX Subsection "The special problem of being too early"
.PP
If you ask a timer to call your callback after three seconds, then
you expect it to be invoked after three seconds \- but of course, this
cannot be guaranteed to infinite precision. Less obviously, it cannot be
guaranteed to any precision by libev \- imagine somebody suspending the
process with a \s-1STOP\s0 signal for a few hours for example.
.PP
So, libev tries to invoke your callback as soon as possible \fIafter\fR the
delay has occurred, but cannot guarantee this.
.PP
A less obvious failure mode is calling your callback too early: many event
loops compare timestamps with an \*(L"elapsed delay >= requested delay\*(R", but
this can cause your callback to be invoked much earlier than you would
expect.
.PP
To see why, imagine a system with a clock that only offers full second
resolution (think windows if you can't come up with a broken enough \s-1OS\s0
yourself). If you schedule a one-second timer at the time 500.9, then the
event loop will schedule your timeout to elapse at a system time of 500
(500.9 truncated to the resolution) + 1, or 501.
.PP
If an event library looks at the timeout 0.1s later, it will see \*(L"501 >=
501\*(R" and invoke the callback 0.1s after it was started, even though a
one-second delay was requested \- this is being \*(L"too early\*(R", despite best
intentions.
.PP
This is the reason why libev will never invoke the callback if the elapsed
delay equals the requested delay, but only when the elapsed delay is
larger than the requested delay. In the example above, libev would only invoke
the callback at system time 502, or 1.1s after the timer was started.
.PP
So, while libev cannot guarantee that your callback will be invoked
exactly when requested, it \fIcan\fR and \fIdoes\fR guarantee that the requested
delay has actually elapsed, or in other words, it always errs on the \*(L"too
late\*(R" side of things.
.PP
\fIThe special problem of time updates\fR
.IX Subsection "The special problem of time updates"
.PP
Establishing the current time is a costly operation (it usually takes
at least one system call): \s-1EV\s0 therefore updates its idea of the current
time only before and after \f(CW\*(C`ev_run\*(C'\fR collects new events, which causes a
growing difference between \f(CW\*(C`ev_now ()\*(C'\fR and \f(CW\*(C`ev_time ()\*(C'\fR when handling
lots of events in one iteration.
.PP
The relative timeouts are calculated relative to the \f(CW\*(C`ev_now ()\*(C'\fR
time. This is usually the right thing as this timestamp refers to the time
of the event triggering whatever timeout you are modifying/starting. If
you suspect event processing to be delayed and you \fIneed\fR to base the
timeout on the current time, use something like the following to adjust
for it:
.PP
.Vb 1
\&   ev_timer_set (&timer, after + (ev_time () \- ev_now ()), 0.);
.Ve
.PP
If the event loop is suspended for a long time, you can also force an
update of the time returned by \f(CW\*(C`ev_now ()\*(C'\fR by calling \f(CW\*(C`ev_now_update
()\*(C'\fR, although that will push the event time of all outstanding events
further into the future.
.PP
\fIThe special problem of unsynchronised clocks\fR
.IX Subsection "The special problem of unsynchronised clocks"
.PP
Modern systems have a variety of clocks \- libev itself uses the normal
\&\*(L"wall clock\*(R" clock and, if available, the monotonic clock (to avoid time
jumps).
.PP
Neither of these clocks is synchronised with each other or any other clock
on the system, so \f(CW\*(C`ev_time ()\*(C'\fR might return a considerably different time
than \f(CW\*(C`gettimeofday ()\*(C'\fR or \f(CW\*(C`time ()\*(C'\fR. On a GNU/Linux system, for example,
a call to \f(CW\*(C`gettimeofday\*(C'\fR might return a second count that is one higher
than a directly following call to \f(CW\*(C`time\*(C'\fR.
.PP
The moral of this is to only compare libev-related timestamps with
\&\f(CW\*(C`ev_time ()\*(C'\fR and \f(CW\*(C`ev_now ()\*(C'\fR, at least if you want better precision than
a second or so.
.PP
One more problem arises due to this lack of synchronisation: if libev uses
the system monotonic clock and you compare timestamps from \f(CW\*(C`ev_time\*(C'\fR
or \f(CW\*(C`ev_now\*(C'\fR from when you started your timer and when your callback is
invoked, you will find that sometimes the callback is a bit \*(L"early\*(R".
.PP
This is because \f(CW\*(C`ev_timer\*(C'\fRs work in real time, not wall clock time, so
libev makes sure your callback is not invoked before the delay happened,
\&\fImeasured according to the real time\fR, not the system clock.
.PP
If your timeouts are based on a physical timescale (e.g. \*(L"time out this
connection after 100 seconds\*(R") then this shouldn't bother you as it is
exactly the right behaviour.
.PP
If you want to compare wall clock/system timestamps to your timers, then
you need to use \f(CW\*(C`ev_periodic\*(C'\fRs, as these are based on the wall clock
time, where your comparisons will always generate correct results.
.PP
\fIThe special problems of suspended animation\fR
.IX Subsection "The special problems of suspended animation"
.PP
When you leave the server world it is quite customary to hit machines that
can suspend/hibernate \- what happens to the clocks during such a suspend?
.PP
Some quick tests made with a Linux 2.6.28 indicate that a suspend freezes
all processes, while the clocks (\f(CW\*(C`times\*(C'\fR, \f(CW\*(C`CLOCK_MONOTONIC\*(C'\fR) continue
to run until the system is suspended, but they will not advance while the
system is suspended. That means, on resume, it will be as if the program
was frozen for a few seconds, but the suspend time will not be counted
towards \f(CW\*(C`ev_timer\*(C'\fR when a monotonic clock source is used. The real time
clock advanced as expected, but if it is used as sole clocksource, then a
long suspend would be detected as a time jump by libev, and timers would
be adjusted accordingly.
.PP
I would not be surprised to see different behaviour between different
operating systems, \s-1OS\s0 versions or even different hardware.
.PP
The other form of suspend (job control, or sending a \s-1SIGSTOP\s0) will see a
time jump in the monotonic clocks and the realtime clock. If the program
is suspended for a very long time, and monotonic clock sources are in use,
then you can expect \f(CW\*(C`ev_timer\*(C'\fRs to expire as the full suspension time
will be counted towards the timers. When no monotonic clock source is in
use, then libev will again assume a timejump and adjust accordingly.
.PP
It might be beneficial for this latter case to call \f(CW\*(C`ev_suspend\*(C'\fR
and \f(CW\*(C`ev_resume\*(C'\fR in code that handles \f(CW\*(C`SIGTSTP\*(C'\fR, to at least get
deterministic behaviour in this case (you can do nothing against
\&\f(CW\*(C`SIGSTOP\*(C'\fR).
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_timer_init (ev_timer *, callback, ev_tstamp after, ev_tstamp repeat)" 4
.IX Item "ev_timer_init (ev_timer *, callback, ev_tstamp after, ev_tstamp repeat)"
.PD 0
.IP "ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)" 4
.IX Item "ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)"
.PD
Configure the timer to trigger after \f(CW\*(C`after\*(C'\fR seconds. If \f(CW\*(C`repeat\*(C'\fR
is \f(CW0.\fR, then it will automatically be stopped once the timeout is
reached. If it is positive, then the timer will automatically be
configured to trigger again \f(CW\*(C`repeat\*(C'\fR seconds later, again, and again,
until stopped manually.
.Sp
The timer itself will do a best-effort at avoiding drift, that is, if
you configure a timer to trigger every 10 seconds, then it will normally
trigger at exactly 10 second intervals. If, however, your program cannot
keep up with the timer (because it takes longer than those 10 seconds to
do stuff) the timer will not fire more than once per event loop iteration.
.IP "ev_timer_again (loop, ev_timer *)" 4
.IX Item "ev_timer_again (loop, ev_timer *)"
This will act as if the timer timed out, and restarts it again if it is
repeating. It basically works like calling \f(CW\*(C`ev_timer_stop\*(C'\fR, updating the
timeout to the \f(CW\*(C`repeat\*(C'\fR value and calling \f(CW\*(C`ev_timer_start\*(C'\fR.
.Sp
The exact semantics are as in the following rules, all of which will be
applied to the watcher:
.RS 4
.IP "If the timer is pending, the pending status is always cleared." 4
.IX Item "If the timer is pending, the pending status is always cleared."
.PD 0
.IP "If the timer is started but non-repeating, stop it (as if it timed out, without invoking it)." 4
.IX Item "If the timer is started but non-repeating, stop it (as if it timed out, without invoking it)."
.ie n .IP "If the timer is repeating, make the ""repeat"" value the new timeout and start the timer, if necessary." 4
.el .IP "If the timer is repeating, make the \f(CWrepeat\fR value the new timeout and start the timer, if necessary." 4
.IX Item "If the timer is repeating, make the repeat value the new timeout and start the timer, if necessary."
.RE
.RS 4
.PD
.Sp
This sounds a bit complicated, see \*(L"Be smart about timeouts\*(R", above, for a
usage example.
.RE
.IP "ev_tstamp ev_timer_remaining (loop, ev_timer *)" 4
.IX Item "ev_tstamp ev_timer_remaining (loop, ev_timer *)"
Returns the remaining time until a timer fires. If the timer is active,
then this time is relative to the current event loop time, otherwise it's
the timeout value currently configured.
.Sp
That is, after an \f(CW\*(C`ev_timer_set (w, 5, 7)\*(C'\fR, \f(CW\*(C`ev_timer_remaining\*(C'\fR returns
\&\f(CW5\fR. When the timer is started and one second passes, \f(CW\*(C`ev_timer_remaining\*(C'\fR
will return \f(CW4\fR. When the timer expires and is restarted, it will return
roughly \f(CW7\fR (likely slightly less as callback invocation takes some time,
too), and so on.
.IP "ev_tstamp repeat [read\-write]" 4
.IX Item "ev_tstamp repeat [read-write]"
The current \f(CW\*(C`repeat\*(C'\fR value. Will be used each time the watcher times out
or \f(CW\*(C`ev_timer_again\*(C'\fR is called, and determines the next timeout (if any),
which is also when any modifications are taken into account.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Create a timer that fires after 60 seconds.
.PP
.Vb 5
\&   static void
\&   one_minute_cb (struct ev_loop *loop, ev_timer *w, int revents)
\&   {
\&     .. one minute over, w is actually stopped right here
\&   }
\&
\&   ev_timer mytimer;
\&   ev_timer_init (&mytimer, one_minute_cb, 60., 0.);
\&   ev_timer_start (loop, &mytimer);
.Ve
.PP
Example: Create a timeout timer that times out after 10 seconds of
inactivity.
.PP
.Vb 5
\&   static void
\&   timeout_cb (struct ev_loop *loop, ev_timer *w, int revents)
\&   {
\&     .. ten seconds without any activity
\&   }
\&
\&   ev_timer mytimer;
\&   ev_timer_init (&mytimer, timeout_cb, 0., 10.); /* note, only repeat used */
\&   ev_timer_again (loop, &mytimer); /* start timer */
\&   ev_run (loop, 0);
\&
\&   // and in some piece of code that gets executed on any "activity":
\&   // reset the timeout to start ticking again at 10 seconds
\&   ev_timer_again (loop, &mytimer);
.Ve
.ie n .SS """ev_periodic"" \- to cron or not to cron?"
.el .SS "\f(CWev_periodic\fP \- to cron or not to cron?"
.IX Subsection "ev_periodic - to cron or not to cron?"
Periodic watchers are also timers of a kind, but they are very versatile
(and unfortunately a bit complex).
.PP
Unlike \f(CW\*(C`ev_timer\*(C'\fR, periodic watchers are not based on real time (or
relative time, the physical time that passes) but on wall clock time
(absolute time, the thing you can read on your calendar or clock). The
difference is that wall clock time can run faster or slower than real
time, and time jumps are not uncommon (e.g. when you adjust your
wrist-watch).
.PP
You can tell a periodic watcher to trigger after some specific point
in time: for example, if you tell a periodic watcher to trigger \*(L"in 10
seconds\*(R" (by specifying e.g. \f(CW\*(C`ev_now () + 10.\*(C'\fR, that is, an absolute time
not a delay) and then reset your system clock to January of the previous
year, then it will take a year or more to trigger the event (unlike an
\&\f(CW\*(C`ev_timer\*(C'\fR, which would still trigger roughly 10 seconds after starting
it, as it uses a relative timeout).
.PP
\&\f(CW\*(C`ev_periodic\*(C'\fR watchers can also be used to implement vastly more complex
timers, such as triggering an event on each \*(L"midnight, local time\*(R", or
other complicated rules. This cannot be done with \f(CW\*(C`ev_timer\*(C'\fR watchers, as
those cannot react to time jumps.
.PP
As with timers, the callback is guaranteed to be invoked only when the
point in time where it is supposed to trigger has passed. If multiple
timers become ready during the same loop iteration then the ones with
earlier time-out values are invoked before ones with later time-out values
(but this is no longer true when a callback calls \f(CW\*(C`ev_run\*(C'\fR recursively).
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_periodic_init (ev_periodic *, callback, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" 4
.IX Item "ev_periodic_init (ev_periodic *, callback, ev_tstamp offset, ev_tstamp interval, reschedule_cb)"
.PD 0
.IP "ev_periodic_set (ev_periodic *, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" 4
.IX Item "ev_periodic_set (ev_periodic *, ev_tstamp offset, ev_tstamp interval, reschedule_cb)"
.PD
Lots of arguments, let's sort it out... There are basically three modes of
operation, and we will explain them from simplest to most complex:
.RS 4
.IP "\(bu" 4
absolute timer (offset = absolute time, interval = 0, reschedule_cb = 0)
.Sp
In this configuration the watcher triggers an event after the wall clock
time \f(CW\*(C`offset\*(C'\fR has passed. It will not repeat and will not adjust when a
time jump occurs, that is, if it is to be run at January 1st 2011 then it
will be stopped and invoked when the system clock reaches or surpasses
this point in time.
.IP "\(bu" 4
repeating interval timer (offset = offset within interval, interval > 0, reschedule_cb = 0)
.Sp
In this mode the watcher will always be scheduled to time out at the next
\&\f(CW\*(C`offset + N * interval\*(C'\fR time (for some integer N, which can also be
negative) and then repeat, regardless of any time jumps. The \f(CW\*(C`offset\*(C'\fR
argument is merely an offset into the \f(CW\*(C`interval\*(C'\fR periods.
.Sp
This can be used to create timers that do not drift with respect to the
system clock, for example, here is an \f(CW\*(C`ev_periodic\*(C'\fR that triggers each
hour, on the hour (with respect to \s-1UTC\s0):
.Sp
.Vb 1
\&   ev_periodic_set (&periodic, 0., 3600., 0);
.Ve
.Sp
This doesn't mean there will always be 3600 seconds in between triggers,
but only that the callback will be called when the system time shows a
full hour (\s-1UTC\s0), or more correctly, when the system time is evenly divisible
by 3600.
.Sp
Another way to think about it (for the mathematically inclined) is that
\&\f(CW\*(C`ev_periodic\*(C'\fR will try to run the callback in this mode at the next possible
time where \f(CW\*(C`time = offset (mod interval)\*(C'\fR, regardless of any time jumps.
.Sp
The \f(CW\*(C`interval\*(C'\fR \fI\s-1MUST\s0\fR be positive, and for numerical stability, the
interval value should be higher than \f(CW\*(C`1/8192\*(C'\fR (which is around 100
microseconds) and \f(CW\*(C`offset\*(C'\fR should be higher than \f(CW0\fR and should have
at most a similar magnitude as the current time (say, within a factor of
ten). Typical values for offset are, in fact, \f(CW0\fR or something between
\&\f(CW0\fR and \f(CW\*(C`interval\*(C'\fR, which is also the recommended range.
.Sp
Note also that there is an upper limit to how often a timer can fire (\s-1CPU\s0
speed for example), so if \f(CW\*(C`interval\*(C'\fR is very small then timing stability
will of course deteriorate. Libev itself tries to be exact to about one
millisecond (if the \s-1OS\s0 supports it and the machine is fast enough).
.IP "\(bu" 4
manual reschedule mode (offset ignored, interval ignored, reschedule_cb = callback)
.Sp
In this mode the values for \f(CW\*(C`interval\*(C'\fR and \f(CW\*(C`offset\*(C'\fR are both being
ignored. Instead, each time the periodic watcher gets scheduled, the
reschedule callback will be called with the watcher as first, and the
current time as second argument.
.Sp
\&\s-1NOTE: \s0\fIThis callback \s-1MUST NOT\s0 stop or destroy any periodic watcher, ever,
or make \s-1ANY\s0 other event loop modifications whatsoever, unless explicitly
allowed by documentation here\fR.
.Sp
If you need to stop it, return \f(CW\*(C`now + 1e30\*(C'\fR (or so, fudge fudge) and stop
it afterwards (e.g. by starting an \f(CW\*(C`ev_prepare\*(C'\fR watcher, which is the
only event loop modification you are allowed to do).
.Sp
The callback prototype is \f(CW\*(C`ev_tstamp (*reschedule_cb)(ev_periodic
*w, ev_tstamp now)\*(C'\fR, e.g.:
.Sp
.Vb 5
\&   static ev_tstamp
\&   my_rescheduler (ev_periodic *w, ev_tstamp now)
\&   {
\&     return now + 60.;
\&   }
.Ve
.Sp
It must return the next time to trigger, based on the passed time value
(that is, the lowest time value larger than or equal to the second argument). It
will usually be called just before the callback will be triggered, but
might be called at other times, too.
.Sp
\&\s-1NOTE: \s0\fIThis callback must always return a time that is higher than or
equal to the passed \f(CI\*(C`now\*(C'\fI value\fR.
.Sp
This can be used to create very complex timers, such as a timer that
triggers on \*(L"next midnight, local time\*(R". To do this, you would calculate the
next midnight after \f(CW\*(C`now\*(C'\fR and return the timestamp value for this. How
you do this is, again, up to you (but it is not trivial, which is the main
reason I omitted it as an example).
.RE
.RS 4
.RE
.IP "ev_periodic_again (loop, ev_periodic *)" 4
.IX Item "ev_periodic_again (loop, ev_periodic *)"
Simply stops and restarts the periodic watcher again. This is only useful
when you changed some parameters or the reschedule callback would return
a different time than the last time it was called (e.g. in a crond like
program when the crontabs have changed).
.IP "ev_tstamp ev_periodic_at (ev_periodic *)" 4
.IX Item "ev_tstamp ev_periodic_at (ev_periodic *)"
When active, returns the absolute time that the watcher is supposed
to trigger next. This is not the same as the \f(CW\*(C`offset\*(C'\fR argument to
\&\f(CW\*(C`ev_periodic_set\*(C'\fR, but indeed works even in interval and manual
rescheduling modes.
.IP "ev_tstamp offset [read\-write]" 4
.IX Item "ev_tstamp offset [read-write]"
When repeating, this contains the offset value, otherwise this is the
absolute point in time (the \f(CW\*(C`offset\*(C'\fR value passed to \f(CW\*(C`ev_periodic_set\*(C'\fR,
although libev might modify this value for better numerical stability).
.Sp
Can be modified any time, but changes only take effect when the periodic
timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being called.
.IP "ev_tstamp interval [read\-write]" 4
.IX Item "ev_tstamp interval [read-write]"
The current interval value. Can be modified any time, but changes only
take effect when the periodic timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being
called.
.IP "ev_tstamp (*reschedule_cb)(ev_periodic *w, ev_tstamp now) [read\-write]" 4
.IX Item "ev_tstamp (*reschedule_cb)(ev_periodic *w, ev_tstamp now) [read-write]"
The current reschedule callback, or \f(CW0\fR, if this functionality is
switched off. Can be changed any time, but changes only take effect when
the periodic timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being called.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Call a callback every hour, or, more precisely, whenever the
system time is divisible by 3600. The callback invocation times have
potentially a lot of jitter, but good long-term stability.
.PP
.Vb 5
\&   static void
\&   clock_cb (struct ev_loop *loop, ev_periodic *w, int revents)
\&   {
\&     ... it's now a full hour (UTC, or TAI or whatever your clock follows)
\&   }
\&
\&   ev_periodic hourly_tick;
\&   ev_periodic_init (&hourly_tick, clock_cb, 0., 3600., 0);
\&   ev_periodic_start (loop, &hourly_tick);
.Ve
.PP
Example: The same as above, but use a reschedule callback to do it:
.PP
.Vb 1
\&   #include <math.h>
\&
\&   static ev_tstamp
\&   my_scheduler_cb (ev_periodic *w, ev_tstamp now)
\&   {
\&     return now + (3600. \- fmod (now, 3600.));
\&   }
\&
\&   ev_periodic_init (&hourly_tick, clock_cb, 0., 0., my_scheduler_cb);
.Ve
.PP
Example: Call a callback every hour, starting now:
.PP
.Vb 4
\&   ev_periodic hourly_tick;
\&   ev_periodic_init (&hourly_tick, clock_cb,
\&                     fmod (ev_now (loop), 3600.), 3600., 0);
\&   ev_periodic_start (loop, &hourly_tick);
.Ve
.ie n .SS """ev_signal"" \- signal me when a signal gets signalled!"
.el .SS "\f(CWev_signal\fP \- signal me when a signal gets signalled!"
.IX Subsection "ev_signal - signal me when a signal gets signalled!"
Signal watchers will trigger an event when the process receives a specific
signal one or more times. Even though signals are very asynchronous, libev
will try its best to deliver signals synchronously, i.e. as part of the
normal event processing, like any other event.
.PP
If you want signals to be delivered truly asynchronously, just use
\&\f(CW\*(C`sigaction\*(C'\fR as you would do without libev and forget about sharing
the signal. You can even use \f(CW\*(C`ev_async\*(C'\fR from a signal handler to
synchronously wake up an event loop.
.PP
You can configure as many watchers as you like for the same signal, but
only within the same loop, i.e. you can watch for \f(CW\*(C`SIGINT\*(C'\fR in your
default loop and for \f(CW\*(C`SIGIO\*(C'\fR in another loop, but you cannot watch for
\&\f(CW\*(C`SIGINT\*(C'\fR in both the default loop and another loop at the same time. At
the moment, \f(CW\*(C`SIGCHLD\*(C'\fR is permanently tied to the default loop.
.PP
Only after the first watcher for a signal is started will libev actually
register something with the kernel. It thus coexists with your own signal
handlers as long as you don't register any with libev for the same signal.
.PP
If possible and supported, libev will install its handlers with
\&\f(CW\*(C`SA_RESTART\*(C'\fR (or equivalent) behaviour enabled, so system calls should
not be unduly interrupted. If you have a problem with system calls getting
interrupted by signals you can block all signals in an \f(CW\*(C`ev_check\*(C'\fR watcher
and unblock them in an \f(CW\*(C`ev_prepare\*(C'\fR watcher.
.PP
\fIThe special problem of inheritance over fork/execve/pthread_create\fR
.IX Subsection "The special problem of inheritance over fork/execve/pthread_create"
.PP
Both the signal mask (\f(CW\*(C`sigprocmask\*(C'\fR) and the signal disposition
(\f(CW\*(C`sigaction\*(C'\fR) are unspecified after starting a signal watcher (and after
stopping it again), that is, libev might or might not block the signal,
and might or might not set or restore the installed signal handler (but
see \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR).
.PP
While this does not matter for the signal disposition (libev never
sets signals to \f(CW\*(C`SIG_IGN\*(C'\fR, so handlers will be reset to \f(CW\*(C`SIG_DFL\*(C'\fR on
\&\f(CW\*(C`execve\*(C'\fR), this matters for the signal mask: many programs do not expect
certain signals to be blocked.
.PP
This means that before calling \f(CW\*(C`exec\*(C'\fR (from the child) you should reset
the signal mask to whatever \*(L"default\*(R" you expect (all clear is a good
choice usually).
.PP
The simplest way to ensure that the signal mask is reset in the child is
to install a fork handler with \f(CW\*(C`pthread_atfork\*(C'\fR that resets it. That will
catch fork calls done by libraries (such as the libc) as well.
.PP
In current versions of libev, the signal will not be blocked indefinitely
unless you use the \f(CW\*(C`signalfd\*(C'\fR \s-1API \s0(\f(CW\*(C`EV_SIGNALFD\*(C'\fR). While this reduces
the window of opportunity for problems, it will not go away, as libev
\&\fIhas\fR to modify the signal mask, at least temporarily.
.PP
So I can't stress this enough: \fIIf you do not reset your signal mask when
you expect it to be empty, you have a race condition in your code\fR. This
is not a libev-specific thing, this is true for most event libraries.
.PP
\fIThe special problem of threads signal handling\fR
.IX Subsection "The special problem of threads signal handling"
.PP
\&\s-1POSIX\s0 threads has problematic signal handling semantics, specifically,
a lot of functionality (sigfd, sigwait etc.) only really works if all
threads in a process block signals, which is hard to achieve.
.PP
When you want to use sigwait (or mix libev signal handling with your own
for the same signals), you can tackle this problem by globally blocking
all signals before creating any threads (or creating them with a fully set
sigprocmask) and also specifying the \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR when creating
loops. Then designate one thread as \*(L"signal receiver thread\*(R" which handles
these signals. You can pass on any signals that libev might be interested
in by calling \f(CW\*(C`ev_feed_signal\*(C'\fR.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_signal_init (ev_signal *, callback, int signum)" 4
.IX Item "ev_signal_init (ev_signal *, callback, int signum)"
.PD 0
.IP "ev_signal_set (ev_signal *, int signum)" 4
.IX Item "ev_signal_set (ev_signal *, int signum)"
.PD
Configures the watcher to trigger on the given signal number (usually one
of the \f(CW\*(C`SIGxxx\*(C'\fR constants).
.IP "int signum [read\-only]" 4
.IX Item "int signum [read-only]"
The signal the watcher watches out for.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Try to exit cleanly on \s-1SIGINT.\s0
.PP
.Vb 5
\&   static void
\&   sigint_cb (struct ev_loop *loop, ev_signal *w, int revents)
\&   {
\&     ev_break (loop, EVBREAK_ALL);
\&   }
\&
\&   ev_signal signal_watcher;
\&   ev_signal_init (&signal_watcher, sigint_cb, SIGINT);
\&   ev_signal_start (loop, &signal_watcher);
.Ve
.ie n .SS """ev_child"" \- watch out for process status changes"
.el .SS "\f(CWev_child\fP \- watch out for process status changes"
.IX Subsection "ev_child - watch out for process status changes"
Child watchers trigger when your process receives a \s-1SIGCHLD\s0 in response to
some child status changes (most typically when a child of yours dies or
exits). It is permissible to install a child watcher \fIafter\fR the child
has been forked (which implies it might have already exited), as long
as the event loop isn't entered (or is continued from a watcher), i.e.,
forking and then immediately registering a watcher for the child is fine,
but forking and registering a watcher a few event loop iterations later or
in the next callback invocation is not.
.PP
Only the default event loop is capable of handling signals, and therefore
you can only register child watchers in the default event loop.
.PP
Due to some design glitches inside libev, child watchers will always be
handled at maximum priority (their priority is set to \f(CW\*(C`EV_MAXPRI\*(C'\fR by
libev).
.PP
\fIProcess Interaction\fR
.IX Subsection "Process Interaction"
.PP
Libev grabs \f(CW\*(C`SIGCHLD\*(C'\fR as soon as the default event loop is
initialised. This is necessary to guarantee proper behaviour even if the
first child watcher is started after the child exits. The occurrence
of \f(CW\*(C`SIGCHLD\*(C'\fR is recorded asynchronously, but child reaping is done
synchronously as part of the event loop processing. Libev always reaps all
children, even ones not watched.
.PP
\fIOverriding the Built-In Processing\fR
.IX Subsection "Overriding the Built-In Processing"
.PP
Libev offers no special support for overriding the built-in child
processing, but if your application collides with libev's default child
handler, you can override it easily by installing your own handler for
\&\f(CW\*(C`SIGCHLD\*(C'\fR after initialising the default loop, and making sure the
default loop never gets destroyed. You are encouraged, however, to use an
event-based approach to child reaping and thus use libev's support for
that, so other libev users can use \f(CW\*(C`ev_child\*(C'\fR watchers freely.
.PP
\fIStopping the Child Watcher\fR
.IX Subsection "Stopping the Child Watcher"
.PP
Currently, the child watcher never gets stopped, even when the
child terminates, so normally one needs to stop the watcher in the
callback. Future versions of libev might stop the watcher automatically
when a child exit is detected (calling \f(CW\*(C`ev_child_stop\*(C'\fR twice is not a
problem).
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_child_init (ev_child *, callback, int pid, int trace)" 4
.IX Item "ev_child_init (ev_child *, callback, int pid, int trace)"
.PD 0
.IP "ev_child_set (ev_child *, int pid, int trace)" 4
.IX Item "ev_child_set (ev_child *, int pid, int trace)"
.PD
Configures the watcher to wait for status changes of process \f(CW\*(C`pid\*(C'\fR (or
\&\fIany\fR process if \f(CW\*(C`pid\*(C'\fR is specified as \f(CW0\fR). The callback can look
at the \f(CW\*(C`rstatus\*(C'\fR member of the \f(CW\*(C`ev_child\*(C'\fR watcher structure to see
the status word (use the macros from \f(CW\*(C`sys/wait.h\*(C'\fR and see your system's
\&\f(CW\*(C`waitpid\*(C'\fR documentation). The \f(CW\*(C`rpid\*(C'\fR member contains the pid of the
process causing the status change. \f(CW\*(C`trace\*(C'\fR must be either \f(CW0\fR (only
activate the watcher when the process terminates) or \f(CW1\fR (additionally
activate the watcher when the process is stopped or continued).
.IP "int pid [read\-only]" 4
.IX Item "int pid [read-only]"
The process id this watcher watches out for, or \f(CW0\fR, meaning any process id.
.IP "int rpid [read\-write]" 4
.IX Item "int rpid [read-write]"
The process id of the process for which a status change was detected.
.IP "int rstatus [read\-write]" 4
.IX Item "int rstatus [read-write]"
The process exit/trace status caused by \f(CW\*(C`rpid\*(C'\fR (see your system's
\&\f(CW\*(C`waitpid\*(C'\fR and \f(CW\*(C`sys/wait.h\*(C'\fR documentation for details).
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: \f(CW\*(C`fork()\*(C'\fR a new process and install a child handler to wait for
its completion.
.PP
.Vb 1
\&   ev_child cw;
\&
\&   static void
\&   child_cb (EV_P_ ev_child *w, int revents)
\&   {
\&     ev_child_stop (EV_A_ w);
\&     printf ("process %d exited with status %x\en", w\->rpid, w\->rstatus);
\&   }
\&
\&   pid_t pid = fork ();
\&
\&   if (pid < 0)
\&     // error
\&   else if (pid == 0)
\&     {
\&       // the forked child executes here
\&       exit (1);
\&     }
\&   else
\&     {
\&       ev_child_init (&cw, child_cb, pid, 0);
\&       ev_child_start (EV_DEFAULT_ &cw);
\&     }
.Ve
.ie n .SS """ev_stat"" \- did the file attributes just change?"
.el .SS "\f(CWev_stat\fP \- did the file attributes just change?"
.IX Subsection "ev_stat - did the file attributes just change?"
This watches a file system path for attribute changes. That is, it calls
\&\f(CW\*(C`stat\*(C'\fR on that path in regular intervals (or when the \s-1OS\s0 says it changed)
and sees if it changed compared to the last time, invoking the callback
if it did. Starting the watcher \f(CW\*(C`stat\*(C'\fR's the file, so only changes that
happen after the watcher has been started will be reported.
.PP
The path does not need to exist: changing from \*(L"path exists\*(R" to \*(L"path does
not exist\*(R" is a status change like any other. The condition \*(L"path does not
exist\*(R" (or more correctly \*(L"path cannot be stat'ed\*(R") is signified by the
\&\f(CW\*(C`st_nlink\*(C'\fR field being zero (which is otherwise always forced to be at
least one) and all the other fields of the stat buffer having unspecified
contents.
.PP
The path \fImust not\fR end in a slash or contain special components such as
\&\f(CW\*(C`.\*(C'\fR or \f(CW\*(C`..\*(C'\fR. The path \fIshould\fR be absolute: If it is relative and
your working directory changes, then the behaviour is undefined.
.PP
Since there is no portable change notification interface available, the
portable implementation simply calls \f(CWstat(2)\fR regularly on the path
to see if it changed somehow. You can specify a recommended polling
interval for this case. If you specify a polling interval of \f(CW0\fR (highly
recommended!) then a \fIsuitable, unspecified default\fR value will be used
(which you can expect to be around five seconds, although this might
change dynamically). Libev will also impose a minimum interval which is
currently around \f(CW0.1\fR, but that's usually overkill.
.PP
This watcher type is not meant for massive numbers of stat watchers,
as even with OS-supported change notifications, this can be
resource-intensive.
.PP
At the time of this writing, the only OS-specific interface implemented
is the Linux inotify interface (implementing kqueue support is left as an
exercise for the reader. Note, however, that the author sees no way of
implementing \f(CW\*(C`ev_stat\*(C'\fR semantics with kqueue, except as a hint).
.PP
\fI\s-1ABI\s0 Issues (Largefile Support)\fR
.IX Subsection "ABI Issues (Largefile Support)"
.PP
Libev by default (unless the user overrides this) uses the default
compilation environment, which means that on systems with large file
support disabled by default, you get the 32 bit version of the stat
structure. When using the library from programs that change the \s-1ABI\s0 to
use 64 bit file offsets the programs will fail. In that case you have to
compile libev with the same flags to get binary compatibility. This is
obviously the case with any flags that change the \s-1ABI,\s0 but the problem is
most noticeably displayed with ev_stat and large file support.
.PP
The solution for this is to lobby your distribution maker to make large
file interfaces available by default (as e.g. FreeBSD does) and not
optional. Libev cannot simply switch on large file support because it has
to exchange stat structures with application programs compiled using the
default compilation environment.
.PP
\fIInotify and Kqueue\fR
.IX Subsection "Inotify and Kqueue"
.PP
When \f(CW\*(C`inotify (7)\*(C'\fR support has been compiled into libev and present at
runtime, it will be used to speed up change detection where possible. The
inotify descriptor will be created lazily when the first \f(CW\*(C`ev_stat\*(C'\fR
watcher is being started.
.PP
Inotify presence does not change the semantics of \f(CW\*(C`ev_stat\*(C'\fR watchers
except that changes might be detected earlier, and in some cases, to avoid
making regular \f(CW\*(C`stat\*(C'\fR calls. Even in the presence of inotify support
there are many cases where libev has to resort to regular \f(CW\*(C`stat\*(C'\fR polling,
but as long as kernel 2.6.25 or newer is used (2.6.24 and older have too
many bugs), the path exists (i.e. stat succeeds), and the path resides on
a local filesystem (libev currently assumes only ext2/3, jfs, reiserfs and
xfs are fully working) libev usually gets away without polling.
.PP
There is no support for kqueue, as apparently it cannot be used to
implement this functionality, due to the requirement of having a file
descriptor open on the object at all times, and detecting renames, unlinks
etc. is difficult.
.PP
\fI\f(CI\*(C`stat ()\*(C'\fI is a synchronous operation\fR
.IX Subsection "stat () is a synchronous operation"
.PP
Libev doesn't normally do any kind of I/O itself, and so is not blocking
the process. The exceptions are \f(CW\*(C`ev_stat\*(C'\fR watchers \- those call \f(CW\*(C`stat
()\*(C'\fR, which is a synchronous operation.
.PP
For local paths, this usually doesn't matter: unless the system is very
busy or the intervals between stat's are large, a stat call will be fast,
as the path data is usually in memory already (except when starting the
watcher).
.PP
For networked file systems, calling \f(CW\*(C`stat ()\*(C'\fR can block an indefinite
time due to network issues, and even under good conditions, a stat call
often takes multiple milliseconds.
.PP
Therefore, it is best to avoid using \f(CW\*(C`ev_stat\*(C'\fR watchers on networked
paths, although this is fully supported by libev.
.PP
\fIThe special problem of stat time resolution\fR
.IX Subsection "The special problem of stat time resolution"
.PP
The \f(CW\*(C`stat ()\*(C'\fR system call only supports full-second resolution portably,
and even on systems where the resolution is higher, most file systems
still only support whole seconds.
.PP
That means that, if the time is the only thing that changes, you can
easily miss updates: on the first update, \f(CW\*(C`ev_stat\*(C'\fR detects a change and
calls your callback, which does something. When there is another update
within the same second, \f(CW\*(C`ev_stat\*(C'\fR will be unable to detect it unless the
stat data does change in other ways (e.g. file size).
.PP
The solution to this is to delay acting on a change for slightly more
than a second (or till slightly after the next full second boundary), using
a roughly one-second-delay \f(CW\*(C`ev_timer\*(C'\fR (e.g. \f(CW\*(C`ev_timer_set (w, 0., 1.02);
ev_timer_again (loop, w)\*(C'\fR).
.PP
The \f(CW.02\fR offset is added to work around small timing inconsistencies
of some operating systems (where the second counter of the current time
might be delayed. One such system is the Linux kernel, where a call to
\&\f(CW\*(C`gettimeofday\*(C'\fR might return a timestamp with a full second later than
a subsequent \f(CW\*(C`time\*(C'\fR call \- if the equivalent of \f(CW\*(C`time ()\*(C'\fR is used to
update file times then there will be a small window where the kernel uses
the previous second to update file times but libev might already execute
the timer callback).
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_stat_init (ev_stat *, callback, const char *path, ev_tstamp interval)" 4
.IX Item "ev_stat_init (ev_stat *, callback, const char *path, ev_tstamp interval)"
.PD 0
.IP "ev_stat_set (ev_stat *, const char *path, ev_tstamp interval)" 4
.IX Item "ev_stat_set (ev_stat *, const char *path, ev_tstamp interval)"
.PD
Configures the watcher to wait for status changes of the given
\&\f(CW\*(C`path\*(C'\fR. The \f(CW\*(C`interval\*(C'\fR is a hint on how quickly a change is expected to
be detected and should normally be specified as \f(CW0\fR to let libev choose
a suitable value. The memory pointed to by \f(CW\*(C`path\*(C'\fR must point to the same
path for as long as the watcher is active.
.Sp
The callback will receive an \f(CW\*(C`EV_STAT\*(C'\fR event when a change was detected,
relative to the attributes at the time the watcher was started (or the
last change was detected).
.IP "ev_stat_stat (loop, ev_stat *)" 4
.IX Item "ev_stat_stat (loop, ev_stat *)"
Updates the stat buffer immediately with new values. If you change the
watched path in your callback, you could call this function to avoid
detecting this change (while introducing a race condition if you are not
the only one changing the path). Can also be useful simply to find out the
new values.
.IP "ev_statdata attr [read\-only]" 4
.IX Item "ev_statdata attr [read-only]"
The most-recently detected attributes of the file. Although the type is
\&\f(CW\*(C`ev_statdata\*(C'\fR, this is usually the (or one of the) \f(CW\*(C`struct stat\*(C'\fR types
suitable for your system, but you can only rely on the POSIX-standardised
members to be present. If the \f(CW\*(C`st_nlink\*(C'\fR member is \f(CW0\fR, then there was
some error while \f(CW\*(C`stat\*(C'\fRing the file.
.IP "ev_statdata prev [read\-only]" 4
.IX Item "ev_statdata prev [read-only]"
The previous attributes of the file. The callback gets invoked whenever
\&\f(CW\*(C`prev\*(C'\fR != \f(CW\*(C`attr\*(C'\fR, or, more precisely, one or more of these members
differ: \f(CW\*(C`st_dev\*(C'\fR, \f(CW\*(C`st_ino\*(C'\fR, \f(CW\*(C`st_mode\*(C'\fR, \f(CW\*(C`st_nlink\*(C'\fR, \f(CW\*(C`st_uid\*(C'\fR,
\&\f(CW\*(C`st_gid\*(C'\fR, \f(CW\*(C`st_rdev\*(C'\fR, \f(CW\*(C`st_size\*(C'\fR, \f(CW\*(C`st_atime\*(C'\fR, \f(CW\*(C`st_mtime\*(C'\fR, \f(CW\*(C`st_ctime\*(C'\fR.
.IP "ev_tstamp interval [read\-only]" 4
.IX Item "ev_tstamp interval [read-only]"
The specified interval.
.IP "const char *path [read\-only]" 4
.IX Item "const char *path [read-only]"
The file system path that is being watched.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Watch \f(CW\*(C`/etc/passwd\*(C'\fR for attribute changes.
.PP
.Vb 10
\&   static void
\&   passwd_cb (struct ev_loop *loop, ev_stat *w, int revents)
\&   {
\&     /* /etc/passwd changed in some way */
\&     if (w\->attr.st_nlink)
\&       {
\&         printf ("passwd current size  %ld\en", (long)w\->attr.st_size);
\&         printf ("passwd current atime %ld\en", (long)w\->attr.st_atime);
\&         printf ("passwd current mtime %ld\en", (long)w\->attr.st_mtime);
\&       }
\&     else
\&       /* you shalt not abuse printf for puts */
\&       puts ("wow, /etc/passwd is not there, expect problems. "
\&             "if this is windows, they already arrived\en");
\&   }
\&
\&   ...
\&   ev_stat passwd;
\&
\&   ev_stat_init (&passwd, passwd_cb, "/etc/passwd", 0.);
\&   ev_stat_start (loop, &passwd);
.Ve
.PP
Example: Like above, but additionally use a one-second delay so we do not
miss updates (however, frequent updates will delay processing, too, so
one might do the work both on \f(CW\*(C`ev_stat\*(C'\fR callback invocation \fIand\fR on
\&\f(CW\*(C`ev_timer\*(C'\fR callback invocation).
.PP
.Vb 2
\&   static ev_stat passwd;
\&   static ev_timer timer;
\&
\&   static void
\&   timer_cb (EV_P_ ev_timer *w, int revents)
\&   {
\&     ev_timer_stop (EV_A_ w);
\&
\&     /* now it\*(Aqs one second after the most recent passwd change */
\&   }
\&
\&   static void
\&   stat_cb (EV_P_ ev_stat *w, int revents)
\&   {
\&     /* reset the one\-second timer */
\&     ev_timer_again (EV_A_ &timer);
\&   }
\&
\&   ...
\&   ev_stat_init (&passwd, stat_cb, "/etc/passwd", 0.);
\&   ev_stat_start (loop, &passwd);
\&   ev_timer_init (&timer, timer_cb, 0., 1.02);
.Ve
.ie n .SS """ev_idle"" \- when you've got nothing better to do..."
.el .SS "\f(CWev_idle\fP \- when you've got nothing better to do..."
.IX Subsection "ev_idle - when you've got nothing better to do..."
Idle watchers trigger events when no other events of the same or higher
priority are pending (prepare, check and other idle watchers do not count
as receiving \*(L"events\*(R").
.PP
That is, as long as your process is busy handling sockets or timeouts
(or even signals, imagine) of the same or higher priority it will not be
triggered. But when your process is idle (or only lower-priority watchers
are pending), the idle watchers are being called once per event loop
iteration \- until stopped, that is, or your process receives more events
and becomes busy again with higher priority stuff.
.PP
The most noteworthy effect is that as long as any idle watchers are
active, the process will not block when waiting for new events.
.PP
Apart from keeping your process non-blocking (which is a useful
effect on its own sometimes), idle watchers are a good place to do
\&\*(L"pseudo-background processing\*(R", or delay processing stuff to after the
event loop has handled all outstanding events.
.PP
\fIAbusing an \f(CI\*(C`ev_idle\*(C'\fI watcher for its side-effect\fR
.IX Subsection "Abusing an ev_idle watcher for its side-effect"
.PP
As long as there is at least one active idle watcher, libev will never
sleep unnecessarily. Or in other words, it will loop as fast as possible.
For this to work, the idle watcher doesn't need to be invoked at all \- the
lowest priority will do.
.PP
This mode of operation can be useful together with an \f(CW\*(C`ev_check\*(C'\fR watcher,
to do something on each event loop iteration \- for example to balance load
between different connections.
.PP
See \*(L"Abusing an ev_check watcher for its side-effect\*(R" for a longer
example.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_idle_init (ev_idle *, callback)" 4
.IX Item "ev_idle_init (ev_idle *, callback)"
Initialises and configures the idle watcher \- it has no parameters of any
kind. There is a \f(CW\*(C`ev_idle_set\*(C'\fR macro, but using it is utterly pointless,
believe me.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Dynamically allocate an \f(CW\*(C`ev_idle\*(C'\fR watcher, start it, and in the
callback, free it. Also, use no error checking, as usual.
.PP
.Vb 5
\&   static void
\&   idle_cb (struct ev_loop *loop, ev_idle *w, int revents)
\&   {
\&     // stop the watcher
\&     ev_idle_stop (loop, w);
\&
\&     // now we can free it
\&     free (w);
\&
\&     // now do something you wanted to do when the program has
\&     // no longer anything immediate to do.
\&   }
\&
\&   ev_idle *idle_watcher = malloc (sizeof (ev_idle));
\&   ev_idle_init (idle_watcher, idle_cb);
\&   ev_idle_start (loop, idle_watcher);
.Ve
.ie n .SS """ev_prepare"" and ""ev_check"" \- customise your event loop!"
.el .SS "\f(CWev_prepare\fP and \f(CWev_check\fP \- customise your event loop!"
.IX Subsection "ev_prepare and ev_check - customise your event loop!"
Prepare and check watchers are often (but not always) used in pairs:
prepare watchers get invoked before the process blocks and check watchers
afterwards.
.PP
You \fImust not\fR call \f(CW\*(C`ev_run\*(C'\fR (or similar functions that enter the
current event loop) or \f(CW\*(C`ev_loop_fork\*(C'\fR from either \f(CW\*(C`ev_prepare\*(C'\fR or
\&\f(CW\*(C`ev_check\*(C'\fR watchers. Other loops than the current one are fine,
however. The rationale behind this is that you do not need to check
for recursion in those watchers, i.e. the sequence will always be
\&\f(CW\*(C`ev_prepare\*(C'\fR, blocking, \f(CW\*(C`ev_check\*(C'\fR so if you have one watcher of each
kind they will always be called in pairs bracketing the blocking call.
.PP
Their main purpose is to integrate other event mechanisms into libev and
their use is somewhat advanced. They could be used, for example, to track
variable changes, implement your own watchers, integrate net-snmp or a
coroutine library and lots more. They are also occasionally useful if
you cache some data and want to flush it before blocking (for example,
in X programs you might want to do an \f(CW\*(C`XFlush ()\*(C'\fR in an \f(CW\*(C`ev_prepare\*(C'\fR
watcher).
.PP
This is done by examining in each prepare call which file descriptors
need to be watched by the other library, registering \f(CW\*(C`ev_io\*(C'\fR watchers
for them and starting an \f(CW\*(C`ev_timer\*(C'\fR watcher for any timeouts (many
libraries provide exactly this functionality). Then, in the check watcher,
you check for any events that occurred (by checking the pending status
of all watchers and stopping them) and call back into the library. The
I/O and timer callbacks will never actually be called (but must be valid
nevertheless, because you never know, you know?).
.PP
As another example, the Perl Coro module uses these hooks to integrate
coroutines into libev programs, by yielding to other active coroutines
during each prepare and only letting the process block if no coroutines
are ready to run (it's actually more complicated: it only runs coroutines
with priority higher than or equal to the event loop and one coroutine
of lower priority, but only once, using idle watchers to keep the event
loop from blocking if lower-priority coroutines are active, thus mapping
low-priority coroutines to idle/background tasks).
.PP
When used for this purpose, it is recommended to give \f(CW\*(C`ev_check\*(C'\fR watchers
highest (\f(CW\*(C`EV_MAXPRI\*(C'\fR) priority, to ensure that they are being run before
any other watchers after the poll (this doesn't matter for \f(CW\*(C`ev_prepare\*(C'\fR
watchers).
.PP
Also, \f(CW\*(C`ev_check\*(C'\fR watchers (and \f(CW\*(C`ev_prepare\*(C'\fR watchers, too) should not
activate (\*(L"feed\*(R") events into libev. While libev fully supports this, they
might get executed before other \f(CW\*(C`ev_check\*(C'\fR watchers did their job. As
\&\f(CW\*(C`ev_check\*(C'\fR watchers are often used to embed other (non-libev) event
loops those other event loops might be in an unusable state until their
\&\f(CW\*(C`ev_check\*(C'\fR watcher ran (always remind yourself to coexist peacefully with
others).
.PP
\fIAbusing an \f(CI\*(C`ev_check\*(C'\fI watcher for its side-effect\fR
.IX Subsection "Abusing an ev_check watcher for its side-effect"
.PP
\&\f(CW\*(C`ev_check\*(C'\fR (and less often also \f(CW\*(C`ev_prepare\*(C'\fR) watchers can also be
useful because they are called once per event loop iteration. For
example, if you want to handle a large number of connections fairly, you
normally only do a bit of work for each active connection, and if there
is more work to do, you wait for the next event loop iteration, so other
connections have a chance of making progress.
.PP
Using an \f(CW\*(C`ev_check\*(C'\fR watcher is almost enough: it will be called on the
next event loop iteration. However, that isn't as soon as possible \-
without external events, your \f(CW\*(C`ev_check\*(C'\fR watcher will not be invoked.
.PP
This is where \f(CW\*(C`ev_idle\*(C'\fR watchers come in handy \- all you need is a
single global idle watcher that is active as long as you have one active
\&\f(CW\*(C`ev_check\*(C'\fR watcher. The \f(CW\*(C`ev_idle\*(C'\fR watcher makes sure the event loop
will not sleep, and the \f(CW\*(C`ev_check\*(C'\fR watcher makes sure a callback gets
invoked. Neither watcher alone can do that.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_prepare_init (ev_prepare *, callback)" 4
.IX Item "ev_prepare_init (ev_prepare *, callback)"
.PD 0
.IP "ev_check_init (ev_check *, callback)" 4
.IX Item "ev_check_init (ev_check *, callback)"
.PD
Initialises and configures the prepare or check watcher \- they have no
parameters of any kind. There are \f(CW\*(C`ev_prepare_set\*(C'\fR and \f(CW\*(C`ev_check_set\*(C'\fR
macros, but using them is utterly, utterly, utterly and completely
pointless.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
There are a number of principal ways to embed other event loops or modules
into libev. Here are some ideas on how to include libadns into libev
(there is a Perl module named \f(CW\*(C`EV::ADNS\*(C'\fR that does this, which you could
use as a working example. Another Perl module named \f(CW\*(C`EV::Glib\*(C'\fR embeds a
Glib main context into libev, and finally, \f(CW\*(C`Glib::EV\*(C'\fR embeds \s-1EV\s0 into the
Glib event loop).
.PP
Method 1: Add \s-1IO\s0 watchers and a timeout watcher in a prepare handler,
and in a check watcher, destroy them and call into libadns. What follows
is pseudo-code only of course. This requires you to either use a low
priority for the check watcher or use \f(CW\*(C`ev_clear_pending\*(C'\fR explicitly, as
the callbacks for the IO/timeout watchers might not have been called yet.
.PP
.Vb 2
\&   static ev_io iow [nfd];
\&   static ev_timer tw;
\&
\&   static void
\&   io_cb (struct ev_loop *loop, ev_io *w, int revents)
\&   {
\&   }
\&
\&   // create io watchers for each fd and a timer before blocking
\&   static void
\&   adns_prepare_cb (struct ev_loop *loop, ev_prepare *w, int revents)
\&   {
\&     int timeout = 3600000;
\&     struct pollfd fds [nfd];
\&     // actual code will need to loop here and realloc etc.
\&     adns_beforepoll (ads, fds, &nfd, &timeout, timeval_from (ev_time ()));
\&
\&     /* the callback is illegal, but won\*(Aqt be called as we stop during check */
\&     ev_timer_init (&tw, 0, timeout * 1e\-3, 0.);
\&     ev_timer_start (loop, &tw);
\&
\&     // create one ev_io per pollfd
\&     for (int i = 0; i < nfd; ++i)
\&       {
\&         ev_io_init (iow + i, io_cb, fds [i].fd,
\&           ((fds [i].events & POLLIN ? EV_READ : 0)
\&            | (fds [i].events & POLLOUT ? EV_WRITE : 0)));
\&
\&         fds [i].revents = 0;
\&         ev_io_start (loop, iow + i);
\&       }
\&   }
\&
\&   // stop all watchers after blocking
\&   static void
\&   adns_check_cb (struct ev_loop *loop, ev_check *w, int revents)
\&   {
\&     ev_timer_stop (loop, &tw);
\&
\&     for (int i = 0; i < nfd; ++i)
\&       {
\&         // set the relevant poll flags
\&         // could also call adns_processreadable etc. here
\&         struct pollfd *fd = fds + i;
\&         int revents = ev_clear_pending (iow + i);
\&         if (revents & EV_READ ) fd\->revents |= fd\->events & POLLIN;
\&         if (revents & EV_WRITE) fd\->revents |= fd\->events & POLLOUT;
\&
\&         // now stop the watcher
\&         ev_io_stop (loop, iow + i);
\&       }
\&
\&     adns_afterpoll (adns, fds, nfd, timeval_from (ev_now (loop)));
\&   }
.Ve
.PP
Method 2: This would be just like method 1, but you run \f(CW\*(C`adns_afterpoll\*(C'\fR
in the prepare watcher and would dispose of the check watcher.
.PP
Method 3: If the module to be embedded supports explicit event
notification (libadns does), you can also make use of the actual watcher
callbacks, and only destroy/create the watchers in the prepare watcher.
.PP
.Vb 5
\&   static void
\&   timer_cb (EV_P_ ev_timer *w, int revents)
\&   {
\&     adns_state ads = (adns_state)w\->data;
\&     update_now (EV_A);
\&
\&     adns_processtimeouts (ads, &tv_now);
\&   }
\&
\&   static void
\&   io_cb (EV_P_ ev_io *w, int revents)
\&   {
\&     adns_state ads = (adns_state)w\->data;
\&     update_now (EV_A);
\&
\&     if (revents & EV_READ ) adns_processreadable  (ads, w\->fd, &tv_now);
\&     if (revents & EV_WRITE) adns_processwriteable (ads, w\->fd, &tv_now);
\&   }
\&
\&   // do not ever call adns_afterpoll
.Ve
.PP
Method 4: Do not use a prepare or check watcher because the module you
want to embed is not flexible enough to support it. Instead, you can
override their poll function. The drawback with this solution is that the
main loop is now no longer controllable by \s-1EV.\s0 The \f(CW\*(C`Glib::EV\*(C'\fR module uses
this approach, effectively embedding \s-1EV\s0 as a client into the horrible
libglib event loop.
.PP
.Vb 4
\&   static gint
\&   event_poll_func (GPollFD *fds, guint nfds, gint timeout)
\&   {
\&     int got_events = 0;
\&
\&     for (n = 0; n < nfds; ++n)
\&       // create/start io watcher that sets the relevant bits in fds[n] and increment got_events
\&
\&     if (timeout >= 0)
\&       // create/start timer
\&
\&     // poll
\&     ev_run (EV_A_ 0);
\&
\&     // stop timer again
\&     if (timeout >= 0)
\&       ev_timer_stop (EV_A_ &to);
\&
\&     // stop io watchers again \- their callbacks should have set
\&     for (n = 0; n < nfds; ++n)
\&       ev_io_stop (EV_A_ iow [n]);
\&
\&     return got_events;
\&   }
.Ve
.ie n .SS """ev_embed"" \- when one backend isn't enough..."
.el .SS "\f(CWev_embed\fP \- when one backend isn't enough..."
.IX Subsection "ev_embed - when one backend isn't enough..."
This is a rather advanced watcher type that lets you embed one event loop
into another (currently only \f(CW\*(C`ev_io\*(C'\fR events are supported in the embedded
loop, other types of watchers might be handled in a delayed or incorrect
fashion and must not be used).
.PP
There are primarily two reasons you would want that: work around bugs and
prioritise I/O.
.PP
As an example for a bug workaround, the kqueue backend might only support
sockets on some platform, so it is unusable as generic backend, but you
still want to make use of it because you have many sockets and it scales
so nicely. In this case, you would create a kqueue-based loop and embed
it into your default loop (which might use e.g. poll). Overall operation
will be a bit slower because first libev has to call \f(CW\*(C`poll\*(C'\fR and then
\&\f(CW\*(C`kevent\*(C'\fR, but at least you can use both mechanisms for what they are
best: \f(CW\*(C`kqueue\*(C'\fR for scalable sockets and \f(CW\*(C`poll\*(C'\fR if you want it to work :)
.PP
As for prioritising I/O: under rare circumstances you have the case where
some fds have to be watched and handled very quickly (with low latency),
and even priorities and idle watchers might have too much overhead. In
this case you would put all the high priority stuff in one loop and all
the rest in a second one, and embed the second one in the first.
.PP
As long as the watcher is active, the callback will be invoked every
time there might be events pending in the embedded loop. The callback
must then call \f(CW\*(C`ev_embed_sweep (mainloop, watcher)\*(C'\fR to make a single
sweep and invoke their callbacks (the callback doesn't need to invoke the
\&\f(CW\*(C`ev_embed_sweep\*(C'\fR function directly, it could also start an idle watcher
to give the embedded loop strictly lower priority for example).
.PP
You can also set the callback to \f(CW0\fR, in which case the embed watcher
will automatically execute the embedded loop sweep whenever necessary.
.PP
Fork detection will be handled transparently while the \f(CW\*(C`ev_embed\*(C'\fR watcher
is active, i.e., the embedded loop will automatically be forked when the
embedding loop forks. In other cases, the user is responsible for calling
\&\f(CW\*(C`ev_loop_fork\*(C'\fR on the embedded loop.
.PP
Unfortunately, not all backends are embeddable: only the ones returned by
\&\f(CW\*(C`ev_embeddable_backends\*(C'\fR are, which, unfortunately, does not include any
portable one.
.PP
So when you want to use this feature you will always have to be prepared
that you cannot get an embeddable loop. The recommended way to get around
this is to have separate variables for your embeddable loop, try to
create it, and if that fails, use the normal loop for everything.
.PP
\fI\f(CI\*(C`ev_embed\*(C'\fI and fork\fR
.IX Subsection "ev_embed and fork"
.PP
While the \f(CW\*(C`ev_embed\*(C'\fR watcher is running, forks in the embedding loop will
automatically be applied to the embedded loop as well, so no special
fork handling is required in that case. When the watcher is not running,
however, it is still the task of the libev user to call \f(CW\*(C`ev_loop_fork ()\*(C'\fR
as applicable.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)" 4
.IX Item "ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)"
.PD 0
.IP "ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)" 4
.IX Item "ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)"
.PD
Configures the watcher to embed the given loop, which must be
embeddable. If the callback is \f(CW0\fR, then \f(CW\*(C`ev_embed_sweep\*(C'\fR will be
invoked automatically, otherwise it is the responsibility of the callback
to invoke it (it will continue to be called until the sweep has been done,
if you do not want that, you need to temporarily stop the embed watcher).
.IP "ev_embed_sweep (loop, ev_embed *)" 4
.IX Item "ev_embed_sweep (loop, ev_embed *)"
Make a single, non-blocking sweep over the embedded loop. This works
similarly to \f(CW\*(C`ev_run (embedded_loop, EVRUN_NOWAIT)\*(C'\fR, but in the most
appropriate way for embedded loops.
.IP "struct ev_loop *other [read\-only]" 4
.IX Item "struct ev_loop *other [read-only]"
The embedded event loop.
.PP
\fIExamples\fR
.IX Subsection "Examples"
.PP
Example: Try to get an embeddable event loop and embed it into the default
event loop. If that is not possible, use the default loop. The default
loop is stored in \f(CW\*(C`loop_hi\*(C'\fR, while the embeddable loop is stored in
\&\f(CW\*(C`loop_lo\*(C'\fR (which is \f(CW\*(C`loop_hi\*(C'\fR in the case no embeddable loop can be
used).
.PP
.Vb 3
\&   struct ev_loop *loop_hi = ev_default_init (0);
\&   struct ev_loop *loop_lo = 0;
\&   ev_embed embed;
\&
\&   // see if there is a chance of getting one that works
\&   // (remember that a flags value of 0 means autodetection)
\&   loop_lo = ev_embeddable_backends () & ev_recommended_backends ()
\&     ? ev_loop_new (ev_embeddable_backends () & ev_recommended_backends ())
\&     : 0;
\&
\&   // if we got one, then embed it, otherwise default to loop_hi
\&   if (loop_lo)
\&     {
\&       ev_embed_init (&embed, 0, loop_lo);
\&       ev_embed_start (loop_hi, &embed);
\&     }
\&   else
\&     loop_lo = loop_hi;
.Ve
.PP
Example: Check if kqueue is available but not recommended and create
a kqueue backend for use with sockets (which usually work with any
kqueue implementation). Store the kqueue/socket\-only event loop in
\&\f(CW\*(C`loop_socket\*(C'\fR. (One might optionally use \f(CW\*(C`EVFLAG_NOENV\*(C'\fR, too).
.PP
.Vb 3
\&   struct ev_loop *loop = ev_default_init (0);
\&   struct ev_loop *loop_socket = 0;
\&   ev_embed embed;
\&
\&   if (ev_supported_backends () & ~ev_recommended_backends () & EVBACKEND_KQUEUE)
\&     if ((loop_socket = ev_loop_new (EVBACKEND_KQUEUE)))
\&       {
\&         ev_embed_init (&embed, 0, loop_socket);
\&         ev_embed_start (loop, &embed);
\&       }
\&
\&   if (!loop_socket)
\&     loop_socket = loop;
\&
\&   // now use loop_socket for all sockets, and loop for everything else
.Ve
.ie n .SS """ev_fork"" \- the audacity to resume the event loop after a fork"
.el .SS "\f(CWev_fork\fP \- the audacity to resume the event loop after a fork"
.IX Subsection "ev_fork - the audacity to resume the event loop after a fork"
Fork watchers are called when a \f(CW\*(C`fork ()\*(C'\fR was detected (usually because
whoever is a good citizen cared to tell libev about it by calling
\&\f(CW\*(C`ev_loop_fork\*(C'\fR). The invocation is done before the event loop blocks next
and before \f(CW\*(C`ev_check\*(C'\fR watchers are being called, and only in the child
after the fork. If the good citizen calling \f(CW\*(C`ev_default_fork\*(C'\fR cheats
and calls it in the wrong process, the fork handlers will be invoked, too,
of course.
.PP
\fIThe special problem of life after fork \- how is it possible?\fR
.IX Subsection "The special problem of life after fork - how is it possible?"
.PP
Most uses of \f(CW\*(C`fork ()\*(C'\fR consist of forking, then some simple calls to set
up/change the process environment, followed by a call to \f(CW\*(C`exec()\*(C'\fR. This
sequence should be handled by libev without any problems.
.PP
This changes when the application actually wants to do event handling
in the child, or both parent and child, in effect \*(L"continuing\*(R" after the
fork.
.PP
The default mode of operation (for libev, with application help to detect
forks) is to duplicate all the state in the child, as would be expected
when \fIeither\fR the parent \fIor\fR the child process continues.
.PP
When both processes want to continue using libev, then this is usually the
wrong result. In that case, usually one process (typically the parent) is
supposed to continue with all watchers in place as before, while the other
process typically wants to start fresh, i.e. without any active watchers.
.PP
The cleanest and most efficient way to achieve that with libev is to
simply create a new event loop, which of course will be \*(L"empty\*(R", and
use that for new watchers. This has the advantage of not touching more
memory than necessary, and thus avoiding the copy-on-write, and the
disadvantage of having to use multiple event loops (which do not support
signal watchers).
.PP
When this is not possible, or you want to use the default loop for
other reasons, then in the process that wants to start \*(L"fresh\*(R", call
\&\f(CW\*(C`ev_loop_destroy (EV_DEFAULT)\*(C'\fR followed by \f(CW\*(C`ev_default_loop (...)\*(C'\fR.
Destroying the default loop will \*(L"orphan\*(R" (not stop) all registered
watchers, so you have to be careful not to execute code that modifies
those watchers. Note also that in that case, you have to re-register any
signal watchers.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_fork_init (ev_fork *, callback)" 4
.IX Item "ev_fork_init (ev_fork *, callback)"
Initialises and configures the fork watcher \- it has no parameters of any
kind. There is a \f(CW\*(C`ev_fork_set\*(C'\fR macro, but using it is utterly pointless,
really.
.ie n .SS """ev_cleanup"" \- even the best things end"
.el .SS "\f(CWev_cleanup\fP \- even the best things end"
.IX Subsection "ev_cleanup - even the best things end"
Cleanup watchers are called just before the event loop is being destroyed
by a call to \f(CW\*(C`ev_loop_destroy\*(C'\fR.
.PP
While there is no guarantee that the event loop gets destroyed, cleanup
watchers provide a convenient method to install cleanup hooks for your
program, worker threads and so on \- you just have to make sure to destroy the
loop when you want them to be invoked.
.PP
Cleanup watchers are invoked in the same way as any other watcher. Unlike
all other watchers, they do not keep a reference to the event loop (which
makes a lot of sense if you think about it). Like all other watchers, you
can call libev functions in the callback, except \f(CW\*(C`ev_cleanup_start\*(C'\fR.
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_cleanup_init (ev_cleanup *, callback)" 4
.IX Item "ev_cleanup_init (ev_cleanup *, callback)"
Initialises and configures the cleanup watcher \- it has no parameters of
any kind. There is a \f(CW\*(C`ev_cleanup_set\*(C'\fR macro, but using it is utterly
pointless, I assure you.
.PP
Example: Register an atexit handler to destroy the default loop, so any
cleanup functions are called.
.PP
.Vb 5
\&   static void
\&   program_exits (void)
\&   {
\&     ev_loop_destroy (EV_DEFAULT_UC);
\&   }
\&
\&   ...
\&   atexit (program_exits);
.Ve
.ie n .SS """ev_async"" \- how to wake up an event loop"
.el .SS "\f(CWev_async\fP \- how to wake up an event loop"
.IX Subsection "ev_async - how to wake up an event loop"
In general, you cannot use an \f(CW\*(C`ev_loop\*(C'\fR from multiple threads or other
asynchronous sources such as signal handlers (as opposed to multiple event
loops \- those are of course safe to use in different threads).
.PP
Sometimes, however, you need to wake up an event loop you do not control,
for example because it belongs to another thread. This is what \f(CW\*(C`ev_async\*(C'\fR
watchers do: as long as the \f(CW\*(C`ev_async\*(C'\fR watcher is active, you can signal
it by calling \f(CW\*(C`ev_async_send\*(C'\fR, which is thread\- and signal safe.
.PP
This functionality is very similar to \f(CW\*(C`ev_signal\*(C'\fR watchers, as signals,
too, are asynchronous in nature, and signals, too, will be compressed
(i.e. the number of callback invocations may be less than the number of
\&\f(CW\*(C`ev_async_send\*(C'\fR calls). In fact, you could use signal watchers as a kind
of \*(L"global async watchers\*(R" by using a watcher on an otherwise unused
signal, and \f(CW\*(C`ev_feed_signal\*(C'\fR to signal this watcher from another thread,
even without knowing which loop owns the signal.
.PP
\fIQueueing\fR
.IX Subsection "Queueing"
.PP
\&\f(CW\*(C`ev_async\*(C'\fR does not support queueing of data in any way. The reason
is that the author does not know of a simple (or any) algorithm for a
multiple-writer-single-reader queue that works in all cases and doesn't
need elaborate support such as pthreads or unportable memory access
semantics.
.PP
That means that if you want to queue data, you have to provide your own
queue. But at least I can tell you how to implement locking around your
queue:
.IP "queueing from a signal handler context" 4
.IX Item "queueing from a signal handler context"
To implement race-free queueing, you simply add to the queue in the signal
handler but you block the signal handler in the watcher callback. Here is
an example that does that for some fictitious \s-1SIGUSR1\s0 handler:
.Sp
.Vb 1
\&   static ev_async mysig;
\&
\&   static void
\&   sigusr1_handler (void)
\&   {
\&     sometype data;
\&
\&     // no locking etc.
\&     queue_put (data);
\&     ev_async_send (EV_DEFAULT_ &mysig);
\&   }
\&
\&   static void
\&   mysig_cb (EV_P_ ev_async *w, int revents)
\&   {
\&     sometype data;
\&     sigset_t block, prev;
\&
\&     sigemptyset (&block);
\&     sigaddset (&block, SIGUSR1);
\&     sigprocmask (SIG_BLOCK, &block, &prev);
\&
\&     while (queue_get (&data))
\&       process (data);
\&
\&     if (!sigismember (&prev, SIGUSR1))
\&       sigprocmask (SIG_UNBLOCK, &block, 0);
\&   }
.Ve
.Sp
(Note: pthreads in theory requires you to use \f(CW\*(C`pthread_sigmask\*(C'\fR
instead of \f(CW\*(C`sigprocmask\*(C'\fR when you use threads, but libev doesn't do it
either...).
.IP "queueing from a thread context" 4
.IX Item "queueing from a thread context"
The strategy for threads is different, as you cannot (easily) block
threads but you can easily preempt them, so to queue safely you need to
employ a traditional mutex lock, such as in this pthread example:
.Sp
.Vb 2
\&   static ev_async mysig;
\&   static pthread_mutex_t mymutex = PTHREAD_MUTEX_INITIALIZER;
\&
\&   static void
\&   otherthread (void)
\&   {
\&     // only need to lock the actual queueing operation
\&     pthread_mutex_lock (&mymutex);
\&     queue_put (data);
\&     pthread_mutex_unlock (&mymutex);
\&
\&     ev_async_send (EV_DEFAULT_ &mysig);
\&   }
\&
\&   static void
\&   mysig_cb (EV_P_ ev_async *w, int revents)
\&   {
\&     pthread_mutex_lock (&mymutex);
\&
\&     while (queue_get (&data))
\&       process (data);
\&
\&     pthread_mutex_unlock (&mymutex);
\&   }
.Ve
.PP
\fIWatcher-Specific Functions and Data Members\fR
.IX Subsection "Watcher-Specific Functions and Data Members"
.IP "ev_async_init (ev_async *, callback)" 4
.IX Item "ev_async_init (ev_async *, callback)"
Initialises and configures the async watcher \- it has no parameters of any
kind. There is a \f(CW\*(C`ev_async_set\*(C'\fR macro, but using it is utterly pointless,
trust me.
.IP "ev_async_send (loop, ev_async *)" 4
.IX Item "ev_async_send (loop, ev_async *)"
Sends/signals/activates the given \f(CW\*(C`ev_async\*(C'\fR watcher, that is, feeds
an \f(CW\*(C`EV_ASYNC\*(C'\fR event on the watcher into the event loop, and instantly
returns.
.Sp
Unlike \f(CW\*(C`ev_feed_event\*(C'\fR, this call is safe to do from other threads,
signal or similar contexts (see the discussion of \f(CW\*(C`EV_ATOMIC_T\*(C'\fR in the
embedding section below on what exactly this means).
.Sp
Note that, as with other watchers in libev, multiple events might get
compressed into a single callback invocation (another way to look at
this is that \f(CW\*(C`ev_async\*(C'\fR watchers are level-triggered: they are set on
\&\f(CW\*(C`ev_async_send\*(C'\fR, reset when the event loop detects that).
.Sp
This call incurs the overhead of at most one extra system call per event
loop iteration, if the event loop is blocked, and no syscall at all if
the event loop (or your program) is processing events. That means that
repeated calls are basically free (there is no need to avoid calls for
performance reasons) and that the overhead becomes smaller (typically
zero) under load.
.IP "bool = ev_async_pending (ev_async *)" 4
.IX Item "bool = ev_async_pending (ev_async *)"
Returns a non-zero value when \f(CW\*(C`ev_async_send\*(C'\fR has been called on the
watcher but the event has not yet been processed (or even noted) by the
event loop.
.Sp
\&\f(CW\*(C`ev_async_send\*(C'\fR sets a flag in the watcher and wakes up the loop. When
the loop iterates next and checks for the watcher to have become active,
it will reset the flag again. \f(CW\*(C`ev_async_pending\*(C'\fR can be used to very
quickly check whether invoking the loop might be a good idea.
.Sp
Note that this does \fInot\fR check whether the watcher itself is pending,
only whether it has been requested to make this watcher pending: there
is a time window between the event loop checking and resetting the async
notification, and the callback being invoked.
.SH "OTHER FUNCTIONS"
.IX Header "OTHER FUNCTIONS"
There are some other functions of possible interest. Described. Here. Now.
.IP "ev_once (loop, int fd, int events, ev_tstamp timeout, callback)" 4
.IX Item "ev_once (loop, int fd, int events, ev_tstamp timeout, callback)"
This function combines a simple timer and an I/O watcher, calls your
callback on whichever event happens first and automatically stops both
watchers. This is useful if you want to wait for a single event on an fd
or timeout without having to allocate/configure/start/stop/free one or
more watchers yourself.
.Sp
If \f(CW\*(C`fd\*(C'\fR is less than 0, then no I/O watcher will be started and the
\&\f(CW\*(C`events\*(C'\fR argument is being ignored. Otherwise, an \f(CW\*(C`ev_io\*(C'\fR watcher for
the given \f(CW\*(C`fd\*(C'\fR and \f(CW\*(C`events\*(C'\fR set will be created and started.
.Sp
If \f(CW\*(C`timeout\*(C'\fR is less than 0, then no timeout watcher will be
started. Otherwise an \f(CW\*(C`ev_timer\*(C'\fR watcher with after = \f(CW\*(C`timeout\*(C'\fR (and
repeat = 0) will be started. \f(CW0\fR is a valid timeout.
.Sp
The callback has the type \f(CW\*(C`void (*cb)(int revents, void *arg)\*(C'\fR and is
passed an \f(CW\*(C`revents\*(C'\fR set like normal event callbacks (a combination of
\&\f(CW\*(C`EV_ERROR\*(C'\fR, \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR or \f(CW\*(C`EV_TIMER\*(C'\fR) and the \f(CW\*(C`arg\*(C'\fR
value passed to \f(CW\*(C`ev_once\*(C'\fR. Note that it is possible to receive \fIboth\fR
a timeout and an io event at the same time \- you probably should give io
events precedence.
.Sp
Example: wait up to ten seconds for data to appear on \s-1STDIN_FILENO.\s0
.Sp
.Vb 7
\&   static void stdin_ready (int revents, void *arg)
\&   {
\&     if (revents & EV_READ)
\&       /* stdin might have data for us, joy! */;
\&     else if (revents & EV_TIMER)
\&       /* doh, nothing entered */;
\&   }
\&
\&   ev_once (EV_DEFAULT_ STDIN_FILENO, EV_READ, 10., stdin_ready, 0);
.Ve
.IP "ev_feed_fd_event (loop, int fd, int revents)" 4
.IX Item "ev_feed_fd_event (loop, int fd, int revents)"
Feed an event on the given fd, as if a file descriptor backend detected
the given events.
.IP "ev_feed_signal_event (loop, int signum)" 4
.IX Item "ev_feed_signal_event (loop, int signum)"
Feed an event as if the given signal occurred. See also \f(CW\*(C`ev_feed_signal\*(C'\fR,
which is async-safe.
.SH "COMMON OR USEFUL IDIOMS (OR BOTH)"
.IX Header "COMMON OR USEFUL IDIOMS (OR BOTH)"
This section explains some common idioms that are not immediately
obvious. Note that examples are sprinkled over the whole manual, and this
section only contains stuff that wouldn't fit anywhere else.
.SS "\s-1ASSOCIATING CUSTOM DATA WITH A WATCHER\s0"
.IX Subsection "ASSOCIATING CUSTOM DATA WITH A WATCHER"
Each watcher has, by default, a \f(CW\*(C`void *data\*(C'\fR member that you can read
or modify at any time: libev will completely ignore it. This can be used
to associate arbitrary data with your watcher. If you need more data and
don't want to allocate memory separately and store a pointer to it in that
data member, you can also \*(L"subclass\*(R" the watcher type and provide your own
data:
.PP
.Vb 7
\&   struct my_io
\&   {
\&     ev_io io;
\&     int otherfd;
\&     void *somedata;
\&     struct whatever *mostinteresting;
\&   };
\&
\&   ...
\&   struct my_io w;
\&   ev_io_init (&w.io, my_cb, fd, EV_READ);
.Ve
.PP
And since your callback will be called with a pointer to the watcher, you
can cast it back to your own type:
.PP
.Vb 5
\&   static void my_cb (struct ev_loop *loop, ev_io *w_, int revents)
\&   {
\&     struct my_io *w = (struct my_io *)w_;
\&     ...
\&   }
.Ve
.PP
More interesting and less C\-conformant ways of casting your callback
function type instead have been omitted.
.SS "\s-1BUILDING YOUR OWN COMPOSITE WATCHERS\s0"
.IX Subsection "BUILDING YOUR OWN COMPOSITE WATCHERS"
Another common scenario is to use some data structure with multiple
embedded watchers, in effect creating your own watcher that combines
multiple libev event sources into one \*(L"super-watcher\*(R":
.PP
.Vb 6
\&   struct my_biggy
\&   {
\&     int some_data;
\&     ev_timer t1;
\&     ev_timer t2;
\&   };
.Ve
.PP
In this case getting the pointer to \f(CW\*(C`my_biggy\*(C'\fR is a bit more
complicated: Either you store the address of your \f(CW\*(C`my_biggy\*(C'\fR struct in
the \f(CW\*(C`data\*(C'\fR member of the watcher (for woozies or \*(C+ coders), or you need
to use some pointer arithmetic using \f(CW\*(C`offsetof\*(C'\fR inside your watchers (for
real programmers):
.PP
.Vb 1
\&   #include <stddef.h>
\&
\&   static void
\&   t1_cb (EV_P_ ev_timer *w, int revents)
\&   {
\&     struct my_biggy *big = (struct my_biggy *)
\&       (((char *)w) \- offsetof (struct my_biggy, t1));
\&   }
\&
\&   static void
\&   t2_cb (EV_P_ ev_timer *w, int revents)
\&   {
\&     struct my_biggy *big = (struct my_biggy *)
\&       (((char *)w) \- offsetof (struct my_biggy, t2));
\&   }
.Ve
.SS "\s-1AVOIDING FINISHING BEFORE RETURNING\s0"
.IX Subsection "AVOIDING FINISHING BEFORE RETURNING"
Often you have structures like this in event-based programs:
.PP
.Vb 4
\&  callback ()
\&  {
\&    free (request);
\&  }
\&
\&  request = start_new_request (..., callback);
.Ve
.PP
The intent is to start some \*(L"lengthy\*(R" operation. The \f(CW\*(C`request\*(C'\fR could be
used to cancel the operation, or do other things with it.
.PP
It's not uncommon to have code paths in \f(CW\*(C`start_new_request\*(C'\fR that
immediately invoke the callback, for example, to report errors. Or you add
some caching layer that finds that it can skip the lengthy aspects of the
operation and simply invoke the callback with the result.
.PP
The problem here is that this will happen \fIbefore\fR \f(CW\*(C`start_new_request\*(C'\fR
has returned, so \f(CW\*(C`request\*(C'\fR is not set.
.PP
Even if you pass the request by some safer means to the callback, you
might want to do something to the request after starting it, such as
canceling it, which probably isn't working so well when the callback has
already been invoked.
.PP
A common way around all these issues is to make sure that
\&\f(CW\*(C`start_new_request\*(C'\fR \fIalways\fR returns before the callback is invoked. If
\&\f(CW\*(C`start_new_request\*(C'\fR immediately knows the result, it can artificially
delay invoking the callback by using a \f(CW\*(C`prepare\*(C'\fR or \f(CW\*(C`idle\*(C'\fR watcher for
example, or more sneakily, by reusing an existing (stopped) watcher and
pushing it into the pending queue:
.PP
.Vb 2
\&   ev_set_cb (watcher, callback);
\&   ev_feed_event (EV_A_ watcher, 0);
.Ve
.PP
This way, \f(CW\*(C`start_new_request\*(C'\fR can safely return before the callback is
invoked, while not delaying callback invocation too much.
.SS "\s-1MODAL/NESTED EVENT LOOP INVOCATIONS AND EXIT CONDITIONS\s0"
.IX Subsection "MODAL/NESTED EVENT LOOP INVOCATIONS AND EXIT CONDITIONS"
Often (especially in \s-1GUI\s0 toolkits) there are places where you have
\&\fImodal\fR interaction, which is most easily implemented by recursively
invoking \f(CW\*(C`ev_run\*(C'\fR.
.PP
This brings the problem of exiting \- a callback might want to finish the
main \f(CW\*(C`ev_run\*(C'\fR call, but not the nested one (e.g. user clicked \*(L"Quit\*(R", but
a modal \*(L"Are you sure?\*(R" dialog is still waiting), or just the nested one
and not the main one (e.g. user clicked \*(L"Ok\*(R" in a modal dialog), or some
other combination: In these cases, a simple \f(CW\*(C`ev_break\*(C'\fR will not work.
.PP
The solution is to maintain a \*(L"break this loop\*(R" variable for each \f(CW\*(C`ev_run\*(C'\fR
invocation, and use a loop around \f(CW\*(C`ev_run\*(C'\fR until the condition is
triggered, using \f(CW\*(C`EVRUN_ONCE\*(C'\fR:
.PP
.Vb 2
\&   // main loop
\&   int exit_main_loop = 0;
\&
\&   while (!exit_main_loop)
\&     ev_run (EV_DEFAULT_ EVRUN_ONCE);
\&
\&   // in a modal watcher
\&   int exit_nested_loop = 0;
\&
\&   while (!exit_nested_loop)
\&     ev_run (EV_A_ EVRUN_ONCE);
.Ve
.PP
To exit from any of these loops, just set the corresponding exit variable:
.PP
.Vb 2
\&   // exit modal loop
\&   exit_nested_loop = 1;
\&
\&   // exit main program, after modal loop is finished
\&   exit_main_loop = 1;
\&
\&   // exit both
\&   exit_main_loop = exit_nested_loop = 1;
.Ve
.SS "\s-1THREAD LOCKING EXAMPLE\s0"
.IX Subsection "THREAD LOCKING EXAMPLE"
Here is a fictitious example of how to run an event loop in a different
thread from where callbacks are being invoked and watchers are
created/added/removed.
.PP
For a real-world example, see the \f(CW\*(C`EV::Loop::Async\*(C'\fR perl module,
which uses exactly this technique (which is suited for many high-level
languages).
.PP
The example uses a pthread mutex to protect the loop data, a condition
variable to wait for callback invocations, an async watcher to notify the
event loop thread and an unspecified mechanism to wake up the main thread.
.PP
First, you need to associate some data with the event loop:
.PP
.Vb 6
\&   typedef struct {
\&     mutex_t lock; /* global loop lock */
\&     ev_async async_w;
\&     thread_t tid;
\&     cond_t invoke_cv;
\&   } userdata;
\&
\&   void prepare_loop (EV_P)
\&   {
\&      // for simplicity, we use a static userdata struct.
\&      static userdata u;
\&
\&      ev_async_init (&u.async_w, async_cb);
\&      ev_async_start (EV_A_ &u.async_w);
\&
\&      pthread_mutex_init (&u.lock, 0);
\&      pthread_cond_init (&u.invoke_cv, 0);
\&
\&      // now associate this with the loop
\&      ev_set_userdata (EV_A_ &u);
\&      ev_set_invoke_pending_cb (EV_A_ l_invoke);
\&      ev_set_loop_release_cb (EV_A_ l_release, l_acquire);
\&
\&      // then create the thread running ev_run
\&      pthread_create (&u.tid, 0, l_run, EV_A);
\&   }
.Ve
.PP
The callback for the \f(CW\*(C`ev_async\*(C'\fR watcher does nothing: the watcher is used
solely to wake up the event loop so it takes notice of any new watchers
that might have been added:
.PP
.Vb 5
\&   static void
\&   async_cb (EV_P_ ev_async *w, int revents)
\&   {
\&      // just used for the side effects
\&   }
.Ve
.PP
The \f(CW\*(C`l_release\*(C'\fR and \f(CW\*(C`l_acquire\*(C'\fR callbacks simply unlock/lock the mutex
protecting the loop data, respectively.
.PP
.Vb 6
\&   static void
\&   l_release (EV_P)
\&   {
\&     userdata *u = ev_userdata (EV_A);
\&     pthread_mutex_unlock (&u\->lock);
\&   }
\&
\&   static void
\&   l_acquire (EV_P)
\&   {
\&     userdata *u = ev_userdata (EV_A);
\&     pthread_mutex_lock (&u\->lock);
\&   }
.Ve
.PP
The event loop thread first acquires the mutex, and then jumps straight
into \f(CW\*(C`ev_run\*(C'\fR:
.PP
.Vb 4
\&   void *
\&   l_run (void *thr_arg)
\&   {
\&     struct ev_loop *loop = (struct ev_loop *)thr_arg;
\&
\&     l_acquire (EV_A);
\&     pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, 0);
\&     ev_run (EV_A_ 0);
\&     l_release (EV_A);
\&
\&     return 0;
\&   }
.Ve
.PP
Instead of invoking all pending watchers, the \f(CW\*(C`l_invoke\*(C'\fR callback will
signal the main thread via some unspecified mechanism (signals? pipe
writes? \f(CW\*(C`Async::Interrupt\*(C'\fR?) and then waits until all pending watchers
have been called (in a while loop because a) spurious wakeups are possible
and b) skipping inter-thread-communication when there are no pending
watchers is very beneficial):
.PP
.Vb 4
\&   static void
\&   l_invoke (EV_P)
\&   {
\&     userdata *u = ev_userdata (EV_A);
\&
\&     while (ev_pending_count (EV_A))
\&       {
\&         wake_up_other_thread_in_some_magic_or_not_so_magic_way ();
\&         pthread_cond_wait (&u\->invoke_cv, &u\->lock);
\&       }
\&   }
.Ve
.PP
Now, whenever the main thread gets told to invoke pending watchers, it
will grab the lock, call \f(CW\*(C`ev_invoke_pending\*(C'\fR and then signal the loop
thread to continue:
.PP
.Vb 4
\&   static void
\&   real_invoke_pending (EV_P)
\&   {
\&     userdata *u = ev_userdata (EV_A);
\&
\&     pthread_mutex_lock (&u\->lock);
\&     ev_invoke_pending (EV_A);
\&     pthread_cond_signal (&u\->invoke_cv);
\&     pthread_mutex_unlock (&u\->lock);
\&   }
.Ve
.PP
Whenever you want to start/stop a watcher or do other modifications to an
event loop, you will now have to lock:
.PP
.Vb 2
\&   ev_timer timeout_watcher;
\&   userdata *u = ev_userdata (EV_A);
\&
\&   ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.);
\&
\&   pthread_mutex_lock (&u\->lock);
\&   ev_timer_start (EV_A_ &timeout_watcher);
\&   ev_async_send (EV_A_ &u\->async_w);
\&   pthread_mutex_unlock (&u\->lock);
.Ve
.PP
Note that sending the \f(CW\*(C`ev_async\*(C'\fR watcher is required because otherwise
an event loop currently blocking in the kernel will have no knowledge
about the newly added timer. By waking up the loop it will pick up any new
watchers in the next event loop iteration.
.SS "\s-1THREADS, COROUTINES, CONTINUATIONS, QUEUES... INSTEAD OF CALLBACKS\s0"
.IX Subsection "THREADS, COROUTINES, CONTINUATIONS, QUEUES... INSTEAD OF CALLBACKS"
While the overhead of a callback that e.g. schedules a thread is small, it
is still an overhead. If you embed libev, and your main usage is with some
kind of threads or coroutines, you might want to customise libev so that
it doesn't need callbacks anymore.
.PP
Imagine you have coroutines that you can switch to using a function
\&\f(CW\*(C`switch_to (coro)\*(C'\fR, that libev runs in a coroutine called \f(CW\*(C`libev_coro\*(C'\fR
and that due to some magic, the currently active coroutine is stored in a
global called \f(CW\*(C`current_coro\*(C'\fR. Then you can build your own \*(L"wait for libev
event\*(R" primitive by changing \f(CW\*(C`EV_CB_DECLARE\*(C'\fR and \f(CW\*(C`EV_CB_INVOKE\*(C'\fR (note
the differing \f(CW\*(C`;\*(C'\fR conventions):
.PP
.Vb 2
\&   #define EV_CB_DECLARE(type)   struct my_coro *cb;
\&   #define EV_CB_INVOKE(watcher) switch_to ((watcher)\->cb)
.Ve
.PP
That means instead of having a C callback function, you store the
coroutine to switch to in each watcher, and instead of having libev call
your callback, you instead have it switch to that coroutine.
.PP
A coroutine might now wait for an event with a function called
\&\f(CW\*(C`wait_for_event\*(C'\fR. (the watcher needs to be started, as always, but it doesn't
matter when, or whether the watcher is active or not when this function is
called):
.PP
.Vb 6
\&   void
\&   wait_for_event (ev_watcher *w)
\&   {
\&     ev_set_cb (w, current_coro);
\&     switch_to (libev_coro);
\&   }
.Ve
.PP
That basically suspends the coroutine inside \f(CW\*(C`wait_for_event\*(C'\fR and
continues the libev coroutine, which, when appropriate, switches back to
this or any other coroutine.
.PP
You can do similar tricks if you have, say, threads with an event queue \-
instead of storing a coroutine, you store the queue object and instead of
switching to a coroutine, you push the watcher onto the queue and notify
any waiters.
.PP
To embed libev, see \*(L"\s-1EMBEDDING\*(R"\s0, but in short, it's easiest to create two
files, \fImy_ev.h\fR and \fImy_ev.c\fR that include the respective libev files:
.PP
.Vb 4
\&   // my_ev.h
\&   #define EV_CB_DECLARE(type)   struct my_coro *cb;
\&   #define EV_CB_INVOKE(watcher) switch_to ((watcher)\->cb)
\&   #include "../libev/ev.h"
\&
\&   // my_ev.c
\&   #define EV_H "my_ev.h"
\&   #include "../libev/ev.c"
.Ve
.PP
And then use \fImy_ev.h\fR when you would normally use \fIev.h\fR, and compile
\&\fImy_ev.c\fR into your project. When properly specifying include paths, you
can even use \fIev.h\fR as header file name directly.
.SH "LIBEVENT EMULATION"
.IX Header "LIBEVENT EMULATION"
Libev offers a compatibility emulation layer for libevent. It cannot
emulate the internals of libevent, so here are some usage hints:
.IP "\(bu" 4
Only the libevent\-1.4.1\-beta \s-1API\s0 is being emulated.
.Sp
This was the newest libevent version available when libev was implemented,
and is still mostly unchanged in 2010.
.IP "\(bu" 4
Use it by including <event.h>, as usual.
.IP "\(bu" 4
The following members are fully supported: ev_base, ev_callback,
ev_arg, ev_fd, ev_res, ev_events.
.IP "\(bu" 4
Avoid using ev_flags and the EVLIST_*\-macros: while they are
maintained by libev, they do not work exactly the same way as in libevent (consider
them a private \s-1API\s0).
.IP "\(bu" 4
Priorities are not currently supported. Initialising priorities
will fail and all watchers will have the same priority, even though there
is an ev_pri field.
.IP "\(bu" 4
In libevent, the last base created gets the signals, in libev, the
base that registered the signal gets the signals.
.IP "\(bu" 4
Other members are not supported.
.IP "\(bu" 4
The libev emulation is \fInot\fR \s-1ABI\s0 compatible to libevent, you need
to use the libev header file and library.
.SH "\*(C+ SUPPORT"
.IX Header "C++ SUPPORT"
.SS "C \s-1API\s0"
.IX Subsection "C API"
The normal C \s-1API\s0 should work fine when used from \*(C+: both ev.h and the
libev sources can be compiled as \*(C+. Therefore, code that uses the C \s-1API\s0
will work fine.
.PP
Proper exception specifications might have to be added to callbacks passed
to libev: exceptions may be thrown only from watcher callbacks, all
other callbacks (allocator, syserr, loop acquire/release and periodic
reschedule callbacks) must not throw exceptions, and might need a \f(CW\*(C`throw
()\*(C'\fR specification. If you have code that needs to be compiled as both C
and \*(C+ you can use the \f(CW\*(C`EV_THROW\*(C'\fR macro for this:
.PP
.Vb 6
\&   static void
\&   fatal_error (const char *msg) EV_THROW
\&   {
\&     perror (msg);
\&     abort ();
\&   }
\&
\&   ...
\&   ev_set_syserr_cb (fatal_error);
.Ve
.PP
The only \s-1API\s0 functions that can currently throw exceptions are \f(CW\*(C`ev_run\*(C'\fR,
\&\f(CW\*(C`ev_invoke\*(C'\fR, \f(CW\*(C`ev_invoke_pending\*(C'\fR and \f(CW\*(C`ev_loop_destroy\*(C'\fR (the latter
because it runs cleanup watchers).
.PP
Throwing exceptions in watcher callbacks is only supported if libev itself
is compiled with a \*(C+ compiler or your C and \*(C+ environments allow
throwing exceptions through C libraries (most do).
.SS "\*(C+ \s-1API\s0"
.IX Subsection "C++ API"
Libev comes with some simplistic wrapper classes for \*(C+ that mainly allow
you to use some convenience methods to start/stop watchers and also change
the callback model to a model using method callbacks on objects.
.PP
To use it,
.PP
.Vb 1
\&   #include <ev++.h>
.Ve
.PP
This automatically includes \fIev.h\fR and puts all of its definitions (many
of them macros) into the global namespace. All \*(C+ specific things are
put into the \f(CW\*(C`ev\*(C'\fR namespace. It should support all the same embedding
options as \fIev.h\fR, most notably \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR.
.PP
Care has been taken to keep the overhead low. The only data member the \*(C+
classes add (compared to plain C\-style watchers) is the event loop pointer
that the watcher is associated with (or no additional members at all if
you disable \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR when embedding libev).
.PP
Currently, functions, static and non-static member functions and classes
with \f(CW\*(C`operator ()\*(C'\fR can be used as callbacks. Other types should be easy
to add as long as they only need one additional pointer for context. If
you need support for other types of functors please contact the author
(preferably after implementing it).
.PP
For all this to work, your \*(C+ compiler either has to use the same calling
conventions as your C compiler (for static member functions), or you have
to embed libev and compile libev itself as \*(C+.
.PP
Here is a list of things available in the \f(CW\*(C`ev\*(C'\fR namespace:
.ie n .IP """ev::READ"", ""ev::WRITE"" etc." 4
.el .IP "\f(CWev::READ\fR, \f(CWev::WRITE\fR etc." 4
.IX Item "ev::READ, ev::WRITE etc."
These are just enum values with the same values as the \f(CW\*(C`EV_READ\*(C'\fR etc.
macros from \fIev.h\fR.
.ie n .IP """ev::tstamp"", ""ev::now""" 4
.el .IP "\f(CWev::tstamp\fR, \f(CWev::now\fR" 4
.IX Item "ev::tstamp, ev::now"
Aliases to the same types/functions as with the \f(CW\*(C`ev_\*(C'\fR prefix.
.ie n .IP """ev::io"", ""ev::timer"", ""ev::periodic"", ""ev::idle"", ""ev::sig"" etc." 4
.el .IP "\f(CWev::io\fR, \f(CWev::timer\fR, \f(CWev::periodic\fR, \f(CWev::idle\fR, \f(CWev::sig\fR etc." 4
.IX Item "ev::io, ev::timer, ev::periodic, ev::idle, ev::sig etc."
For each \f(CW\*(C`ev_TYPE\*(C'\fR watcher in \fIev.h\fR there is a corresponding class of
the same name in the \f(CW\*(C`ev\*(C'\fR namespace, with the exception of \f(CW\*(C`ev_signal\*(C'\fR
which is called \f(CW\*(C`ev::sig\*(C'\fR to avoid clashes with the \f(CW\*(C`signal\*(C'\fR macro
defined by many implementations.
.Sp
All of those classes have these methods:
.RS 4
.IP "ev::TYPE::TYPE ()" 4
.IX Item "ev::TYPE::TYPE ()"
.PD 0
.IP "ev::TYPE::TYPE (loop)" 4
.IX Item "ev::TYPE::TYPE (loop)"
.IP "ev::TYPE::~TYPE" 4
.IX Item "ev::TYPE::~TYPE"
.PD
The constructor (optionally) takes an event loop to associate the watcher
with. If it is omitted, it will use \f(CW\*(C`EV_DEFAULT\*(C'\fR.
.Sp
The constructor calls \f(CW\*(C`ev_init\*(C'\fR for you, which means you have to call the
\&\f(CW\*(C`set\*(C'\fR method before starting it.
.Sp
It will not set a callback, however: You have to call the templated \f(CW\*(C`set\*(C'\fR
method to set a callback before you can start the watcher.
.Sp
(The reason why you have to use a method is a limitation in \*(C+ which does
not allow explicit template arguments for constructors).
.Sp
The destructor automatically stops the watcher if it is active.
.IP "w\->set<class, &class::method> (object *)" 4
.IX Item "w->set<class, &class::method> (object *)"
This method sets the callback method to call. The method has to have a
signature of \f(CW\*(C`void (*)(ev_TYPE &, int)\*(C'\fR, it receives the watcher as
first argument and the \f(CW\*(C`revents\*(C'\fR as second. The object must be given as
parameter and is stored in the \f(CW\*(C`data\*(C'\fR member of the watcher.
.Sp
This method synthesizes efficient thunking code to call your method from
the C callback that libev requires. If your compiler can inline your
callback (i.e. it is visible to it at the place of the \f(CW\*(C`set\*(C'\fR call and
your compiler is good :), then the method will be fully inlined into the
thunking function, making it as fast as a direct C callback.
.Sp
Example: simple class declaration and watcher initialisation
.Sp
.Vb 4
\&   struct myclass
\&   {
\&     void io_cb (ev::io &w, int revents) { }
\&   };
\&
\&   myclass obj;
\&   ev::io iow;
\&   iow.set <myclass, &myclass::io_cb> (&obj);
.Ve
.IP "w\->set (object *)" 4
.IX Item "w->set (object *)"
This is a variation of a method callback \- leaving out the method to call
will default the method to \f(CW\*(C`operator ()\*(C'\fR, which makes it possible to use
functor objects without having to manually specify the \f(CW\*(C`operator ()\*(C'\fR all
the time. Incidentally, you can then also leave out the template argument
list.
.Sp
The \f(CW\*(C`operator ()\*(C'\fR method prototype must be \f(CW\*(C`void operator ()(watcher &w,
int revents)\*(C'\fR.
.Sp
See the method\-\f(CW\*(C`set\*(C'\fR above for more details.
.Sp
Example: use a functor object as callback.
.Sp
.Vb 7
\&   struct myfunctor
\&   {
\&     void operator() (ev::io &w, int revents)
\&     {
\&       ...
\&     }
\&   };
\&
\&   myfunctor f;
\&
\&   ev::io w;
\&   w.set (&f);
.Ve
.IP "w\->set<function> (void *data = 0)" 4
.IX Item "w->set<function> (void *data = 0)"
Also sets a callback, but uses a static method or plain function as
callback. The optional \f(CW\*(C`data\*(C'\fR argument will be stored in the watcher's
\&\f(CW\*(C`data\*(C'\fR member and is free for you to use.
.Sp
The prototype of the \f(CW\*(C`function\*(C'\fR must be \f(CW\*(C`void (*)(ev::TYPE &w, int)\*(C'\fR.
.Sp
See the method\-\f(CW\*(C`set\*(C'\fR above for more details.
.Sp
Example: Use a plain function as callback.
.Sp
.Vb 2
\&   static void io_cb (ev::io &w, int revents) { }
\&   iow.set <io_cb> ();
.Ve
.IP "w\->set (loop)" 4
.IX Item "w->set (loop)"
Associates a different \f(CW\*(C`struct ev_loop\*(C'\fR with this watcher. You can only
do this when the watcher is inactive (and not pending either).
.IP "w\->set ([arguments])" 4
.IX Item "w->set ([arguments])"
Basically the same as \f(CW\*(C`ev_TYPE_set\*(C'\fR (except for \f(CW\*(C`ev::embed\*(C'\fR watchers),
with the same arguments. Either this method or a suitable start method
must be called at least once. Unlike the C counterpart, an active watcher
gets automatically stopped and restarted when reconfiguring it with this
method.
.Sp
For \f(CW\*(C`ev::embed\*(C'\fR watchers this method is called \f(CW\*(C`set_embed\*(C'\fR, to avoid
clashing with the \f(CW\*(C`set (loop)\*(C'\fR method.
.IP "w\->start ()" 4
.IX Item "w->start ()"
Starts the watcher. Note that there is no \f(CW\*(C`loop\*(C'\fR argument, as the
constructor already stores the event loop.
.IP "w\->start ([arguments])" 4
.IX Item "w->start ([arguments])"
Instead of calling \f(CW\*(C`set\*(C'\fR and \f(CW\*(C`start\*(C'\fR methods separately, it is often
convenient to wrap them in one call. Uses the same type of arguments as
the configure \f(CW\*(C`set\*(C'\fR method of the watcher.
.IP "w\->stop ()" 4
.IX Item "w->stop ()"
Stops the watcher if it is active. Again, no \f(CW\*(C`loop\*(C'\fR argument.
.ie n .IP "w\->again () (""ev::timer"", ""ev::periodic"" only)" 4
.el .IP "w\->again () (\f(CWev::timer\fR, \f(CWev::periodic\fR only)" 4
.IX Item "w->again () (ev::timer, ev::periodic only)"
For \f(CW\*(C`ev::timer\*(C'\fR and \f(CW\*(C`ev::periodic\*(C'\fR, this invokes the corresponding
\&\f(CW\*(C`ev_TYPE_again\*(C'\fR function.
.ie n .IP "w\->sweep () (""ev::embed"" only)" 4
.el .IP "w\->sweep () (\f(CWev::embed\fR only)" 4
.IX Item "w->sweep () (ev::embed only)"
Invokes \f(CW\*(C`ev_embed_sweep\*(C'\fR.
.ie n .IP "w\->update () (""ev::stat"" only)" 4
.el .IP "w\->update () (\f(CWev::stat\fR only)" 4
.IX Item "w->update () (ev::stat only)"
Invokes \f(CW\*(C`ev_stat_stat\*(C'\fR.
.RE
.RS 4
.RE
.PP
Example: Define a class with two I/O watchers and one idle watcher, and
start the I/O watchers in the constructor.
.PP
.Vb 5
\&   class myclass
\&   {
\&     ev::io   io  ; void io_cb   (ev::io   &w, int revents);
\&     ev::io   io2 ; void io2_cb  (ev::io   &w, int revents);
\&     ev::idle idle; void idle_cb (ev::idle &w, int revents);
\&
\&     myclass (int fd)
\&     {
\&       io  .set <myclass, &myclass::io_cb  > (this);
\&       io2 .set <myclass, &myclass::io2_cb > (this);
\&       idle.set <myclass, &myclass::idle_cb> (this);
\&
\&       io.set (fd, ev::WRITE); // configure the watcher
\&       io.start ();            // start it whenever convenient
\&
\&       io2.start (fd, ev::READ); // set + start in one call
\&     }
\&   };
.Ve
.SH "OTHER LANGUAGE BINDINGS"
.IX Header "OTHER LANGUAGE BINDINGS"
Libev does not offer other language bindings itself, but bindings for a
number of languages exist in the form of third-party packages. If you know
any interesting language binding in addition to the ones listed here, drop
me a note.
.IP "Perl" 4
.IX Item "Perl"
The \s-1EV\s0 module implements the full libev \s-1API\s0 and is actually used to test
libev. \s-1EV\s0 is developed together with libev. Apart from the \s-1EV\s0 core module,
there are additional modules that implement libev-compatible interfaces
to \f(CW\*(C`libadns\*(C'\fR (\f(CW\*(C`EV::ADNS\*(C'\fR, but \f(CW\*(C`AnyEvent::DNS\*(C'\fR is preferred nowadays),
\&\f(CW\*(C`Net::SNMP\*(C'\fR (\f(CW\*(C`Net::SNMP::EV\*(C'\fR) and the \f(CW\*(C`libglib\*(C'\fR event core (\f(CW\*(C`Glib::EV\*(C'\fR
and \f(CW\*(C`EV::Glib\*(C'\fR).
.Sp
It can be found and installed via \s-1CPAN,\s0 its homepage is at
<http://software.schmorp.de/pkg/EV>.
.IP "Python" 4
.IX Item "Python"
Python bindings can be found at <http://code.google.com/p/pyev/>. They
seem to be quite complete and well-documented.
.IP "Ruby" 4
.IX Item "Ruby"
Tony Arcieri has written a ruby extension that offers access to a subset
of the libev \s-1API\s0 and adds file handle abstractions, asynchronous \s-1DNS\s0 and
more on top of it. It can be found via gem servers. Its homepage is at
<http://rev.rubyforge.org/>.
.Sp
Roger Pack reports that using the link order \f(CW\*(C`\-lws2_32 \-lmsvcrt\-ruby\-190\*(C'\fR
makes rev work even on mingw.
.IP "Haskell" 4
.IX Item "Haskell"
A haskell binding to libev is available at
<http://hackage.haskell.org/cgi\-bin/hackage\-scripts/package/hlibev>.
.IP "D" 4
.IX Item "D"
Leandro Lucarella has written a D language binding (\fIev.d\fR) for libev, to
be found at <http://www.llucax.com.ar/proj/ev.d/index.html>.
.IP "Ocaml" 4
.IX Item "Ocaml"
Erkki Seppala has written Ocaml bindings for libev, to be found at
<http://modeemi.cs.tut.fi/~flux/software/ocaml\-ev/>.
.IP "Lua" 4
.IX Item "Lua"
Brian Maher has written a partial interface to libev for lua (at the
time of this writing, only \f(CW\*(C`ev_io\*(C'\fR and \f(CW\*(C`ev_timer\*(C'\fR), to be found at
<http://github.com/brimworks/lua\-ev>.
.IP "Javascript" 4
.IX Item "Javascript"
Node.js (<http://nodejs.org>) uses libev as the underlying event library.
.IP "Others" 4
.IX Item "Others"
There are others, and I stopped counting.
.SH "MACRO MAGIC"
.IX Header "MACRO MAGIC"
Libev can be compiled with a variety of options, the most fundamental
of which is \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR. This option determines whether (most)
functions and callbacks have an initial \f(CW\*(C`struct ev_loop *\*(C'\fR argument.
.PP
To make it easier to write programs that cope with either variant, the
following macros are defined:
.ie n .IP """EV_A"", ""EV_A_""" 4
.el .IP "\f(CWEV_A\fR, \f(CWEV_A_\fR" 4
.IX Item "EV_A, EV_A_"
This provides the loop \fIargument\fR for functions, if one is required (\*(L"ev
loop argument\*(R"). The \f(CW\*(C`EV_A\*(C'\fR form is used when this is the sole argument,
\&\f(CW\*(C`EV_A_\*(C'\fR is used when other arguments are following. Example:
.Sp
.Vb 3
\&   ev_unref (EV_A);
\&   ev_timer_start (EV_A_ watcher);
\&   ev_run (EV_A_ 0);
.Ve
.Sp
It assumes the variable \f(CW\*(C`loop\*(C'\fR of type \f(CW\*(C`struct ev_loop *\*(C'\fR is in scope,
which is often provided by the following macro.
.ie n .IP """EV_P"", ""EV_P_""" 4
.el .IP "\f(CWEV_P\fR, \f(CWEV_P_\fR" 4
.IX Item "EV_P, EV_P_"
This provides the loop \fIparameter\fR for functions, if one is required (\*(L"ev
loop parameter\*(R"). The \f(CW\*(C`EV_P\*(C'\fR form is used when this is the sole parameter,
\&\f(CW\*(C`EV_P_\*(C'\fR is used when other parameters are following. Example:
.Sp
.Vb 2
\&   // this is how ev_unref is being declared
\&   static void ev_unref (EV_P);
\&
\&   // this is how you can declare your typical callback
\&   static void cb (EV_P_ ev_timer *w, int revents)
.Ve
.Sp
It declares a parameter \f(CW\*(C`loop\*(C'\fR of type \f(CW\*(C`struct ev_loop *\*(C'\fR, quite
suitable for use with \f(CW\*(C`EV_A\*(C'\fR.
.ie n .IP """EV_DEFAULT"", ""EV_DEFAULT_""" 4
.el .IP "\f(CWEV_DEFAULT\fR, \f(CWEV_DEFAULT_\fR" 4
.IX Item "EV_DEFAULT, EV_DEFAULT_"
Similar to the other two macros, this gives you the value of the default
loop, if multiple loops are supported (\*(L"ev loop default\*(R"). The default loop
will be initialised if it isn't already initialised.
.Sp
For non-multiplicity builds, these macros do nothing, so you always have
to initialise the loop somewhere.
.ie n .IP """EV_DEFAULT_UC"", ""EV_DEFAULT_UC_""" 4
.el .IP "\f(CWEV_DEFAULT_UC\fR, \f(CWEV_DEFAULT_UC_\fR" 4
.IX Item "EV_DEFAULT_UC, EV_DEFAULT_UC_"
Usage identical to \f(CW\*(C`EV_DEFAULT\*(C'\fR and \f(CW\*(C`EV_DEFAULT_\*(C'\fR, but requires that the
default loop has been initialised (\f(CW\*(C`UC\*(C'\fR == unchecked). Their behaviour
is undefined when the default loop has not been initialised by a previous
execution of \f(CW\*(C`EV_DEFAULT\*(C'\fR, \f(CW\*(C`EV_DEFAULT_\*(C'\fR or \f(CW\*(C`ev_default_init (...)\*(C'\fR.
.Sp
It is often prudent to use \f(CW\*(C`EV_DEFAULT\*(C'\fR when initialising the first
watcher in a function but use \f(CW\*(C`EV_DEFAULT_UC\*(C'\fR afterwards.
.PP
Example: Declare and initialise a check watcher, utilising the above
macros so it will work regardless of whether multiple loops are supported
or not.
.PP
.Vb 5
\&   static void
\&   check_cb (EV_P_ ev_check *w, int revents)
\&   {
\&     ev_check_stop (EV_A_ w);
\&   }
\&
\&   ev_check check;
\&   ev_check_init (&check, check_cb);
\&   ev_check_start (EV_DEFAULT_ &check);
\&   ev_run (EV_DEFAULT_ 0);
.Ve
.SH "EMBEDDING"
.IX Header "EMBEDDING"
Libev can be (and often is) directly embedded into host
applications. Examples of applications that embed it include the Deliantra
Game Server, the \s-1EV\s0 perl module, the \s-1GNU\s0 Virtual Private Ethernet (gvpe)
and rxvt-unicode.
.PP
The goal is to enable you to just copy the necessary files into your
source directory without having to change even a single line in them, so
you can easily upgrade by simply copying (or having a checked-out copy of
libev somewhere in your source tree).
.SS "\s-1FILESETS\s0"
.IX Subsection "FILESETS"
Depending on which features you need, you have to include one or more sets
of files in your application.
.PP
\fI\s-1CORE EVENT LOOP\s0\fR
.IX Subsection "CORE EVENT LOOP"
.PP
To include only the libev core (all the \f(CW\*(C`ev_*\*(C'\fR functions), with manual
configuration (no autoconf):
.PP
.Vb 2
\&   #define EV_STANDALONE 1
\&   #include "ev.c"
.Ve
.PP
This will automatically include \fIev.h\fR, too, and should be done in a
single C source file only to provide the function implementations. To use
it, do the same for \fIev.h\fR in all files wishing to use this \s-1API \s0(best
done by writing a wrapper around \fIev.h\fR that you can include instead and
where you can put other configuration options):
.PP
.Vb 2
\&   #define EV_STANDALONE 1
\&   #include "ev.h"
.Ve
.PP
Both header files and implementation files can be compiled with a \*(C+
compiler (at least, that's a stated goal, and breakage will be treated
as a bug).
.PP
You need the following files in your source tree, or in a directory
in your include path (e.g. in libev/ when using \-Ilibev):
.PP
.Vb 4
\&   ev.h
\&   ev.c
\&   ev_vars.h
\&   ev_wrap.h
\&
\&   ev_win32.c      required on win32 platforms only
\&
\&   ev_select.c     only when select backend is enabled
\&   ev_poll.c       only when poll backend is enabled
\&   ev_epoll.c      only when the epoll backend is enabled
\&   ev_kqueue.c     only when the kqueue backend is enabled
\&   ev_port.c       only when the solaris port backend is enabled
.Ve
.PP
\&\fIev.c\fR includes the backend files directly when enabled, so you only need
to compile this single file.
.PP
\fI\s-1LIBEVENT COMPATIBILITY API\s0\fR
.IX Subsection "LIBEVENT COMPATIBILITY API"
.PP
To include the libevent compatibility \s-1API,\s0 also include:
.PP
.Vb 1
\&   #include "event.c"
.Ve
.PP
in the file including \fIev.c\fR, and:
.PP
.Vb 1
\&   #include "event.h"
.Ve
.PP
in the files that want to use the libevent \s-1API.\s0 This also includes \fIev.h\fR.
.PP
You need the following additional files for this:
.PP
.Vb 2
\&   event.h
\&   event.c
.Ve
.PP
\fI\s-1AUTOCONF SUPPORT\s0\fR
.IX Subsection "AUTOCONF SUPPORT"
.PP
Instead of using \f(CW\*(C`EV_STANDALONE=1\*(C'\fR and providing your configuration in
whatever way you want, you can also \f(CW\*(C`m4_include([libev.m4])\*(C'\fR in your
\&\fIconfigure.ac\fR and leave \f(CW\*(C`EV_STANDALONE\*(C'\fR undefined. \fIev.c\fR will then
include \fIconfig.h\fR and configure itself accordingly.
.PP
For this of course you need the m4 file:
.PP
.Vb 1
\&   libev.m4
.Ve
.SS "\s-1PREPROCESSOR SYMBOLS/MACROS\s0"
.IX Subsection "PREPROCESSOR SYMBOLS/MACROS"
Libev can be configured via a variety of preprocessor symbols you have to
define before including (or compiling) any of its files. The default in
the absence of autoconf is documented for every option.
.PP
Symbols marked with \*(L"(h)\*(R" do not change the \s-1ABI,\s0 and can have different
values when compiling libev vs. including \fIev.h\fR, so it is permissible
to redefine them before including \fIev.h\fR without breaking compatibility
to a compiled library. All other symbols change the \s-1ABI,\s0 which means all
users of libev and the libev code itself must be compiled with compatible
settings.
.IP "\s-1EV_COMPAT3 \s0(h)" 4
.IX Item "EV_COMPAT3 (h)"
Backwards compatibility is a major concern for libev. This is why this
release of libev comes with wrappers for the functions and symbols that
have been renamed between libev version 3 and 4.
.Sp
You can disable these wrappers (to test compatibility with future
versions) by defining \f(CW\*(C`EV_COMPAT3\*(C'\fR to \f(CW0\fR when compiling your
sources. This has the additional advantage that you can drop the \f(CW\*(C`struct\*(C'\fR
from \f(CW\*(C`struct ev_loop\*(C'\fR declarations, as libev will provide an \f(CW\*(C`ev_loop\*(C'\fR
typedef in that case.
.Sp
In some future version, the default for \f(CW\*(C`EV_COMPAT3\*(C'\fR will become \f(CW0\fR,
and in some even more future version the compatibility code will be
removed completely.
.IP "\s-1EV_STANDALONE \s0(h)" 4
.IX Item "EV_STANDALONE (h)"
Must always be \f(CW1\fR if you do not use autoconf configuration, which
keeps libev from including \fIconfig.h\fR, and it also defines dummy
implementations for some libevent functions (such as logging, which is not
supported). It will also not define any of the structs usually found in
\&\fIevent.h\fR that are not directly supported by the libev core alone.
.Sp
In standalone mode, libev will still try to automatically deduce the
configuration, but has to be more conservative.
.IP "\s-1EV_USE_FLOOR\s0" 4
.IX Item "EV_USE_FLOOR"
If defined to be \f(CW1\fR, libev will use the \f(CW\*(C`floor ()\*(C'\fR function for its
periodic reschedule calculations, otherwise libev will fall back on a
portable (slower) implementation. If you enable this, you usually have to
link against libm or something equivalent. Enabling this when the \f(CW\*(C`floor\*(C'\fR
function is not available will fail, so the safe default is to not enable
this.
.IP "\s-1EV_USE_MONOTONIC\s0" 4
.IX Item "EV_USE_MONOTONIC"
If defined to be \f(CW1\fR, libev will try to detect the availability of the
monotonic clock option at both compile time and runtime. Otherwise no
use of the monotonic clock option will be attempted. If you enable this,
you usually have to link against librt or something similar. Enabling it
when the functionality isn't available is safe, though, although you have
to make sure you link against any libraries where the \f(CW\*(C`clock_gettime\*(C'\fR
function is hiding in (often \fI\-lrt\fR). See also \f(CW\*(C`EV_USE_CLOCK_SYSCALL\*(C'\fR.
.IP "\s-1EV_USE_REALTIME\s0" 4
.IX Item "EV_USE_REALTIME"
If defined to be \f(CW1\fR, libev will try to detect the availability of the
real-time clock option at compile time (and assume its availability
at runtime if successful). Otherwise no use of the real-time clock
option will be attempted. This effectively replaces \f(CW\*(C`gettimeofday\*(C'\fR
by \f(CW\*(C`clock_gettime (CLOCK_REALTIME, ...)\*(C'\fR and will not normally affect
correctness. See the note about libraries in the description of
\&\f(CW\*(C`EV_USE_MONOTONIC\*(C'\fR, though. Defaults to the opposite value of
\&\f(CW\*(C`EV_USE_CLOCK_SYSCALL\*(C'\fR.
.IP "\s-1EV_USE_CLOCK_SYSCALL\s0" 4
.IX Item "EV_USE_CLOCK_SYSCALL"
If defined to be \f(CW1\fR, libev will try to use a direct syscall instead
of calling the system-provided \f(CW\*(C`clock_gettime\*(C'\fR function. This option
exists because on GNU/Linux, \f(CW\*(C`clock_gettime\*(C'\fR is in \f(CW\*(C`librt\*(C'\fR, but \f(CW\*(C`librt\*(C'\fR
unconditionally pulls in \f(CW\*(C`libpthread\*(C'\fR, slowing down single-threaded
programs needlessly. Using a direct syscall is slightly slower (in
theory), because no optimised vdso implementation can be used, but avoids
the pthread dependency. Defaults to \f(CW1\fR on GNU/Linux with glibc 2.x or
higher, as it simplifies linking (no need for \f(CW\*(C`\-lrt\*(C'\fR).
.IP "\s-1EV_USE_NANOSLEEP\s0" 4
.IX Item "EV_USE_NANOSLEEP"
If defined to be \f(CW1\fR, libev will assume that \f(CW\*(C`nanosleep ()\*(C'\fR is available
and will use it for delays. Otherwise it will use \f(CW\*(C`select ()\*(C'\fR.
.IP "\s-1EV_USE_EVENTFD\s0" 4
.IX Item "EV_USE_EVENTFD"
If defined to be \f(CW1\fR, then libev will assume that \f(CW\*(C`eventfd ()\*(C'\fR is
available and will probe for kernel support at runtime. This will improve
\&\f(CW\*(C`ev_signal\*(C'\fR and \f(CW\*(C`ev_async\*(C'\fR performance and reduce resource consumption.
If undefined, it will be enabled if the headers indicate GNU/Linux + Glibc
2.7 or newer, otherwise disabled.
.IP "\s-1EV_USE_SELECT\s0" 4
.IX Item "EV_USE_SELECT"
If undefined or defined to be \f(CW1\fR, libev will compile in support for the
\&\f(CW\*(C`select\*(C'\fR(2) backend. No attempt at auto-detection will be done: if no
other method takes over, select will be it. Otherwise the select backend
will not be compiled in.
.IP "\s-1EV_SELECT_USE_FD_SET\s0" 4
.IX Item "EV_SELECT_USE_FD_SET"
If defined to \f(CW1\fR, then the select backend will use the system \f(CW\*(C`fd_set\*(C'\fR
structure. This is useful if libev doesn't compile due to a missing
\&\f(CW\*(C`NFDBITS\*(C'\fR or \f(CW\*(C`fd_mask\*(C'\fR definition or it mis-guesses the bitset layout
on exotic systems. This usually limits the range of file descriptors to
some low limit such as 1024 or might have other limitations (winsocket
only allows 64 sockets). The \f(CW\*(C`FD_SETSIZE\*(C'\fR macro, set before compilation,
configures the maximum size of the \f(CW\*(C`fd_set\*(C'\fR.
.IP "\s-1EV_SELECT_IS_WINSOCKET\s0" 4
.IX Item "EV_SELECT_IS_WINSOCKET"
When defined to \f(CW1\fR, the select backend will assume that
select/socket/connect etc. don't understand file descriptors but
want osf handles on win32 (this is the case when the select to
be used is the winsock select). This means that it will call
\&\f(CW\*(C`_get_osfhandle\*(C'\fR on the fd to convert it to an \s-1OS\s0 handle. Otherwise,
it is assumed that all these functions actually work on fds, even
on win32. Should not be defined on non\-win32 platforms.
.IP "\s-1EV_FD_TO_WIN32_HANDLE\s0(fd)" 4
.IX Item "EV_FD_TO_WIN32_HANDLE(fd)"
If \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR is enabled, then libev needs a way to map
file descriptors to socket handles. When not defining this symbol (the
default), then libev will call \f(CW\*(C`_get_osfhandle\*(C'\fR, which is usually
correct. In some cases, programs use their own file descriptor management,
in which case they can provide this function to map fds to socket handles.
.IP "\s-1EV_WIN32_HANDLE_TO_FD\s0(handle)" 4
.IX Item "EV_WIN32_HANDLE_TO_FD(handle)"
If \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR is enabled, then libev maps handles to file descriptors
using the standard \f(CW\*(C`_open_osfhandle\*(C'\fR function. For programs implementing
their own fd to handle mapping, overwriting this function makes it easier
to do so. This can be done by defining this macro to an appropriate value.
.IP "\s-1EV_WIN32_CLOSE_FD\s0(fd)" 4
.IX Item "EV_WIN32_CLOSE_FD(fd)"
If programs implement their own fd to handle mapping on win32, then this
macro can be used to override the \f(CW\*(C`close\*(C'\fR function, useful to unregister
file descriptors again. Note that the replacement function has to close
the underlying \s-1OS\s0 handle.
.IP "\s-1EV_USE_WSASOCKET\s0" 4
.IX Item "EV_USE_WSASOCKET"
If defined to be \f(CW1\fR, libev will use \f(CW\*(C`WSASocket\*(C'\fR to create its internal
communication socket, which works better in some environments. Otherwise,
the normal \f(CW\*(C`socket\*(C'\fR function will be used, which works better in other
environments.
.IP "\s-1EV_USE_POLL\s0" 4
.IX Item "EV_USE_POLL"
If defined to be \f(CW1\fR, libev will compile in support for the \f(CW\*(C`poll\*(C'\fR(2)
backend. If undefined, it will be enabled on non\-win32 platforms. It
takes precedence over select.
.IP "\s-1EV_USE_EPOLL\s0" 4
.IX Item "EV_USE_EPOLL"
If defined to be \f(CW1\fR, libev will compile in support for the Linux
\&\f(CW\*(C`epoll\*(C'\fR(7) backend. Its availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for GNU/Linux systems. If undefined, it will be enabled if the
headers indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.
.IP "\s-1EV_USE_KQUEUE\s0" 4
.IX Item "EV_USE_KQUEUE"
If defined to be \f(CW1\fR, libev will compile in support for the \s-1BSD\s0 style
\&\f(CW\*(C`kqueue\*(C'\fR(2) backend. Its actual availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for \s-1BSD\s0 and BSD-like systems, although on most BSDs kqueue only
supports some types of fds correctly (the only platform we found that
supports ptys for example was NetBSD), so kqueue might be compiled in, but
not be used unless explicitly requested. The best way to use it is to find
out whether kqueue supports your type of fd properly and use an embedded
kqueue loop.
.IP "\s-1EV_USE_PORT\s0" 4
.IX Item "EV_USE_PORT"
If defined to be \f(CW1\fR, libev will compile in support for the Solaris
10 port style backend. Its availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for Solaris 10 systems.
.IP "\s-1EV_USE_DEVPOLL\s0" 4
.IX Item "EV_USE_DEVPOLL"
Reserved for future expansion, works like the \s-1USE\s0 symbols above.
.IP "\s-1EV_USE_INOTIFY\s0" 4
.IX Item "EV_USE_INOTIFY"
If defined to be \f(CW1\fR, libev will compile in support for the Linux inotify
interface to speed up \f(CW\*(C`ev_stat\*(C'\fR watchers. Its actual availability will
be detected at runtime. If undefined, it will be enabled if the headers
indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.
.IP "\s-1EV_NO_SMP\s0" 4
.IX Item "EV_NO_SMP"
If defined to be \f(CW1\fR, libev will assume that memory is always coherent
between threads, that is, threads can be used, but threads never run on
different cpus (or different cpu cores). This reduces dependencies
and makes libev faster.
.IP "\s-1EV_NO_THREADS\s0" 4
.IX Item "EV_NO_THREADS"
If defined to be \f(CW1\fR, libev will assume that it will never be called from
different threads (that includes signal handlers), which is a stronger
assumption than \f(CW\*(C`EV_NO_SMP\*(C'\fR, above. This reduces dependencies and makes
libev faster.
.IP "\s-1EV_ATOMIC_T\s0" 4
.IX Item "EV_ATOMIC_T"
Libev requires an integer type (suitable for storing \f(CW0\fR or \f(CW1\fR) whose
access is atomic with respect to other threads or signal contexts. No
such type is easily found in the C language, so you can provide your own
type that you know is safe for your purposes. It is used both for signal
handler \*(L"locking\*(R" as well as for signal and thread safety in \f(CW\*(C`ev_async\*(C'\fR
watchers.
.Sp
In the absence of this define, libev will use \f(CW\*(C`sig_atomic_t volatile\*(C'\fR
(from \fIsignal.h\fR), which is usually good enough on most platforms.
.IP "\s-1EV_H \s0(h)" 4
.IX Item "EV_H (h)"
The name of the \fIev.h\fR header file used to include it. The default if
undefined is \f(CW"ev.h"\fR in \fIevent.h\fR, \fIev.c\fR and \fIev++.h\fR. This can be
used to virtually rename the \fIev.h\fR header file in case of conflicts.
.IP "\s-1EV_CONFIG_H \s0(h)" 4
.IX Item "EV_CONFIG_H (h)"
If \f(CW\*(C`EV_STANDALONE\*(C'\fR isn't \f(CW1\fR, this variable can be used to override
\&\fIev.c\fR's idea of where to find the \fIconfig.h\fR file, similarly to
\&\f(CW\*(C`EV_H\*(C'\fR, above.
.IP "\s-1EV_EVENT_H \s0(h)" 4
.IX Item "EV_EVENT_H (h)"
Similarly to \f(CW\*(C`EV_H\*(C'\fR, this macro can be used to override \fIevent.c\fR's idea
of how the \fIevent.h\fR header can be found, the default is \f(CW"event.h"\fR.
.IP "\s-1EV_PROTOTYPES \s0(h)" 4
.IX Item "EV_PROTOTYPES (h)"
If defined to be \f(CW0\fR, then \fIev.h\fR will not define any function
prototypes, but still define all the structs and other symbols. This is
occasionally useful if you want to provide your own wrapper functions
around libev functions.
.IP "\s-1EV_MULTIPLICITY\s0" 4
.IX Item "EV_MULTIPLICITY"
If undefined or defined to \f(CW1\fR, then all event-loop-specific functions
will have the \f(CW\*(C`struct ev_loop *\*(C'\fR as first argument, and you can create
additional independent event loops. Otherwise there will be no support
for multiple event loops and there is no first event loop pointer
argument. Instead, all functions act on the single default loop.
.Sp
Note that \f(CW\*(C`EV_DEFAULT\*(C'\fR and \f(CW\*(C`EV_DEFAULT_\*(C'\fR will no longer provide a
default loop when multiplicity is switched off \- you always have to
initialise the loop manually in this case.
.IP "\s-1EV_MINPRI\s0" 4
.IX Item "EV_MINPRI"
.PD 0
.IP "\s-1EV_MAXPRI\s0" 4
.IX Item "EV_MAXPRI"
.PD
The range of allowed priorities. \f(CW\*(C`EV_MINPRI\*(C'\fR must be smaller than or equal to
\&\f(CW\*(C`EV_MAXPRI\*(C'\fR, but otherwise there are no non-obvious limitations. You can
provide for more priorities by overriding those symbols (usually defined
to be \f(CW\*(C`\-2\*(C'\fR and \f(CW2\fR, respectively).
.Sp
When doing priority-based operations, libev usually has to linearly search
all the priorities, so having many of them (hundreds) uses a lot of space
and time, so using the defaults of five priorities (\-2 .. +2) is usually
fine.
.Sp
If your embedding application does not need any priorities, defining these
both to \f(CW0\fR will save some memory and \s-1CPU.\s0
.IP "\s-1EV_PERIODIC_ENABLE, EV_IDLE_ENABLE, EV_EMBED_ENABLE, EV_STAT_ENABLE, EV_PREPARE_ENABLE, EV_CHECK_ENABLE, EV_FORK_ENABLE, EV_SIGNAL_ENABLE, EV_ASYNC_ENABLE, EV_CHILD_ENABLE.\s0" 4
.IX Item "EV_PERIODIC_ENABLE, EV_IDLE_ENABLE, EV_EMBED_ENABLE, EV_STAT_ENABLE, EV_PREPARE_ENABLE, EV_CHECK_ENABLE, EV_FORK_ENABLE, EV_SIGNAL_ENABLE, EV_ASYNC_ENABLE, EV_CHILD_ENABLE."
If undefined or defined to be \f(CW1\fR (and the platform supports it), then
the respective watcher type is supported. If defined to be \f(CW0\fR, then it
is not. Disabling watcher types mainly saves code size.
.IP "\s-1EV_FEATURES\s0" 4
.IX Item "EV_FEATURES"
If you need to shave off some kilobytes of code at the expense of some
speed (but with the full \s-1API\s0), you can define this symbol to request
certain subsets of functionality. The default is to enable all features
that can be enabled on the platform.
.Sp
A typical way to use this symbol is to define it to \f(CW0\fR (or to a bitset
with some broad features you want) and then selectively re-enable
additional parts you want, for example if you want everything minimal,
but multiple event loop support, async and child watchers and the poll
backend, use this:
.Sp
.Vb 5
\&   #define EV_FEATURES 0
\&   #define EV_MULTIPLICITY 1
\&   #define EV_USE_POLL 1
\&   #define EV_CHILD_ENABLE 1
\&   #define EV_ASYNC_ENABLE 1
.Ve
.Sp
The actual value is a bitset, it can be a combination of the following
values (by default, all of these are enabled):
.RS 4
.ie n .IP "1 \- faster/larger code" 4
.el .IP "\f(CW1\fR \- faster/larger code" 4
.IX Item "1 - faster/larger code"
Use larger code to speed up some operations.
.Sp
Currently this is used to override some inlining decisions (enlarging the
code size by roughly 30% on amd64).
.Sp
When optimising for size, use of compiler flags such as \f(CW\*(C`\-Os\*(C'\fR with
gcc is recommended, as well as \f(CW\*(C`\-DNDEBUG\*(C'\fR, as libev contains a number of
assertions.
.Sp
The default is off when \f(CW\*(C`_\|_OPTIMIZE_SIZE_\|_\*(C'\fR is defined by your compiler
(e.g. gcc with \f(CW\*(C`\-Os\*(C'\fR).
.ie n .IP "2 \- faster/larger data structures" 4
.el .IP "\f(CW2\fR \- faster/larger data structures" 4
.IX Item "2 - faster/larger data structures"
Replaces the small 2\-heap for timer management by a faster 4\-heap, larger
hash table sizes and so on. This will usually further increase code size
and can additionally have an effect on the size of data structures at
runtime.
.Sp
The default is off when \f(CW\*(C`_\|_OPTIMIZE_SIZE_\|_\*(C'\fR is defined by your compiler
(e.g. gcc with \f(CW\*(C`\-Os\*(C'\fR).
.ie n .IP "4 \- full \s-1API\s0 configuration" 4
.el .IP "\f(CW4\fR \- full \s-1API\s0 configuration" 4
.IX Item "4 - full API configuration"
This enables priorities (sets \f(CW\*(C`EV_MAXPRI\*(C'\fR=2 and \f(CW\*(C`EV_MINPRI\*(C'\fR=\-2), and
enables multiplicity (\f(CW\*(C`EV_MULTIPLICITY\*(C'\fR=1).
.ie n .IP "8 \- full \s-1API\s0" 4
.el .IP "\f(CW8\fR \- full \s-1API\s0" 4
.IX Item "8 - full API"
This enables a lot of the \*(L"lesser used\*(R" \s-1API\s0 functions. See \f(CW\*(C`ev.h\*(C'\fR for
details on which parts of the \s-1API\s0 are still available without this
feature, and do not complain if this subset changes over time.
.ie n .IP "16 \- enable all optional watcher types" 4
.el .IP "\f(CW16\fR \- enable all optional watcher types" 4
.IX Item "16 - enable all optional watcher types"
Enables all optional watcher types.  If you want to selectively enable
only some watcher types other than I/O and timers (e.g. prepare,
embed, async, child...) you can enable them manually by defining
\&\f(CW\*(C`EV_watchertype_ENABLE\*(C'\fR to \f(CW1\fR instead.
.ie n .IP "32 \- enable all backends" 4
.el .IP "\f(CW32\fR \- enable all backends" 4
.IX Item "32 - enable all backends"
This enables all backends \- without this feature, you need to enable at
least one backend manually (\f(CW\*(C`EV_USE_SELECT\*(C'\fR is a good choice).
.ie n .IP "64 \- enable OS-specific ""helper"" APIs" 4
.el .IP "\f(CW64\fR \- enable OS-specific ``helper'' APIs" 4
.IX Item "64 - enable OS-specific helper APIs"
Enable inotify, eventfd, signalfd and similar OS-specific helper APIs by
default.
.RE
.RS 4
.Sp
Compiling with \f(CW\*(C`gcc \-Os \-DEV_STANDALONE \-DEV_USE_EPOLL=1 \-DEV_FEATURES=0\*(C'\fR
reduces the compiled size of libev from 24.7Kb code/2.8Kb data to 6.5Kb
code/0.3Kb data on my GNU/Linux amd64 system, while still giving you I/O
watchers, timers and monotonic clock support.
.Sp
With an intelligent-enough linker (gcc+binutils are intelligent enough
when you use \f(CW\*(C`\-Wl,\-\-gc\-sections \-ffunction\-sections\*(C'\fR) functions unused by
your program might be left out as well \- a binary starting a timer and an
I/O watcher then might come out at only 5Kb.
.RE
.IP "\s-1EV_API_STATIC\s0" 4
.IX Item "EV_API_STATIC"
If this symbol is defined (by default it is not), then all identifiers
will have static linkage. This means that libev will not export any
identifiers, and you cannot link against libev anymore. This can be useful
when you embed libev, only want to use libev functions in a single file,
and do not want its identifiers to be visible.
.Sp
To use this, define \f(CW\*(C`EV_API_STATIC\*(C'\fR and include \fIev.c\fR in the file that
wants to use libev.
.Sp
This option only works when libev is compiled with a C compiler, as \*(C+
doesn't support the required declaration syntax.
.IP "\s-1EV_AVOID_STDIO\s0" 4
.IX Item "EV_AVOID_STDIO"
If this is set to \f(CW1\fR at compile time, then libev will avoid using stdio
functions (printf, scanf, perror etc.). This will increase the code size
somewhat, but if your program doesn't otherwise depend on stdio and your
libc allows it, this avoids linking in the stdio library which is quite
big.
.Sp
Note that error messages might become less precise when this option is
enabled.
.IP "\s-1EV_NSIG\s0" 4
.IX Item "EV_NSIG"
The highest supported signal number, +1 (or, the number of
signals): Normally, libev tries to deduce the maximum number of signals
automatically, but sometimes this fails, in which case it can be
specified. Also, using a lower number than detected (\f(CW32\fR should be
good for about any system in existence) can save some memory, as libev
statically allocates some 12\-24 bytes per signal number.
.IP "\s-1EV_PID_HASHSIZE\s0" 4
.IX Item "EV_PID_HASHSIZE"
\&\f(CW\*(C`ev_child\*(C'\fR watchers use a small hash table to distribute workload by
pid. The default size is \f(CW16\fR (or \f(CW1\fR with \f(CW\*(C`EV_FEATURES\*(C'\fR disabled),
usually more than enough. If you need to manage thousands of children you
might want to increase this value (\fImust\fR be a power of two).
.IP "\s-1EV_INOTIFY_HASHSIZE\s0" 4
.IX Item "EV_INOTIFY_HASHSIZE"
\&\f(CW\*(C`ev_stat\*(C'\fR watchers use a small hash table to distribute workload by
inotify watch id. The default size is \f(CW16\fR (or \f(CW1\fR with \f(CW\*(C`EV_FEATURES\*(C'\fR
disabled), usually more than enough. If you need to manage thousands of
\&\f(CW\*(C`ev_stat\*(C'\fR watchers you might want to increase this value (\fImust\fR be a
power of two).
.IP "\s-1EV_USE_4HEAP\s0" 4
.IX Item "EV_USE_4HEAP"
Heaps are not very cache-efficient. To improve the cache-efficiency of the
timer and periodics heaps, libev uses a 4\-heap when this symbol is defined
to \f(CW1\fR. The 4\-heap uses more complicated (longer) code but has noticeably
faster performance with many (thousands) of watchers.
.Sp
The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it
will be \f(CW0\fR.
.IP "\s-1EV_HEAP_CACHE_AT\s0" 4
.IX Item "EV_HEAP_CACHE_AT"
Heaps are not very cache-efficient. To improve the cache-efficiency of the
timer and periodics heaps, libev can cache the timestamp (\fIat\fR) within
the heap structure (selected by defining \f(CW\*(C`EV_HEAP_CACHE_AT\*(C'\fR to \f(CW1\fR),
which uses 8\-12 bytes more per watcher and a few hundred bytes more code,
but avoids random read accesses on heap changes. This improves performance
noticeably with many (hundreds) of watchers.
.Sp
The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it
will be \f(CW0\fR.
.IP "\s-1EV_VERIFY\s0" 4
.IX Item "EV_VERIFY"
Controls how much internal verification (see \f(CW\*(C`ev_verify ()\*(C'\fR) will
be done: If set to \f(CW0\fR, no internal verification code will be compiled
in. If set to \f(CW1\fR, then verification code will be compiled in, but not
called. If set to \f(CW2\fR, then the internal verification code will be
called once per loop, which can slow down libev. If set to \f(CW3\fR, then the
verification code will be called very frequently, which will slow down
libev considerably.
.Sp
The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it
will be \f(CW0\fR.
.IP "\s-1EV_COMMON\s0" 4
.IX Item "EV_COMMON"
By default, all watchers have a \f(CW\*(C`void *data\*(C'\fR member. By redefining
this macro to something else you can include more and other types of
members. You have to define it each time you include one of the files,
though, and it must be identical each time.
.Sp
For example, the perl \s-1EV\s0 module uses something like this:
.Sp
.Vb 3
\&   #define EV_COMMON                       \e
\&     SV *self; /* contains this struct */  \e
\&     SV *cb_sv, *fh /* note no trailing ";" */
.Ve
.IP "\s-1EV_CB_DECLARE \s0(type)" 4
.IX Item "EV_CB_DECLARE (type)"
.PD 0
.IP "\s-1EV_CB_INVOKE \s0(watcher, revents)" 4
.IX Item "EV_CB_INVOKE (watcher, revents)"
.IP "ev_set_cb (ev, cb)" 4
.IX Item "ev_set_cb (ev, cb)"
.PD
Can be used to change the callback member declaration in each watcher,
and the way callbacks are invoked and set. Must expand to a struct member
definition and a statement, respectively. See the \fIev.h\fR header file for
their default definitions. One possible use for overriding these is to
avoid the \f(CW\*(C`struct ev_loop *\*(C'\fR as first argument in all cases, or to use
method calls instead of plain function calls in \*(C+.
.SS "\s-1EXPORTED API SYMBOLS\s0"
.IX Subsection "EXPORTED API SYMBOLS"
If you need to re-export the \s-1API \s0(e.g. via a \s-1DLL\s0) and you need a list of
exported symbols, you can use the provided \fISymbols.*\fR files which list
all public symbols, one per line:
.PP
.Vb 2
\&   Symbols.ev      for libev proper
\&   Symbols.event   for the libevent emulation
.Ve
.PP
This can also be used to rename all public symbols to avoid clashes with
multiple versions of libev linked together (which is obviously bad in
itself, but sometimes it is inconvenient to avoid this).
.PP
A sed command like this will create wrapper \f(CW\*(C`#define\*(C'\fR's that you need to
include before including \fIev.h\fR:
.PP
.Vb 1
\&   <Symbols.ev sed \-e "s/.*/#define & myprefix_&/" >wrap.h
.Ve
.PP
This would create a file \fIwrap.h\fR which essentially looks like this:
.PP
.Vb 4
\&   #define ev_backend     myprefix_ev_backend
\&   #define ev_check_start myprefix_ev_check_start
\&   #define ev_check_stop  myprefix_ev_check_stop
\&   ...
.Ve
.SS "\s-1EXAMPLES\s0"
.IX Subsection "EXAMPLES"
For a real-world example of a program that includes libev
verbatim, you can have a look at the \s-1EV\s0 perl module
(<http://software.schmorp.de/pkg/EV.html>). It has the libev files in
the \fIlibev/\fR subdirectory and includes them in the \fI\s-1EV/EVAPI\s0.h\fR (public
interface) and \fI\s-1EV\s0.xs\fR (implementation) files. Only the \fI\s-1EV\s0.xs\fR file
will be compiled. It is pretty complex because it provides its own header
file.
.PP
The usage in rxvt-unicode is simpler. It has a \fIev_cpp.h\fR header file
that everybody includes and which overrides some configure choices:
.PP
.Vb 8
\&   #define EV_FEATURES 8
\&   #define EV_USE_SELECT 1
\&   #define EV_PREPARE_ENABLE 1
\&   #define EV_IDLE_ENABLE 1
\&   #define EV_SIGNAL_ENABLE 1
\&   #define EV_CHILD_ENABLE 1
\&   #define EV_USE_STDEXCEPT 0
\&   #define EV_CONFIG_H <config.h>
\&
\&   #include "ev++.h"
.Ve
.PP
And a \fIev_cpp.C\fR implementation file that contains libev proper and is compiled:
.PP
.Vb 2
\&   #include "ev_cpp.h"
\&   #include "ev.c"
.Ve
.SH "INTERACTION WITH OTHER PROGRAMS, LIBRARIES OR THE ENVIRONMENT"
.IX Header "INTERACTION WITH OTHER PROGRAMS, LIBRARIES OR THE ENVIRONMENT"
.SS "\s-1THREADS AND COROUTINES\s0"
.IX Subsection "THREADS AND COROUTINES"
\fI\s-1THREADS\s0\fR
.IX Subsection "THREADS"
.PP
All libev functions are reentrant and thread-safe unless explicitly
documented otherwise, but libev implements no locking itself. This means
that you can use as many loops as you want in parallel, as long as there
are no concurrent calls into any libev function with the same loop
parameter (\f(CW\*(C`ev_default_*\*(C'\fR calls have an implicit default loop parameter,
of course): libev guarantees that different event loops share no data
structures that need any locking.
.PP
Or to put it differently: calls with different loop parameters can be done
concurrently from multiple threads, calls with the same loop parameter
must be done serially (but can be done from different threads, as long as
only one thread ever is inside a call at any point in time, e.g. by using
a mutex per loop).
.PP
Specifically to support threads (and signal handlers), libev implements
so-called \f(CW\*(C`ev_async\*(C'\fR watchers, which allow some limited form of
concurrency on the same event loop, namely waking it up \*(L"from the
outside\*(R".
.PP
If you want to know which design (one loop, locking, or multiple loops
without or something else still) is best for your problem, then I cannot
help you, but here is some generic advice:
.IP "\(bu" 4
most applications have a main thread: use the default libev loop
in that thread, or create a separate thread running only the default loop.
.Sp
This helps integrating other libraries or software modules that use libev
themselves and don't care/know about threading.
.IP "\(bu" 4
one loop per thread is usually a good model.
.Sp
Doing this is almost never wrong, sometimes a better-performance model
exists, but it is always a good start.
.IP "\(bu" 4
other models exist, such as the leader/follower pattern, where one
loop is handed through multiple threads in a kind of round-robin fashion.
.Sp
Choosing a model is hard \- look around, learn, know that usually you can do
better than you currently do :\-)
.IP "\(bu" 4
often you need to talk to some other thread which blocks in the
event loop.
.Sp
\&\f(CW\*(C`ev_async\*(C'\fR watchers can be used to wake them up from other threads safely
(or from signal contexts...).
.Sp
An example use would be to communicate signals or other events that only
work in the default loop by registering the signal watcher with the
default loop and triggering an \f(CW\*(C`ev_async\*(C'\fR watcher from the default loop
watcher callback into the event loop interested in the signal.
.PP
See also \*(L"\s-1THREAD LOCKING EXAMPLE\*(R"\s0.
.PP
\fI\s-1COROUTINES\s0\fR
.IX Subsection "COROUTINES"
.PP
Libev is very accommodating to coroutines (\*(L"cooperative threads\*(R"):
libev fully supports nesting calls to its functions from different
coroutines (e.g. you can call \f(CW\*(C`ev_run\*(C'\fR on the same loop from two
different coroutines, and switch freely between both coroutines running
the loop, as long as you don't confuse yourself). The only exception is
that you must not do this from \f(CW\*(C`ev_periodic\*(C'\fR reschedule callbacks.
.PP
Care has been taken to ensure that libev does not keep local state inside
\&\f(CW\*(C`ev_run\*(C'\fR, and other calls do not usually allow for coroutine switches as
they do not call any callbacks.
.SS "\s-1COMPILER WARNINGS\s0"
.IX Subsection "COMPILER WARNINGS"
Depending on your compiler and compiler settings, you might get no or a
lot of warnings when compiling libev code. Some people are apparently
scared by this.
.PP
However, these are unavoidable for many reasons. For one, each compiler
has different warnings, and each user has different tastes regarding
warning options. \*(L"Warn-free\*(R" code therefore cannot be a goal except when
targeting a specific compiler and compiler-version.
.PP
Another reason is that some compiler warnings require elaborate
workarounds, or other changes to the code that make it less clear and less
maintainable.
.PP
And of course, some compiler warnings are just plain stupid, or simply
wrong (because they don't actually warn about the condition their message
seems to warn about). For example, certain older gcc versions had some
warnings that resulted in an extreme number of false positives. These have
been fixed, but some people still insist on making code warn-free with
such buggy versions.
.PP
While libev is written to generate as few warnings as possible,
\&\*(L"warn-free\*(R" code is not a goal, and it is recommended not to build libev
with any compiler warnings enabled unless you are prepared to cope with
them (e.g. by ignoring them). Remember that warnings are just that:
warnings, not errors, or proof of bugs.
.SS "\s-1VALGRIND\s0"
.IX Subsection "VALGRIND"
Valgrind has a special section here because it is a popular tool that is
highly useful. Unfortunately, valgrind reports are very hard to interpret.
.PP
If you think you found a bug (memory leak, uninitialised data access etc.)
in libev, then check twice: If valgrind reports something like:
.PP
.Vb 3
\&   ==2274==    definitely lost: 0 bytes in 0 blocks.
\&   ==2274==      possibly lost: 0 bytes in 0 blocks.
\&   ==2274==    still reachable: 256 bytes in 1 blocks.
.Ve
.PP
Then there is no memory leak, just as memory accounted to global variables
is not a memleak \- the memory is still being referenced, and didn't leak.
.PP
Similarly, under some circumstances, valgrind might report kernel bugs
as if it were a bug in libev (e.g. in realloc or in the poll backend,
although an acceptable workaround has been found here), or it might be
confused.
.PP
Keep in mind that valgrind is a very good tool, but only a tool. Don't
make it into some kind of religion.
.PP
If you are unsure about something, feel free to contact the mailing list
with the full valgrind report and an explanation on why you think this
is a bug in libev (best check the archives, too :). However, don't be
annoyed when you get a brisk \*(L"this is no bug\*(R" answer and take the chance
of learning how to interpret valgrind properly.
.PP
If you need, for some reason, empty reports from valgrind for your project
I suggest using suppression lists.
.SH "PORTABILITY NOTES"
.IX Header "PORTABILITY NOTES"
.SS "\s-1GNU/LINUX 32 BIT LIMITATIONS\s0"
.IX Subsection "GNU/LINUX 32 BIT LIMITATIONS"
GNU/Linux is the only common platform that supports 64 bit file/large file
interfaces but \fIdisables\fR them by default.
.PP
That means that libev compiled in the default environment doesn't support
files larger than 2GiB or so, which mainly affects \f(CW\*(C`ev_stat\*(C'\fR watchers.
.PP
Unfortunately, many programs try to work around this GNU/Linux issue
by enabling the large file \s-1API,\s0 which makes them incompatible with the
standard libev compiled for their system.
.PP
Likewise, libev cannot enable the large file \s-1API\s0 itself as this would
suddenly make it incompatible with the default compile time environment,
i.e. all programs not using special compile switches.
.SS "\s-1OS/X AND DARWIN BUGS\s0"
.IX Subsection "OS/X AND DARWIN BUGS"
The whole thing is a bug if you ask me \- basically any system interface
you touch is broken, whether it is locales, poll, kqueue or even the
OpenGL drivers.
.PP
\fI\f(CI\*(C`kqueue\*(C'\fI is buggy\fR
.IX Subsection "kqueue is buggy"
.PP
The kqueue syscall is broken in all known versions \- most versions support
only sockets, many support pipes.
.PP
Libev tries to work around this by not using \f(CW\*(C`kqueue\*(C'\fR by default on this
rotten platform, but of course you can still ask for it when creating a
loop \- embedding a socket-only kqueue loop into a select-based one is
probably going to work well.
.PP
\fI\f(CI\*(C`poll\*(C'\fI is buggy\fR
.IX Subsection "poll is buggy"
.PP
Instead of fixing \f(CW\*(C`kqueue\*(C'\fR, Apple replaced their (working) \f(CW\*(C`poll\*(C'\fR
implementation by something calling \f(CW\*(C`kqueue\*(C'\fR internally around the 10.5.6
release, so now \f(CW\*(C`kqueue\*(C'\fR \fIand\fR \f(CW\*(C`poll\*(C'\fR are broken.
.PP
Libev tries to work around this by not using \f(CW\*(C`poll\*(C'\fR by default on
this rotten platform, but of course you can still ask for it when creating
a loop.
.PP
\fI\f(CI\*(C`select\*(C'\fI is buggy\fR
.IX Subsection "select is buggy"
.PP
All that's left is \f(CW\*(C`select\*(C'\fR, and of course Apple found a way to fuck this
one up as well: On \s-1OS/X, \s0\f(CW\*(C`select\*(C'\fR actively limits the number of file
descriptors you can pass in to 1024 \- your program suddenly crashes when
you use more.
.PP
There is an undocumented \*(L"workaround\*(R" for this \- defining
\&\f(CW\*(C`_DARWIN_UNLIMITED_SELECT\*(C'\fR, which libev tries to use, so select \fIshould\fR
work on \s-1OS/X.\s0
.SS "\s-1SOLARIS PROBLEMS AND WORKAROUNDS\s0"
.IX Subsection "SOLARIS PROBLEMS AND WORKAROUNDS"
\fI\f(CI\*(C`errno\*(C'\fI reentrancy\fR
.IX Subsection "errno reentrancy"
.PP
The default compile environment on Solaris is unfortunately so
thread-unsafe that you can't even use components/libraries compiled
without \f(CW\*(C`\-D_REENTRANT\*(C'\fR in a threaded program, which, of course, isn't
defined by default. A valid, if stupid, implementation choice.
.PP
If you want to use libev in threaded environments you have to make sure
it's compiled with \f(CW\*(C`_REENTRANT\*(C'\fR defined.
.PP
\fIEvent port backend\fR
.IX Subsection "Event port backend"
.PP
The scalable event interface for Solaris is called \*(L"event
ports\*(R". Unfortunately, this mechanism is very buggy in all major
releases. If you run into high \s-1CPU\s0 usage, your program freezes or you get
a large number of spurious wakeups, make sure you have all the relevant
and latest kernel patches applied. No, I don't know which ones, but there
are multiple ones to apply, and afterwards, event ports actually work
great.
.PP
If you can't get it to work, you can try running the program by setting
the environment variable \f(CW\*(C`LIBEV_FLAGS=3\*(C'\fR to only allow \f(CW\*(C`poll\*(C'\fR and
\&\f(CW\*(C`select\*(C'\fR backends.
.SS "\s-1AIX POLL BUG\s0"
.IX Subsection "AIX POLL BUG"
\&\s-1AIX\s0 unfortunately has a broken \f(CW\*(C`poll.h\*(C'\fR header. Libev works around
this by trying to avoid the poll backend altogether (i.e. it's not even
compiled in), which normally isn't a big problem as \f(CW\*(C`select\*(C'\fR works fine
with large bitsets on \s-1AIX,\s0 and \s-1AIX\s0 is dead anyway.
.SS "\s-1WIN32 PLATFORM LIMITATIONS AND WORKAROUNDS\s0"
.IX Subsection "WIN32 PLATFORM LIMITATIONS AND WORKAROUNDS"
\fIGeneral issues\fR
.IX Subsection "General issues"
.PP
Win32 doesn't support any of the standards (e.g. \s-1POSIX\s0) that libev
requires, and its I/O model is fundamentally incompatible with the \s-1POSIX\s0
model. Libev still offers limited functionality on this platform in
the form of the \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR backend, and only supports socket
descriptors. This only applies when using Win32 natively, not when using
e.g. cygwin. Actually, it only applies to Microsoft's own compilers,
as every compiler comes with a slightly differently broken/incompatible
environment.
.PP
Lifting these limitations would basically require the full
re-implementation of the I/O system. If you are into this kind of thing,
then note that glib does exactly that for you in a very portable way (note
also that glib is the slowest event library known to man).
.PP
There is no supported compilation method available on windows except
embedding it into other applications.
.PP
Sensible signal handling is officially unsupported by Microsoft \- libev
tries its best, but under most conditions, signals will simply not work.
.PP
Not a libev limitation but worth mentioning: windows apparently doesn't
accept large writes: instead of resulting in a partial write, windows will
either accept everything or return \f(CW\*(C`ENOBUFS\*(C'\fR if the buffer is too large,
so make sure you only write small amounts into your sockets (less than a
megabyte seems safe, but this apparently depends on the amount of memory
available).
.PP
Due to the many, low, and arbitrary limits on the win32 platform and
the abysmal performance of winsockets, using a large number of sockets
is not recommended (and not reasonable). If your program needs to use
more than a hundred or so sockets, then likely it needs to use a totally
different implementation for windows, as libev offers the \s-1POSIX\s0 readiness
notification model, which cannot be implemented efficiently on windows
(due to Microsoft monopoly games).
.PP
A typical way to use libev under windows is to embed it (see the embedding
section for details) and use the following \fIevwrap.h\fR header file instead
of \fIev.h\fR:
.PP
.Vb 2
\&   #define EV_STANDALONE              /* keeps ev from requiring config.h */
\&   #define EV_SELECT_IS_WINSOCKET 1   /* configure libev for windows select */
\&
\&   #include "ev.h"
.Ve
.PP
And compile the following \fIevwrap.c\fR file into your project (make sure
you do \fInot\fR compile the \fIev.c\fR or any other embedded source files!):
.PP
.Vb 2
\&   #include "evwrap.h"
\&   #include "ev.c"
.Ve
.PP
\fIThe winsocket \f(CI\*(C`select\*(C'\fI function\fR
.IX Subsection "The winsocket select function"
.PP
The winsocket \f(CW\*(C`select\*(C'\fR function doesn't follow \s-1POSIX\s0 in that it
requires socket \fIhandles\fR and not socket \fIfile descriptors\fR (it is
also extremely buggy). This makes select very inefficient, and also
requires a mapping from file descriptors to socket handles (the Microsoft
C runtime provides the function \f(CW\*(C`_open_osfhandle\*(C'\fR for this). See the
discussion of the \f(CW\*(C`EV_SELECT_USE_FD_SET\*(C'\fR, \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR and
\&\f(CW\*(C`EV_FD_TO_WIN32_HANDLE\*(C'\fR preprocessor symbols for more info.
.PP
The configuration for a \*(L"naked\*(R" win32 using the Microsoft runtime
libraries and raw winsocket select is:
.PP
.Vb 2
\&   #define EV_USE_SELECT 1
\&   #define EV_SELECT_IS_WINSOCKET 1   /* forces EV_SELECT_USE_FD_SET, too */
.Ve
.PP
Note that winsocket's handling of fd sets is O(n), so you can easily get a
complexity in the O(n^2) range when using win32.
.PP
\fILimited number of file descriptors\fR
.IX Subsection "Limited number of file descriptors"
.PP
Windows has numerous arbitrary (and low) limits on things.
.PP
Early versions of winsocket's select only supported waiting for a maximum
of \f(CW64\fR handles (probably owing to the fact that all windows kernels
can only wait for \f(CW64\fR things at the same time internally; Microsoft
recommends spawning a chain of threads and wait for 63 handles and the
previous thread in each. Sounds great!).
.PP
Newer versions support more handles, but you need to define \f(CW\*(C`FD_SETSIZE\*(C'\fR
to some high number (e.g. \f(CW2048\fR) before compiling the winsocket select
call (which might be in libev or elsewhere, for example, perl and many
other interpreters do their own select emulation on windows).
.PP
Another limit is the number of file descriptors in the Microsoft runtime
libraries, which by default is \f(CW64\fR (there must be a hidden \fI64\fR
fetish or something like this inside Microsoft). You can increase this
by calling \f(CW\*(C`_setmaxstdio\*(C'\fR, which can increase this limit to \f(CW2048\fR
(another arbitrary limit), but is broken in many versions of the Microsoft
runtime libraries. This might get you to about \f(CW512\fR or \f(CW2048\fR sockets
(depending on windows version and/or the phase of the moon). To get more,
you need to wrap all I/O functions and provide your own fd management, but
the cost of calling select (O(n^2)) will likely make this unworkable.
.SS "\s-1PORTABILITY REQUIREMENTS\s0"
.IX Subsection "PORTABILITY REQUIREMENTS"
In addition to a working ISO-C implementation and of course the
backend-specific APIs, libev relies on a few additional extensions:
.ie n .IP """void (*)(ev_watcher_type *, int revents)"" must have compatible calling conventions regardless of ""ev_watcher_type *""." 4
.el .IP "\f(CWvoid (*)(ev_watcher_type *, int revents)\fR must have compatible calling conventions regardless of \f(CWev_watcher_type *\fR." 4
.IX Item "void (*)(ev_watcher_type *, int revents) must have compatible calling conventions regardless of ev_watcher_type *."
Libev assumes not only that all watcher pointers have the same internal
structure (guaranteed by \s-1POSIX\s0 but not by \s-1ISO C\s0 for example), but it also
assumes that the same (machine) code can be used to call any watcher
callback: The watcher callbacks have different type signatures, but libev
calls them using an \f(CW\*(C`ev_watcher *\*(C'\fR internally.
.IP "null pointers and integer zero are represented by 0 bytes" 4
.IX Item "null pointers and integer zero are represented by 0 bytes"
Libev uses \f(CW\*(C`memset\*(C'\fR to initialise structs and arrays to \f(CW0\fR bytes, and
relies on this setting pointers and integers to null.
.IP "pointer accesses must be thread-atomic" 4
.IX Item "pointer accesses must be thread-atomic"
Accessing a pointer value must be atomic, it must both be readable and
writable in one piece \- this is the case on all current architectures.
.ie n .IP """sig_atomic_t volatile"" must be thread-atomic as well" 4
.el .IP "\f(CWsig_atomic_t volatile\fR must be thread-atomic as well" 4
.IX Item "sig_atomic_t volatile must be thread-atomic as well"
The type \f(CW\*(C`sig_atomic_t volatile\*(C'\fR (or whatever is defined as
\&\f(CW\*(C`EV_ATOMIC_T\*(C'\fR) must be atomic with respect to accesses from different
threads. This is not part of the specification for \f(CW\*(C`sig_atomic_t\*(C'\fR, but is
believed to be sufficiently portable.
.ie n .IP """sigprocmask"" must work in a threaded environment" 4
.el .IP "\f(CWsigprocmask\fR must work in a threaded environment" 4
.IX Item "sigprocmask must work in a threaded environment"
Libev uses \f(CW\*(C`sigprocmask\*(C'\fR to temporarily block signals. This is not
allowed in a threaded program (\f(CW\*(C`pthread_sigmask\*(C'\fR has to be used). Typical
pthread implementations will either allow \f(CW\*(C`sigprocmask\*(C'\fR in the \*(L"main
thread\*(R" or will block signals process-wide, both behaviours would
be compatible with libev. Interaction between \f(CW\*(C`sigprocmask\*(C'\fR and
\&\f(CW\*(C`pthread_sigmask\*(C'\fR could complicate things, however.
.Sp
The most portable way to handle signals is to block signals in all threads
except the initial one, and run the signal handling loop in the initial
thread as well.
.ie n .IP """long"" must be large enough for common memory allocation sizes" 4
.el .IP "\f(CWlong\fR must be large enough for common memory allocation sizes" 4
.IX Item "long must be large enough for common memory allocation sizes"
To improve portability and simplify its \s-1API,\s0 libev uses \f(CW\*(C`long\*(C'\fR internally
instead of \f(CW\*(C`size_t\*(C'\fR when allocating its data structures. On non-POSIX
systems (Microsoft...) this might be unexpectedly low, but is still at
least 31 bits everywhere, which is enough for hundreds of millions of
watchers.
.ie n .IP """double"" must hold a time value in seconds with enough accuracy" 4
.el .IP "\f(CWdouble\fR must hold a time value in seconds with enough accuracy" 4
.IX Item "double must hold a time value in seconds with enough accuracy"
The type \f(CW\*(C`double\*(C'\fR is used to represent timestamps. It is required to
have at least 51 bits of mantissa (and 9 bits of exponent), which is
good enough until at least the year 4000 with millisecond accuracy
(the design goal for libev). This requirement is overfulfilled by
implementations using \s-1IEEE 754,\s0 which is basically all existing ones.
.Sp
With \s-1IEEE 754\s0 doubles, you get microsecond accuracy until at least the
year 2255 (and millisecond accuracy till the year 287396 \- by then, libev
is either obsolete or somebody patched it to use \f(CW\*(C`long double\*(C'\fR or
something like that, just kidding).
.PP
If you know of other additional requirements drop me a note.
.SH "ALGORITHMIC COMPLEXITIES"
.IX Header "ALGORITHMIC COMPLEXITIES"
In this section the complexities of (many of) the algorithms used inside
libev will be documented. For complexity discussions about backends see
the documentation for \f(CW\*(C`ev_default_init\*(C'\fR.
.PP
All of the following are about amortised time: If an array needs to be
extended, libev needs to realloc and move the whole array, but this
happens asymptotically more rarely with a higher number of elements, so O(1) might
mean that libev does a lengthy realloc operation in rare cases, but on
average it is much faster and asymptotically approaches constant time.
.IP "Starting and stopping timer/periodic watchers: O(log skipped_other_timers)" 4
.IX Item "Starting and stopping timer/periodic watchers: O(log skipped_other_timers)"
This means that, when you have a watcher that triggers in one hour and
there are 100 watchers that would trigger before that, then inserting will
have to skip roughly seven (\f(CW\*(C`ld 100\*(C'\fR) of these watchers.
.IP "Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)" 4
.IX Item "Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)"
That means that changing a timer costs less than removing/adding them,
as only the relative motion in the event queue has to be paid for.
.IP "Starting io/check/prepare/idle/signal/child/fork/async watchers: O(1)" 4
.IX Item "Starting io/check/prepare/idle/signal/child/fork/async watchers: O(1)"
These just add the watcher into an array or at the head of a list.
.IP "Stopping check/prepare/idle/fork/async watchers: O(1)" 4
.IX Item "Stopping check/prepare/idle/fork/async watchers: O(1)"
.PD 0
.IP "Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % \s-1EV_PID_HASHSIZE\s0))" 4
.IX Item "Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % EV_PID_HASHSIZE))"
.PD
These watchers are stored in lists, so they need to be walked to find the
correct watcher to remove. The lists are usually short (you don't usually
have many watchers waiting for the same fd or signal: one is typical, two
is rare).
.IP "Finding the next timer in each loop iteration: O(1)" 4
.IX Item "Finding the next timer in each loop iteration: O(1)"
By virtue of using a binary or 4\-heap, the next timer is always found at a
fixed position in the storage array.
.IP "Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)" 4
.IX Item "Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)"
A change means an I/O watcher gets started or stopped, which requires
libev to recalculate its status (and possibly tell the kernel, depending
on backend and whether \f(CW\*(C`ev_io_set\*(C'\fR was used).
.IP "Activating one watcher (putting it into the pending state): O(1)" 4
.IX Item "Activating one watcher (putting it into the pending state): O(1)"
.PD 0
.IP "Priority handling: O(number_of_priorities)" 4
.IX Item "Priority handling: O(number_of_priorities)"
.PD
Priorities are implemented by allocating some space for each
priority. When doing priority-based operations, libev usually has to
linearly search all the priorities, but starting/stopping and activating
watchers becomes O(1) with respect to priority handling.
.IP "Sending an ev_async: O(1)" 4
.IX Item "Sending an ev_async: O(1)"
.PD 0
.IP "Processing ev_async_send: O(number_of_async_watchers)" 4
.IX Item "Processing ev_async_send: O(number_of_async_watchers)"
.IP "Processing signals: O(max_signal_number)" 4
.IX Item "Processing signals: O(max_signal_number)"
.PD
Sending involves a system call \fIiff\fR there were no other \f(CW\*(C`ev_async_send\*(C'\fR
calls in the current loop iteration and the loop is currently
blocked. Checking for async and signal events involves iterating over all
running async watchers or all signal numbers.
.SH "PORTING FROM LIBEV 3.X TO 4.X"
.IX Header "PORTING FROM LIBEV 3.X TO 4.X"
The major version 4 introduced some incompatible changes to the \s-1API.\s0
.PP
At the moment, the \f(CW\*(C`ev.h\*(C'\fR header file provides compatibility definitions
for all changes, so most programs should still compile. The compatibility
layer might be removed in later versions of libev, so better update to the
new \s-1API\s0 early than late.
.ie n .IP """EV_COMPAT3"" backwards compatibility mechanism" 4
.el .IP "\f(CWEV_COMPAT3\fR backwards compatibility mechanism" 4
.IX Item "EV_COMPAT3 backwards compatibility mechanism"
The backward compatibility mechanism can be controlled by
\&\f(CW\*(C`EV_COMPAT3\*(C'\fR. See \*(L"\s-1PREPROCESSOR SYMBOLS/MACROS\*(R"\s0 in the \*(L"\s-1EMBEDDING\*(R"\s0
section.
.ie n .IP """ev_default_destroy"" and ""ev_default_fork"" have been removed" 4
.el .IP "\f(CWev_default_destroy\fR and \f(CWev_default_fork\fR have been removed" 4
.IX Item "ev_default_destroy and ev_default_fork have been removed"
These calls can be replaced easily by their \f(CW\*(C`ev_loop_xxx\*(C'\fR counterparts:
.Sp
.Vb 2
\&   ev_loop_destroy (EV_DEFAULT_UC);
\&   ev_loop_fork (EV_DEFAULT);
.Ve
.IP "function/symbol renames" 4
.IX Item "function/symbol renames"
A number of functions and symbols have been renamed:
.Sp
.Vb 3
\&  ev_loop         => ev_run
\&  EVLOOP_NONBLOCK => EVRUN_NOWAIT
\&  EVLOOP_ONESHOT  => EVRUN_ONCE
\&
\&  ev_unloop       => ev_break
\&  EVUNLOOP_CANCEL => EVBREAK_CANCEL
\&  EVUNLOOP_ONE    => EVBREAK_ONE
\&  EVUNLOOP_ALL    => EVBREAK_ALL
\&
\&  EV_TIMEOUT      => EV_TIMER
\&
\&  ev_loop_count   => ev_iteration
\&  ev_loop_depth   => ev_depth
\&  ev_loop_verify  => ev_verify
.Ve
.Sp
Most functions working on \f(CW\*(C`struct ev_loop\*(C'\fR objects don't have an
\&\f(CW\*(C`ev_loop_\*(C'\fR prefix, so it was removed; \f(CW\*(C`ev_loop\*(C'\fR, \f(CW\*(C`ev_unloop\*(C'\fR and
associated constants have been renamed to not collide with the \f(CW\*(C`struct
ev_loop\*(C'\fR anymore and \f(CW\*(C`EV_TIMER\*(C'\fR now follows the same naming scheme
as all other watcher types. Note that \f(CW\*(C`ev_loop_fork\*(C'\fR is still called
\&\f(CW\*(C`ev_loop_fork\*(C'\fR because it would otherwise clash with the \f(CW\*(C`ev_fork\*(C'\fR
typedef.
.ie n .IP """EV_MINIMAL"" mechanism replaced by ""EV_FEATURES""" 4
.el .IP "\f(CWEV_MINIMAL\fR mechanism replaced by \f(CWEV_FEATURES\fR" 4
.IX Item "EV_MINIMAL mechanism replaced by EV_FEATURES"
The preprocessor symbol \f(CW\*(C`EV_MINIMAL\*(C'\fR has been replaced by a different
mechanism, \f(CW\*(C`EV_FEATURES\*(C'\fR. Programs using \f(CW\*(C`EV_MINIMAL\*(C'\fR usually compile
and work, but the library code will of course be larger.
.SH "GLOSSARY"
.IX Header "GLOSSARY"
.IP "active" 4
.IX Item "active"
A watcher is active as long as it has been started and not yet stopped.
See \*(L"\s-1WATCHER STATES\*(R"\s0 for details.
.IP "application" 4
.IX Item "application"
In this document, an application is whatever is using libev.
.IP "backend" 4
.IX Item "backend"
The part of the code dealing with the operating system interfaces.
.IP "callback" 4
.IX Item "callback"
The address of a function that is called when some event has been
detected. Callbacks are being passed the event loop, the watcher that
received the event, and the actual event bitset.
.IP "callback/watcher invocation" 4
.IX Item "callback/watcher invocation"
The act of calling the callback associated with a watcher.
.IP "event" 4
.IX Item "event"
A change of state of some external event, such as data now being available
for reading on a file descriptor, time having passed or simply not having
any other events happening anymore.
.Sp
In libev, events are represented as single bits (such as \f(CW\*(C`EV_READ\*(C'\fR or
\&\f(CW\*(C`EV_TIMER\*(C'\fR).
.IP "event library" 4
.IX Item "event library"
A software package implementing an event model and loop.
.IP "event loop" 4
.IX Item "event loop"
An entity that handles and processes external events and converts them
into callback invocations.
.IP "event model" 4
.IX Item "event model"
The model used to describe how an event loop handles and processes
watchers and events.
.IP "pending" 4
.IX Item "pending"
A watcher is pending as soon as the corresponding event has been
detected. See \*(L"\s-1WATCHER STATES\*(R"\s0 for details.
.IP "real time" 4
.IX Item "real time"
The physical time that is observed. It is apparently strictly monotonic :)
.IP "wall-clock time" 4
.IX Item "wall-clock time"
The time and date as shown on clocks. Unlike real time, it can actually
be wrong and jump forwards and backwards, e.g. when you adjust your
clock.
.IP "watcher" 4
.IX Item "watcher"
A data structure that describes interest in certain events. Watchers need
to be started (attached to an event loop) before they can receive events.
.SH "AUTHOR"
.IX Header "AUTHOR"
Marc Lehmann <libev@schmorp.de>, with repeated corrections by Mikael
Magnusson and Emanuele Giaquinta, and minor corrections by many others.


================================================
FILE: libev/ev.c
================================================
/*
 * libev event processing core, watcher management
 *
 * Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/*
 * this big block deduces configuration from config.h
 *
 * it is skipped entirely when EV_STANDALONE is defined. otherwise the
 * HAVE_* symbols from config.h (or from the header named by EV_CONFIG_H)
 * are translated into EV_USE_* settings. most groups follow one pattern:
 * when the HAVE_* test succeeds, an existing EV_USE_* value is kept and
 * only a missing one receives a default; when it fails, EV_USE_* is
 * forced to 0 regardless of any earlier definition.
 */
#ifndef EV_STANDALONE
# ifdef EV_CONFIG_H
#  include EV_CONFIG_H
# else
#  include "config.h"
# endif

/* HAVE_FLOOR only supplies a default of 1, it never forces the value */
# if HAVE_FLOOR
#  ifndef EV_USE_FLOOR
#   define EV_USE_FLOOR 1
#  endif
# endif

/* with HAVE_CLOCK_SYSCALL and no explicit EV_USE_CLOCK_SYSCALL, enable it */
/* and default realtime to 0 and monotonic to 1; without it default to 0 */
# if HAVE_CLOCK_SYSCALL
#  ifndef EV_USE_CLOCK_SYSCALL
#   define EV_USE_CLOCK_SYSCALL 1
#   ifndef EV_USE_REALTIME
#    define EV_USE_REALTIME  0
#   endif
#   ifndef EV_USE_MONOTONIC
#    define EV_USE_MONOTONIC 1
#   endif
#  endif
# elif !defined EV_USE_CLOCK_SYSCALL
#  define EV_USE_CLOCK_SYSCALL 0
# endif

/* monotonic defaults to 1 only with HAVE_CLOCK_GETTIME, realtime defaults to 0 either way */
# if HAVE_CLOCK_GETTIME
#  ifndef EV_USE_MONOTONIC
#   define EV_USE_MONOTONIC 1
#  endif
#  ifndef EV_USE_REALTIME
#   define EV_USE_REALTIME  0
#  endif
# else
#  ifndef EV_USE_MONOTONIC
#   define EV_USE_MONOTONIC 0
#  endif
#  ifndef EV_USE_REALTIME
#   define EV_USE_REALTIME  0
#  endif
# endif

/* from here on: keep-or-default when detected, force 0 when not detected */
# if HAVE_NANOSLEEP
#  ifndef EV_USE_NANOSLEEP
#    define EV_USE_NANOSLEEP EV_FEATURE_OS
#  endif
# else
#   undef EV_USE_NANOSLEEP
#   define EV_USE_NANOSLEEP 0
# endif

/* the backends need both the function and its header to be present */
# if HAVE_SELECT && HAVE_SYS_SELECT_H
#  ifndef EV_USE_SELECT
#   define EV_USE_SELECT EV_FEATURE_BACKENDS
#  endif
# else
#  undef EV_USE_SELECT
#  define EV_USE_SELECT 0
# endif

# if HAVE_POLL && HAVE_POLL_H
#  ifndef EV_USE_POLL
#   define EV_USE_POLL EV_FEATURE_BACKENDS
#  endif
# else
#  undef EV_USE_POLL
#  define EV_USE_POLL 0
# endif
   
# if HAVE_EPOLL_CTL && HAVE_SYS_EPOLL_H
#  ifndef EV_USE_EPOLL
#   define EV_USE_EPOLL EV_FEATURE_BACKENDS
#  endif
# else
#  undef EV_USE_EPOLL
#  define EV_USE_EPOLL 0
# endif
   
# if HAVE_KQUEUE && HAVE_SYS_EVENT_H
#  ifndef EV_USE_KQUEUE
#   define EV_USE_KQUEUE EV_FEATURE_BACKENDS
#  endif
# else
#  undef EV_USE_KQUEUE
#  define EV_USE_KQUEUE 0
# endif
   
# if HAVE_PORT_H && HAVE_PORT_CREATE
#  ifndef EV_USE_PORT
#   define EV_USE_PORT EV_FEATURE_BACKENDS
#  endif
# else
#  undef EV_USE_PORT
#  define EV_USE_PORT 0
# endif

/* the remaining os services default to EV_FEATURE_OS instead of EV_FEATURE_BACKENDS */
# if HAVE_INOTIFY_INIT && HAVE_SYS_INOTIFY_H
#  ifndef EV_USE_INOTIFY
#   define EV_USE_INOTIFY EV_FEATURE_OS
#  endif
# else
#  undef EV_USE_INOTIFY
#  define EV_USE_INOTIFY 0
# endif

# if HAVE_SIGNALFD && HAVE_SYS_SIGNALFD_H
#  ifndef EV_USE_SIGNALFD
#   define EV_USE_SIGNALFD EV_FEATURE_OS
#  endif
# else
#  undef EV_USE_SIGNALFD
#  define EV_USE_SIGNALFD 0
# endif

/* unlike the groups above, eventfd is keyed on HAVE_EVENTFD alone, no header test */
# if HAVE_EVENTFD
#  ifndef EV_USE_EVENTFD
#   define EV_USE_EVENTFD EV_FEATURE_OS
#  endif
# else
#  undef EV_USE_EVENTFD
#  define EV_USE_EVENTFD 0
# endif
 
#endif

#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <stddef.h>

#include <stdio.h>

#include <assert.h>
#include <errno.h>
#include <sys/types.h>
#include <time.h>
#include <limits.h>

#include <signal.h>

#ifdef EV_H
# include EV_H
#else
# include "ev.h"
#endif

/*
 * translate the EV_NO_THREADS / EV_NO_SMP switches into the ECB_NO_THREADS /
 * ECB_NO_SMP symbols tested by the embedded ecb.h further down.
 * EV_NO_THREADS implies EV_NO_SMP. every ECB_* symbol is #undef'ed before it
 * is defined, so a value supplied earlier (e.g. on the compiler command
 * line) cannot clash with the definition made here.
 */
#if EV_NO_THREADS
# undef EV_NO_SMP
# define EV_NO_SMP 1
# undef ECB_NO_THREADS
# define ECB_NO_THREADS 1
#endif
#if EV_NO_SMP
# undef EV_NO_SMP
/* drop any previous definition first, mirroring the ECB_NO_THREADS handling above */
# undef ECB_NO_SMP
# define ECB_NO_SMP 1
#endif

#ifndef _WIN32
# include <sys/time.h>
# include <sys/wait.h>
# include <unistd.h>
#else
# include <io.h>
# define WIN32_LEAN_AND_MEAN
# include <winsock2.h>
# include <windows.h>
# ifndef EV_SELECT_IS_WINSOCKET
#  define EV_SELECT_IS_WINSOCKET 1
# endif
# undef EV_AVOID_STDIO
#endif

/* OS X, in its infinite idiocy, actually HARDCODES
 * a limit of 1024 into their select. Where people have brains,
 * OS X engineers apparently have a vacuum. Or maybe they were
 * ordered to have a vacuum, or they do anything for money.
 * This might help. Or not.
 *
 * NOTE(review): this define comes after several system headers have
 * already been included above; presumably it only has an effect on headers
 * included later - confirm on an actual OS X build.
 */
#define _DARWIN_UNLIMITED_SELECT 1

/* this block tries to deduce configuration from header-defined symbols and defaults */

/* try to deduce the maximum number of signals on this platform */
#if defined EV_NSIG
/* use what's provided */
#elif defined NSIG
# define EV_NSIG (NSIG)
#elif defined _NSIG
# define EV_NSIG (_NSIG)
#elif defined SIGMAX
# define EV_NSIG (SIGMAX+1)
#elif defined SIG_MAX
# define EV_NSIG (SIG_MAX+1)
#elif defined _SIG_MAX
# define EV_NSIG (_SIG_MAX+1)
#elif defined MAXSIG
# define EV_NSIG (MAXSIG+1)
#elif defined MAX_SIG
# define EV_NSIG (MAX_SIG+1)
#elif defined SIGARRAYSIZE
# define EV_NSIG (SIGARRAYSIZE) /* Assume ary[SIGARRAYSIZE] */
#elif defined _sys_nsig
# define EV_NSIG (_sys_nsig) /* Solaris 2.5 */
#else
# define EV_NSIG (8 * sizeof (sigset_t) + 1)
#endif

/*
 * defaults for every EV_USE_* symbol that is still undefined at this point.
 * values defined earlier (by the user or by the config.h block) are left alone.
 */
#ifndef EV_USE_FLOOR
# define EV_USE_FLOOR 0
#endif

/* only linux with glibc 2.x older than 2.17 gets the syscall variant by default */
#ifndef EV_USE_CLOCK_SYSCALL
# if __linux && __GLIBC__ == 2 && __GLIBC_MINOR__ < 17
#  define EV_USE_CLOCK_SYSCALL EV_FEATURE_OS
# else
#  define EV_USE_CLOCK_SYSCALL 0
# endif
#endif

/* without _POSIX_TIMERS both clock sources default to off */
#if !(_POSIX_TIMERS > 0)
# ifndef EV_USE_MONOTONIC
#  define EV_USE_MONOTONIC 0
# endif
# ifndef EV_USE_REALTIME
#  define EV_USE_REALTIME 0
# endif
#endif

#ifndef EV_USE_MONOTONIC
# if defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
#  define EV_USE_MONOTONIC EV_FEATURE_OS
# else
#  define EV_USE_MONOTONIC 0
# endif
#endif

/* realtime defaults to the opposite of the clock syscall setting */
#ifndef EV_USE_REALTIME
# define EV_USE_REALTIME !EV_USE_CLOCK_SYSCALL
#endif

#ifndef EV_USE_NANOSLEEP
# if _POSIX_C_SOURCE >= 199309L
#  define EV_USE_NANOSLEEP EV_FEATURE_OS
# else
#  define EV_USE_NANOSLEEP 0
# endif
#endif

/* select is the only backend that defaults to on without any platform test */
#ifndef EV_USE_SELECT
# define EV_USE_SELECT EV_FEATURE_BACKENDS
#endif

/* poll defaults to on everywhere except windows */
#ifndef EV_USE_POLL
# ifdef _WIN32
#  define EV_USE_POLL 0
# else
#  define EV_USE_POLL EV_FEATURE_BACKENDS
# endif
#endif

/* epoll is only enabled by default on linux with glibc >= 2.4 */
#ifndef EV_USE_EPOLL
# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4))
#  define EV_USE_EPOLL EV_FEATURE_BACKENDS
# else
#  define EV_USE_EPOLL 0
# endif
#endif

/* kqueue and port are never enabled here, they have to be requested explicitly */
#ifndef EV_USE_KQUEUE
# define EV_USE_KQUEUE 0
#endif

#ifndef EV_USE_PORT
# define EV_USE_PORT 0
#endif

/* same platform test as for epoll */
#ifndef EV_USE_INOTIFY
# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 4))
#  define EV_USE_INOTIFY EV_FEATURE_OS
# else
#  define EV_USE_INOTIFY 0
# endif
#endif

/*
 * default values for EV_PID_HASHSIZE and EV_INOTIFY_HASHSIZE: 16 when
 * EV_FEATURE_DATA is non-zero, 1 otherwise.
 * the expansions are fully parenthesised so the macros can be used inside
 * larger expressions (e.g. "EV_PID_HASHSIZE - 1") without the ?: operator
 * swallowing the neighbouring operands.
 */
#ifndef EV_PID_HASHSIZE
# define EV_PID_HASHSIZE (EV_FEATURE_DATA ? 16 : 1)
#endif

#ifndef EV_INOTIFY_HASHSIZE
# define EV_INOTIFY_HASHSIZE (EV_FEATURE_DATA ? 16 : 1)
#endif

/* eventfd and signalfd share one default: on (EV_FEATURE_OS) for linux with glibc >= 2.7, off otherwise */
#ifndef EV_USE_EVENTFD
# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7))
#  define EV_USE_EVENTFD EV_FEATURE_OS
# else
#  define EV_USE_EVENTFD 0
# endif
#endif

#ifndef EV_USE_SIGNALFD
# if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7))
#  define EV_USE_SIGNALFD EV_FEATURE_OS
# else
#  define EV_USE_SIGNALFD 0
# endif
#endif

/* change the 0 to 1 to force these settings while debugging; */
/* EV_VERIFY >= 3 makes EV_FREQUENT_CHECK (defined below) call ev_verify */
#if 0 /* debugging */
# define EV_VERIFY 3
# define EV_USE_4HEAP 1
# define EV_HEAP_CACHE_AT 1
#endif

/* verification level defaults to 1 with EV_FEATURE_API, 0 without */
#ifndef EV_VERIFY
# define EV_VERIFY (EV_FEATURE_API ? 1 : 0)
#endif

/* both heap options simply follow EV_FEATURE_DATA by default */
#ifndef EV_USE_4HEAP
# define EV_USE_4HEAP EV_FEATURE_DATA
#endif

#ifndef EV_HEAP_CACHE_AT
# define EV_HEAP_CACHE_AT EV_FEATURE_DATA
#endif

/* platform overrides: these unconditionally replace whatever was configured above */
#ifdef __ANDROID__
/* supposedly, android doesn't typedef fd_mask */
# undef EV_USE_SELECT
# define EV_USE_SELECT 0
/* supposedly, we need to include syscall.h, not sys/syscall.h, so just disable */
# undef EV_USE_CLOCK_SYSCALL
# define EV_USE_CLOCK_SYSCALL 0
#endif

/* aix's poll.h seems to cause lots of trouble */
#ifdef _AIX
/* AIX has a completely broken poll.h header */
# undef EV_USE_POLL
# define EV_USE_POLL 0
#endif

/* on linux, we can use a (slow) syscall to avoid a dependency on pthread, */
/* which makes programs even slower. might work on other unices, too. */
/* when SYS_clock_gettime exists, clock_gettime is replaced by a macro that */
/* issues the raw syscall and the monotonic clock is forced on; otherwise */
/* the syscall option is switched off again. */
#if EV_USE_CLOCK_SYSCALL
# include <sys/syscall.h>
# ifdef SYS_clock_gettime
#  define clock_gettime(id, ts) syscall (SYS_clock_gettime, (id), (ts))
#  undef EV_USE_MONOTONIC
#  define EV_USE_MONOTONIC 1
# else
#  undef EV_USE_CLOCK_SYSCALL
#  define EV_USE_CLOCK_SYSCALL 0
# endif
#endif

/* this block fixes any misconfiguration where we know we run into trouble otherwise */

/* a clock source cannot be used when its clock id constant is missing */
#ifndef CLOCK_MONOTONIC
# undef EV_USE_MONOTONIC
# define EV_USE_MONOTONIC 0
#endif

#ifndef CLOCK_REALTIME
# undef EV_USE_REALTIME
# define EV_USE_REALTIME 0
#endif

/* inotify is tied to EV_STAT_ENABLE: when that is off, inotify is forced off, too */
#if !EV_STAT_ENABLE
# undef EV_USE_INOTIFY
# define EV_USE_INOTIFY 0
#endif

/* without nanosleep, sys/select.h is pulled in (presumably select is the fallback for sleeping) */
#if !EV_USE_NANOSLEEP
/* hp-ux has it in sys/time.h, which we unconditionally include above */
# if !defined _WIN32 && !defined __hpux
#  include <sys/select.h>
# endif
#endif

/* headers and fallback declarations for the optional linux services */

#if EV_USE_INOTIFY
# include <sys/statfs.h>
# include <sys/inotify.h>
/* some very old inotify.h headers don't have IN_DONT_FOLLOW */
# ifndef IN_DONT_FOLLOW
#  undef EV_USE_INOTIFY
#  define EV_USE_INOTIFY 0
# endif
#endif

#if EV_USE_EVENTFD
/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
/* so the flags and the prototype are declared here instead of including a header */
# include <stdint.h>
# ifndef EFD_NONBLOCK
#  define EFD_NONBLOCK O_NONBLOCK
# endif
# ifndef EFD_CLOEXEC
#  ifdef O_CLOEXEC
#   define EFD_CLOEXEC O_CLOEXEC
#  else
/* NOTE(review): hard-coded fallback, presumably the linux O_CLOEXEC value - may be architecture-specific, confirm */
#   define EFD_CLOEXEC 02000000
#  endif
# endif
EV_CPP(extern "C") int (eventfd) (unsigned int initval, int flags);
#endif

#if EV_USE_SIGNALFD
/* our minimum requirement is glibc 2.7 which has the stub, but not the header */
/* same approach as for eventfd: flags, prototype and struct are declared locally */
# include <stdint.h>
# ifndef SFD_NONBLOCK
#  define SFD_NONBLOCK O_NONBLOCK
# endif
# ifndef SFD_CLOEXEC
#  ifdef O_CLOEXEC
#   define SFD_CLOEXEC O_CLOEXEC
#  else
/* NOTE(review): same hard-coded fallback value as EFD_CLOEXEC above - confirm per architecture */
#   define SFD_CLOEXEC 02000000
#  endif
# endif
EV_CPP (extern "C") int signalfd (int fd, const sigset_t *mask, int flags);

/* reduced local version of the struct: only the signal number is named, */
/* the rest is padding sized so that the members add up to 128 bytes */
struct signalfd_siginfo
{
  uint32_t ssi_signo;
  char pad[128 - sizeof (uint32_t)];
};
#endif

/**/

/* at verification level 3 or higher EV_FREQUENT_CHECK runs ev_verify, otherwise it is a no-op statement */
#if EV_VERIFY >= 3
# define EV_FREQUENT_CHECK ev_verify (EV_A)
#else
# define EV_FREQUENT_CHECK do { } while (0)
#endif

/*
 * This is used to work around floating point rounding problems.
 * This value is good at least till the year 4000.
 */
#define MIN_INTERVAL  0.0001220703125 /* 1/2**13, good till 4000 */
/* alternative value: 0.00000095367431640625 (1/2**20, good till 2200) */

#define MIN_TIMEJUMP  1. /* minimum timejump that gets detected (if monotonic clock available) */
#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */

/*
 * split a timestamp t (seconds, floating point) into whole seconds and the
 * fractional remainder: EV_TV_SET fills a struct timeval (microseconds),
 * EV_TS_SET a struct timespec (nanoseconds).
 * both arguments are parenthesised so expressions such as "a - b" or
 * "*ptr" can be passed safely. note that t is still evaluated twice, so it
 * must not have side effects.
 */
#define EV_TV_SET(tv,t) do { (tv).tv_sec = (long)(t); (tv).tv_usec = (long)(((t) - (tv).tv_sec) * 1e6); } while (0)
#define EV_TS_SET(ts,t) do { (ts).tv_sec = (long)(t); (ts).tv_nsec = (long)(((t) - (ts).tv_sec) * 1e9); } while (0)

/* the following is ecb.h embedded into libev - use update_ev_c to update from an external copy */
/* ECB.H BEGIN */
/*
 * libecb - http://software.schmorp.de/pkg/libecb
 *
 * Copyright (©) 2009-2015 Marc Alexander Lehmann <libecb@schmorp.de>
 * Copyright (©) 2011 Emanuele Giaquinta
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifndef ECB_H
#define ECB_H

/* 16 bits major, 16 bits minor */
#define ECB_VERSION 0x00010005

/*
 * fixed-width integer types and ECB_PTRSIZE (pointer size in bytes).
 * on windows the types are declared by hand, with the pointer size taken
 * from _WIN64; everywhere else <inttypes.h> supplies the types and the
 * pointer size is derived from INTPTR_MAX (or ULONG_MAX when INTPTR_MAX is
 * not defined).
 */
#ifdef _WIN32
  typedef   signed char   int8_t;
  typedef unsigned char  uint8_t;
  typedef   signed short  int16_t;
  typedef unsigned short uint16_t;
  typedef   signed int    int32_t;
  typedef unsigned int   uint32_t;
  #if __GNUC__
    typedef   signed long long int64_t;
    typedef unsigned long long uint64_t;
  #else /* _MSC_VER || __BORLANDC__ */
    typedef   signed __int64   int64_t;
    typedef unsigned __int64   uint64_t;
  #endif
  #ifdef _WIN64
    #define ECB_PTRSIZE 8
    typedef uint64_t uintptr_t;
    typedef  int64_t  intptr_t;
  #else
    #define ECB_PTRSIZE 4
    typedef uint32_t uintptr_t;
    typedef  int32_t  intptr_t;
  #endif
#else
  #include <inttypes.h>
  /* anything that does not fit into 32 bits is treated as an 8 byte pointer */
  #if (defined INTPTR_MAX ? INTPTR_MAX : ULONG_MAX) > 0xffffffffU
    #define ECB_PTRSIZE 8
  #else
    #define ECB_PTRSIZE 4
  #endif
#endif

/* amd64 detection, once via the gcc-style and once via the msvc-style predefined macros */
#define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
#define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)

/* work around x32 idiocy by defining proper macros */
/* exactly one of ECB_AMD64_X32 / ECB_AMD64 gets defined on amd64, selected by _ILP32 */
#if ECB_GCC_AMD64 || ECB_MSVC_AMD64
  #if _ILP32
    #define ECB_AMD64_X32 1
  #else
    #define ECB_AMD64 1
  #endif
#endif

/* many compilers define _GNUC_ to some versions but then only implement
 * what their idiot authors think are the "more important" extensions,
 * causing enormous grief in return for some better fake benchmark numbers.
 * or so.
 * we try to detect these and simply assume they are not gcc - if they have
 * an issue with that they should have done it right in the first place.
 */
#if !defined __GNUC_MINOR__ || defined __INTEL_COMPILER || defined __SUNPRO_C || defined __SUNPRO_CC || defined __llvm__ || defined __clang__
  #define ECB_GCC_VERSION(major,minor) 0
#else
  #define ECB_GCC_VERSION(major,minor) (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#endif

#define ECB_CLANG_VERSION(major,minor) (__clang_major__ > (major) || (__clang_major__ == (major) && __clang_minor__ >= (minor)))

#if __clang__ && defined __has_builtin
  #define ECB_CLANG_BUILTIN(x) __has_builtin (x)
#else
  #define ECB_CLANG_BUILTIN(x) 0
#endif

#if __clang__ && defined __has_extension
  #define ECB_CLANG_EXTENSION(x) __has_extension (x)
#else
  #define ECB_CLANG_EXTENSION(x) 0
#endif

#define ECB_CPP   (__cplusplus+0)
#define ECB_CPP11 (__cplusplus >= 201103L)

#if ECB_CPP
  #define ECB_C            0
  #define ECB_STDC_VERSION 0
#else
  #define ECB_C            1
  #define ECB_STDC_VERSION __STDC_VERSION__
#endif

#define ECB_C99   (ECB_STDC_VERSION >= 199901L)
#define ECB_C11   (ECB_STDC_VERSION >= 201112L)

#if ECB_CPP
  #define ECB_EXTERN_C extern "C"
  #define ECB_EXTERN_C_BEG ECB_EXTERN_C {
  #define ECB_EXTERN_C_END }
#else
  #define ECB_EXTERN_C extern
  #define ECB_EXTERN_C_BEG
  #define ECB_EXTERN_C_END
#endif

/*****************************************************************************/

/* ECB_NO_THREADS - ecb is not used by multiple threads, ever */
/* ECB_NO_SMP     - ecb might be used in multiple threads, but only on a single cpu */

/* no threads at all implies the single-cpu case */
#if ECB_NO_THREADS
  #define ECB_NO_SMP 1
#endif

/* in the single-cpu case the memory fence is defined as an empty statement */
#if ECB_NO_SMP
  #define ECB_MEMORY_FENCE do { } while (0)
#endif

/* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */
/* xlC in C++ mode gets <builtins.h> (see the link above) */
#if __xlC__ && ECB_CPP
  #include <builtins.h>
#endif

#if 1400 <= _MSC_VER
  #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
#endif

/*
 * inline-assembly memory fences for gcc-compatible compilers, selected by
 * target architecture. this is only attempted when ECB_MEMORY_FENCE has not
 * been defined yet; architectures without a match leave it undefined here.
 *
 * every fence carries a "memory" clobber so that it also acts as a compiler
 * barrier. this includes the release variants: without the clobber the
 * compiler is free to move stores across the (otherwise empty) asm
 * statement, which defeats the purpose of a release fence.
 */
#ifndef ECB_MEMORY_FENCE
  #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
    #if __i386 || __i386__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ (""                        : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""                        : : : "memory")
    #elif ECB_GCC_AMD64
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mfence"   : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ (""         : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""         : : : "memory")
    #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("sync"     : : : "memory")
    #elif defined __ARM_ARCH_2__ \
      || defined __ARM_ARCH_3__  || defined __ARM_ARCH_3M__  \
      || defined __ARM_ARCH_4__  || defined __ARM_ARCH_4T__  \
      || defined __ARM_ARCH_5__  || defined __ARM_ARCH_5E__  \
      || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__ \
      || defined __ARM_ARCH_5TEJ__
      /* should not need any, unless running old code on newer cpu - arm doesn't support that */
    #elif defined __ARM_ARCH_6__  || defined __ARM_ARCH_6J__  \
       || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ \
       || defined __ARM_ARCH_6T2__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory")
    #elif defined __ARM_ARCH_7__  || defined __ARM_ARCH_7A__  \
       || defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("dmb"      : : : "memory")
    #elif __aarch64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("dmb ish"  : : : "memory")
    #elif (__sparc || __sparc__) && !(__sparc_v8__ || defined __sparcv8)
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad"                            : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore             | #StoreStore"              : : : "memory")
    #elif defined __s390__ || defined __s390x__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("bcr 15,0" : : : "memory")
    #elif defined __mips__
      /* GNU/Linux emulates sync on mips1 architectures, so we force its use */
      /* anybody else who still uses mips1 is supposed to send in their version, with detection code. */
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (".set mips2; sync; .set mips0" : : : "memory")
    #elif defined __alpha__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mb"       : : : "memory")
    #elif defined __hppa__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""         : : : "memory")
    #elif defined __ia64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mf"       : : : "memory")
    #elif defined __m68k__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
    #elif defined __m88k__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("tb1 0,%%r0,128" : : : "memory")
    #elif defined __sh__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
    #endif
  #endif
#endif

/*
 * Second attempt at defining the memory fences: if the inline-assembly
 * section above did not define ECB_MEMORY_FENCE, pick a compiler builtin
 * or intrinsic based on compiler/version detection. Branches that only
 * define ECB_MEMORY_FENCE leave the ACQUIRE/RELEASE variants to the
 * generic defaults further down.
 */
#ifndef ECB_MEMORY_FENCE
  #if ECB_GCC_VERSION(4,7)
    /* see comment below (stdatomic.h) about the C11 memory model. */
    #define ECB_MEMORY_FENCE         __atomic_thread_fence (__ATOMIC_SEQ_CST)
    #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
    #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)

  #elif ECB_CLANG_EXTENSION(c_atomic)
    /* see comment below (stdatomic.h) about the C11 memory model. */
    #define ECB_MEMORY_FENCE         __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
    #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
    #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)

  #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
    /* legacy __sync builtin: full barrier only */
    #define ECB_MEMORY_FENCE         __sync_synchronize ()
  #elif _MSC_VER >= 1500 /* VC++ 2008 */
    /* apparently, microsoft broke all the memory barrier stuff in Visual Studio 2008... */
    /* NOTE: these expand to two statements, so they are only safe as a full statement of their own */
    #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
    #define ECB_MEMORY_FENCE         _ReadWriteBarrier (); MemoryBarrier()
    #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier (); MemoryBarrier() /* according to msdn, _ReadBarrier is not a load fence */
    #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier (); MemoryBarrier()
  #elif _MSC_VER >= 1400 /* VC++ 2005 */
    #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
    #define ECB_MEMORY_FENCE         _ReadWriteBarrier ()
    #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier () /* according to msdn, _ReadBarrier is not a load fence */
    #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier ()
  #elif defined _WIN32
    #include <WinNT.h>
    #define ECB_MEMORY_FENCE         MemoryBarrier () /* actually just xchg on x86... scary */
  #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
    #include <mbarrier.h>
    #define ECB_MEMORY_FENCE         __machine_rw_barrier ()
    #define ECB_MEMORY_FENCE_ACQUIRE __machine_r_barrier  ()
    #define ECB_MEMORY_FENCE_RELEASE __machine_w_barrier  ()
  #elif __xlC__
    #define ECB_MEMORY_FENCE         __sync ()
  #endif
#endif

/*
 * Third attempt: no fence defined yet, so use the standard C11
 * <stdatomic.h> fence when the compiler claims C11 with atomics support.
 * Only the full (seq_cst) fence is defined here.
 */
#ifndef ECB_MEMORY_FENCE
  #if ECB_C11 && !defined __STDC_NO_ATOMICS__
    /* we assume that these memory fences work on all variables/all memory accesses, */
    /* not just C11 atomics and atomic accesses */
    #include <stdatomic.h>
    /* Unfortunately, neither gcc 4.7 nor clang 3.1 generate any instructions for */
    /* any fence other than seq_cst, which isn't very efficient for us. */
    /* Why that is, we don't know - either the C11 memory model is quite useless */
    /* for most usages, or gcc and clang have a bug */
    /* I *currently* lean towards the latter, and inefficiently implement */
    /* all three of ecb's fences as a seq_cst fence */
    /* Update, gcc-4.8 generates mfence for all c++ fences, but nothing */
    /* for all __atomic_thread_fence's except seq_cst */
    #define ECB_MEMORY_FENCE         atomic_thread_fence (memory_order_seq_cst)
  #endif
#endif

/*
 * Last resort: emulate a fence by locking and unlocking a file-local
 * pthread mutex, unless ECB_AVOID_PTHREADS is set. Also sets
 * ECB_NEEDS_PTHREADS and ECB_MEMORY_FENCE_NEEDS_PTHREADS so that code
 * including this header can detect that the fallback was taken.
 */
#ifndef ECB_MEMORY_FENCE
  #if !ECB_AVOID_PTHREADS
    /*
     * if you get undefined symbol references to pthread_mutex_lock,
     * or failure to find pthread.h, then you should implement
     * the ECB_MEMORY_FENCE operations for your cpu/compiler
     * OR provide pthread.h and link against the posix thread library
     * of your system.
     */
    #include <pthread.h>
    #define ECB_NEEDS_PTHREADS 1
    #define ECB_MEMORY_FENCE_NEEDS_PTHREADS 1

    static pthread_mutex_t ecb_mf_lock = PTHREAD_MUTEX_INITIALIZER;
    #define ECB_MEMORY_FENCE do { pthread_mutex_lock (&ecb_mf_lock); pthread_mutex_unlock (&ecb_mf_lock); } while (0)
  #endif
#endif

/* if only the full fence was defined above, use it for acquire semantics as well */
#if !defined ECB_MEMORY_FENCE_ACQUIRE && defined ECB_MEMORY_FENCE
  #define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE
#endif

/* likewise, the full fence stands in for a missing release fence */
#if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE
  #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
#endif

/*****************************************************************************/

/* ecb_inline: storage/inline specifier for small helpers, degrading to plain static when no inline keyword is known */
#if ECB_CPP
  #define ecb_inline static inline
#elif ECB_GCC_VERSION(2,5)
  #define ecb_inline static __inline__
#elif ECB_C99
  #define ecb_inline static inline
#else
  #define ecb_inline static
#endif

/* ecb_restrict: the restrict qualifier in whatever spelling is available, or nothing */
#if ECB_GCC_VERSION(3,3)
  #define ecb_restrict __restrict__
#elif ECB_C99
  #define ecb_restrict restrict
#else
  #define ecb_restrict
#endif

/* boolean result type used by the ecb predicates */
typedef int ecb_bool;

/* token pasting and stringification; the non-underscore forms macro-expand their arguments first */
#define ECB_CONCAT_(a, b) a ## b
#define ECB_CONCAT(a, b) ECB_CONCAT_(a, b)
#define ECB_STRINGIFY_(a) # a
#define ECB_STRINGIFY(a) ECB_STRINGIFY_(a)
/* evaluates expr (comma operator) and yields the text of expr as a string literal */
#define ECB_STRINGIFY_EXPR(expr) ((expr), ECB_STRINGIFY_ (expr))

/* specifier used on the non-trivial ecb helper functions; currently the same as ecb_inline */
#define ecb_function_ ecb_inline

/* ecb_attribute ((x)): __attribute__ on compilers that support it, otherwise expands to nothing */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_VERSION(2,8)
  #define ecb_attribute(attrlist)        __attribute__ (attrlist)
#else
  #define ecb_attribute(attrlist)
#endif

/* ecb_is_constant (expr): nonzero if the compiler knows expr to be constant; always 0 without the builtin */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_constant_p)
  #define ecb_is_constant(expr)          __builtin_constant_p (expr)
#else
  /* possible C11 impl for integral types
  typedef struct ecb_is_constant_struct ecb_is_constant_struct;
  #define ecb_is_constant(expr)          _Generic ((1 ? (struct ecb_is_constant_struct *)0 : (void *)((expr) - (expr)), ecb_is_constant_struct *: 0, default: 1)) */

  #define ecb_is_constant(expr)          0
#endif

/* ecb_expect (expr, value): yields expr, hinting that it usually equals value */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_expect)
  #define ecb_expect(expr,value)         __builtin_expect ((expr),(value))
#else
  #define ecb_expect(expr,value)         (expr)
#endif

/* ecb_prefetch (addr, rw, locality): cache prefetch hint; expands to nothing without the builtin */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_prefetch)
  #define ecb_prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
#else
  #define ecb_prefetch(addr,rw,locality)
#endif

/* no emulation for ecb_decltype */
/* ecb_decltype (x): the type of expression x; left undefined when neither C++11 decltype nor __typeof__ is available */
#if ECB_CPP11
  // older implementations might have problems with decltype(x)::type, work around it
  template<class T> struct ecb_decltype_t { typedef T type; };
  #define ecb_decltype(x) ecb_decltype_t<decltype (x)>::type
#elif ECB_GCC_VERSION(3,0) || ECB_CLANG_VERSION(2,8)
  #define ecb_decltype(x) __typeof__ (x)
#endif

/* ecb_deprecated: marks a declaration as deprecated (MSVC __declspec, otherwise via ecb_attribute) */
#if _MSC_VER >= 1300
  #define ecb_deprecated __declspec (deprecated)
#else
  #define ecb_deprecated ecb_attribute ((__deprecated__))
#endif

/*
 * ecb_deprecated_message (msg): like ecb_deprecated, but attaches the
 * string msg to the diagnostic where the compiler supports it; otherwise
 * it degrades to plain ecb_deprecated and msg is dropped.
 */
#if _MSC_VER >= 1500
  #define ecb_deprecated_message(msg) __declspec (deprecated (msg))
#elif ECB_GCC_VERSION(4,5)
  /* ecb_attribute takes a double-parenthesised attribute list, so three closing parens are needed here */
  #define ecb_deprecated_message(msg) ecb_attribute ((__deprecated__ (msg)))
#else
  #define ecb_deprecated_message(msg) ecb_deprecated
#endif

/* ecb_noinline: ask the compiler not to inline the function */
#if _MSC_VER >= 1400
  #define ecb_noinline __declspec (noinline)
#else
  #define ecb_noinline ecb_attribute ((__noinline__))
#endif

/* thin wrappers around the corresponding attributes; they expand to nothing when ecb_attribute does */
#define ecb_unused     ecb_attribute ((__unused__))
#define ecb_const      ecb_attribute ((__const__))
#define ecb_pure       ecb_attribute ((__pure__))

/* ecb_noreturn: marks a function that does not return, in the spelling the language/compiler offers */
#if ECB_C11 || __IBMC_NORETURN
  /* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/language_ref/noreturn.html */
  #define ecb_noreturn   _Noreturn
#elif ECB_CPP11
  #define ecb_noreturn   [[noreturn]]
#elif _MSC_VER >= 1200
  /* http://msdn.microsoft.com/en-us/library/k6ktzx3s.aspx */
  #define ecb_noreturn   __declspec (noreturn)
#else
  #define ecb_noreturn   ecb_attribute ((__noreturn__))
#endif

/* optimisation hints that need gcc >= 4.3; defined to nothing otherwise */
#if ECB_GCC_VERSION(4,3)
  #define ecb_artificial ecb_attribute ((__artificial__))
  #define ecb_hot        ecb_attribute ((__hot__))
  #define ecb_cold       ecb_attribute ((__cold__))
#else
  #define ecb_artificial
  #define ecb_hot
  #define ecb_cold
#endif

/* put around conditional expressions if you are very sure that the  */
/* expression is mostly true or mostly false. note that these return */
/* booleans, not the expression.                                     */
/* (the !! normalises expr to 0 or 1 before it is compared with the expected value) */
#define ecb_expect_false(expr) ecb_expect (!!(expr), 0)
#define ecb_expect_true(expr)  ecb_expect (!!(expr), 1)
/* for compatibility to the rest of the world */
#define ecb_likely(expr)   ecb_expect_true  (expr)
#define ecb_unlikely(expr) ecb_expect_false (expr)

/* count trailing zero bits and count # of one bits */
/* builtin-based versions; the #else branch provides portable function fallbacks with the same names */
#if ECB_GCC_VERSION(3,4) \
    || (ECB_CLANG_BUILTIN(__builtin_clz) && ECB_CLANG_BUILTIN(__builtin_clzll) \
        && ECB_CLANG_BUILTIN(__builtin_ctz) && ECB_CLANG_BUILTIN(__builtin_ctzll) \
        && ECB_CLANG_BUILTIN(__builtin_popcount))
  /* we assume int == 32 bit, long == 32 or 64 bit and long long == 64 bit */
  /* ecb_ld32/ecb_ld64: leading-zero count xor (width - 1), i.e. the index of the highest set bit */
  #define ecb_ld32(x)      (__builtin_clz      (x) ^ 31)
  #define ecb_ld64(x)      (__builtin_clzll    (x) ^ 63)
  #define ecb_ctz32(x)      __builtin_ctz      (x)
  #define ecb_ctz64(x)      __builtin_ctzll    (x)
  #define ecb_popcount32(x) __builtin_popcount (x)
  /* no popcountll */
#else
  /* portable fallback: number of trailing zero bits in x */
  ecb_function_ ecb_const int ecb_ctz32 (uint32_t x);
  ecb_function_ ecb_const int
  ecb_ctz32 (uint32_t x)
  {
#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
    unsigned long index;
    _BitScanForward (&index, x);
    return (int)index;
#else
    /* keep only the lowest set bit; ~x + 1 is the two's complement negation of x */
    uint32_t lowest = x & (~x + 1);
    int pos = 0;

    /* each mask selects the bit positions whose index has a particular binary digit set */
#if ECB_branchless_on_i386
    pos += !!(lowest & 0xaaaaaaaa) << 0;
    pos += !!(lowest & 0xcccccccc) << 1;
    pos += !!(lowest & 0xf0f0f0f0) << 2;
    pos += !!(lowest & 0xff00ff00) << 3;
    pos += !!(lowest & 0xffff0000) << 4;
#else
    pos += (lowest & 0xaaaaaaaa) ?  1 : 0;
    pos += (lowest & 0xcccccccc) ?  2 : 0;
    pos += (lowest & 0xf0f0f0f0) ?  4 : 0;
    pos += (lowest & 0xff00ff00) ?  8 : 0;
    pos += (lowest & 0xffff0000) ? 16 : 0;
#endif

    return pos;
#endif
  }

  /* portable fallback: number of trailing zero bits in a 64 bit value */
  ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
  ecb_function_ ecb_const int
  ecb_ctz64 (uint64_t x)
  {
#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
    unsigned long index;
    _BitScanForward64 (&index, x);
    return (int)index;
#else
    /* any bit set in the low half? then the answer lies there */
    if (x & 0xffffffff)
      return ecb_ctz32 (x >> 0) + 0;

    /* otherwise scan the high half and account for the 32 skipped bits */
    return ecb_ctz32 (x >> 32) + 32;
#endif
  }

  /* portable fallback: number of one bits in x, computed with parallel partial sums */
  ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
  ecb_function_ ecb_const int
  ecb_popcount32 (uint32_t x)
  {
    /* per-2-bit counts, then per-4-bit counts, then per-byte counts */
    uint32_t pairs   = x - ((x >> 1) & 0x55555555);
    uint32_t nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);
    uint32_t bytes   = ((nibbles >> 4) + nibbles) & 0x0f0f0f0f;
    /* the multiplication accumulates all four byte counts into the top byte */
    uint32_t total   = bytes * 0x01010101;

    return total >> 24;
  }

  /* portable fallback: index of the highest set bit of x (0 is returned for x <= 1) */
  ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
  ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
  {
#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
    unsigned long index;
    _BitScanReverse (&index, x);
    return (int)index;
#else
    int log = 0;
    int step;

    /* binary search: try to discard 16, 8, 4, 2 and finally 1 low bits */
    for (step = 16; step; step >>= 1)
      if (x >> step)
        {
          x >>= step;
          log += step;
        }

    return log;
#endif
  }

  /* portable fallback: index of the highest set bit of a 64 bit value */
  ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
  ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
  {
#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
    unsigned long index;
    _BitScanReverse64 (&index, x);
    return (int)index;
#else
    /* if the upper half is non-zero, the answer is found there, offset by 32 */
    if (x >> 32)
      return 32 + ecb_ld32 (x >> 32);

    return 0 + ecb_ld32 (x);
#endif
  }
#endif

/* true if at most one bit is set in x, i.e. x is a power of two (x == 0 also yields true) */
ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x);
ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x)
{
  /* x - 1 flips the lowest set bit and everything below it */
  return (x & (x - 1)) == 0;
}

/* 64 bit variant of ecb_is_pot32 */
ecb_function_ ecb_const ecb_bool ecb_is_pot64 (uint64_t x);
ecb_function_ ecb_const ecb_bool ecb_is_pot64 (uint64_t x)
{
  return (x & (x - 1)) == 0;
}

/* reverses the order of the 8 bits in x, using a multiply-and-mask trick instead of a loop */
ecb_function_ ecb_const uint8_t  ecb_bitrev8  (uint8_t  x);
ecb_function_ ecb_const uint8_t  ecb_bitrev8  (uint8_t  x)
{
  /* the result is truncated to uint8_t on return, so only bits 16..23 of the product matter */
  return (  (x * 0x0802U & 0x22110U)
          | (x * 0x8020U & 0x88440U)) * 0x10101U >> 16;
}

/* reverses the order of the 16 bits in x by swapping ever larger groups of bits */
ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x);
ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x)
{
  x = ((x & 0xaaaa) >> 1) | ((x & 0x5555) << 1); /* swap neighbouring bits */
  x = ((x & 0xcccc) >> 2) | ((x & 0x3333) << 2); /* swap bit pairs */
  x = ((x & 0xf0f0) >> 4) | ((x & 0x0f0f) << 4); /* swap nibbles */
  x = ((x & 0xff00) >> 8) | ((x & 0x00ff) << 8); /* swap bytes */

  return x;
}

/* reverses the order of the 32 bits in x by swapping ever larger groups of bits */
ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x);
ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x)
{
  x = ((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1); /* swap neighbouring bits */
  x = ((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2); /* swap bit pairs */
  x = ((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4); /* swap nibbles */
  x = ((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8); /* swap bytes */
  x = (x >> 16) | (x << 16);                             /* swap halves */

  return x;
}

/* popcount64 is only available on 64 bit cpus as gcc builtin */
/* so for this version we are lazy: count both 32 bit halves separately */
ecb_function_ ecb_const int ecb_popcount64 (uint64_t x);
ecb_function_ ecb_const int
ecb_popcount64 (uint64_t x)
{
  uint32_t low  = (uint32_t)x;
  uint32_t high = (uint32_t)(x >> 32);

  return ecb_popcount32 (low) + ecb_popcount32 (high);
}

/*
 * rotate x left (ecb_rotl*) or right (ecb_rotr*) by count bits.
 *
 * both shift amounts are masked to the range 0 .. width-1, so a count of 0
 * (or a multiple of the width) returns x unchanged instead of shifting by
 * the full type width, which is undefined behaviour for the 32 and 64 bit
 * variants. -count is evaluated in unsigned arithmetic, so
 * (-count & (width - 1)) equals (width - count) modulo width.
 * results for counts 1 .. width-1 are the same as with unmasked shifts.
 */
ecb_inline ecb_const uint8_t  ecb_rotl8  (uint8_t  x, unsigned int count);
ecb_inline ecb_const uint8_t  ecb_rotr8  (uint8_t  x, unsigned int count);
ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count);
ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count);
ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count);
ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count);
ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count);
ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count);

ecb_inline ecb_const uint8_t  ecb_rotl8  (uint8_t  x, unsigned int count) { return (x >> (-count &  7)) | (x << (count &  7)); }
ecb_inline ecb_const uint8_t  ecb_rotr8  (uint8_t  x, unsigned int count) { return (x << (-count &  7)) | (x >> (count &  7)); }
ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); }
ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); }
ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); }
ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); }
ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); }
ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); }

/*
 * ecb_bswap16/32/64: reverse the byte order of a 16/32/64 bit value.
 * uses compiler builtins (gcc/clang), the MSVC _byteswap_* functions,
 * or portable functions built on ecb_rotl16 as the last resort.
 */
#if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64))
  #if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16)
  #define ecb_bswap16(x)  __builtin_bswap16 (x)
  #else
  /* no 16 bit builtin: swap as 32 bit, which leaves the swapped pair in the upper half */
  #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16)
  #endif
  #define ecb_bswap32(x)  __builtin_bswap32 (x)
  #define ecb_bswap64(x)  __builtin_bswap64 (x)
#elif _MSC_VER
  #include <stdlib.h>
  #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x)))
  #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong  ((uint32_t)(x)))
  #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x)))
#else
  /* swapping the two bytes of a 16 bit value is a rotation by 8 bits */
  ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x);
  ecb_function_ ecb_const uint16_t
  ecb_bswap16 (uint16_t x)
  {
    return ecb_rotl16 (x, 8);
  }

  /* byte-swap each 16 bit half and exchange the halves */
  ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x);
  ecb_function_ ecb_const uint32_t
  ecb_bswap32 (uint32_t x)
  {
    return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16);
  }

  /* byte-swap each 32 bit half and exchange the halves */
  ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x);
  ecb_function_ ecb_const uint64_t
  ecb_bswap64 (uint64_t x)
  {
    return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32);
  }
#endif

/* ecb_unreachable (): tells the compiler that this point in the code is never reached */
#if ECB_GCC_VERSION(4,5) || ECB_CLANG_BUILTIN(__builtin_unreachable)
  #define ecb_unreachable() __builtin_unreachable ()
#else
  /* this seems to work fine, but gcc always emits a warning for it :/ */
  /* emulation: an empty function that is declared as never returning */
  ecb_inline ecb_noreturn void ecb_unreachable (void);
  ecb_inline ecb_noreturn void ecb_unreachable (void) { }
#endif

/* try to tell the compiler that some condition is definitely true */
/* the trailing "else 0" consumes the semicolon written after the macro invocation */
#define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0

/*
 * returns 0x44332211 on little endian and 0x11223344 on big endian targets.
 * when the byte order is known at preprocessing time this also defines
 * ECB_LITTLE_ENDIAN or ECB_BIG_ENDIAN; otherwise the value is derived
 * from the in-memory layout of a union.
 */
ecb_inline ecb_const uint32_t ecb_byteorder_helper (void);
ecb_inline ecb_const uint32_t
ecb_byteorder_helper (void)
{
  /* the union code still generates code under pressure in gcc, */
  /* but less than using pointers, and always seems to */
  /* successfully return a constant. */
  /* the reason why we have this horrible preprocessor mess */
  /* is to avoid it in all cases, at least on common architectures */
  /* or when using a recent enough gcc version (>= 4.6) */
#if (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
    || ((__i386 || __i386__ || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64) && !__VOS__)
  #define ECB_LITTLE_ENDIAN 1
  return 0x44332211;
#elif (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
      || ((__AARCH64EB__ || __MIPSEB__ || __ARMEB__) && !__VOS__)
  #define ECB_BIG_ENDIAN 1
  return 0x11223344;
#else
  /* store the bytes 11 22 33 44 in memory order and read them back as one 32 bit word */
  union
  {
    uint8_t c[4];
    uint32_t u;
  } u = { 0x11, 0x22, 0x33, 0x44 };
  return u.u;
#endif
}

/* true if the byte-order probe reports a big endian layout */
ecb_inline ecb_const ecb_bool ecb_big_endian    (void);
ecb_inline ecb_const ecb_bool ecb_big_endian    (void)
{
  const uint32_t probe = ecb_byteorder_helper ();

  return probe == 0x11223344;
}

/* true if the byte-order probe reports a little endian layout */
ecb_inline ecb_const ecb_bool ecb_little_endian (void);
ecb_inline ecb_const ecb_bool ecb_little_endian (void)
{
  const uint32_t probe = ecb_byteorder_helper ();

  return probe == 0x44332211;
}

/*
 * ecb_mod (m, n): modulo that yields a non-negative result for negative m.
 * the first form adds n when the % result is negative; the second form
 * never applies % to a negative operand.
 * NOTE(review): both forms look written for positive n - confirm before using with n <= 0.
 * both forms evaluate their arguments more than once.
 */
#if ECB_GCC_VERSION(3,0) || ECB_C99
  #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
#else
  #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n)))
#endif

/*
 * ecb_div_rd / ecb_div_ru: integer division of val by div that rounds
 * down (towards negative infinity) or up (towards positive infinity).
 * NOTE(review): the formulas look written for positive div - confirm before using a negative divisor.
 * the macro forms evaluate their arguments more than once.
 */
#if ECB_CPP
  template<typename T>
  static inline T ecb_div_rd (T val, T div)
  {
    return val < 0 ? - ((-val + div - 1) / div) : (val          ) / div;
  }
  template<typename T>
  static inline T ecb_div_ru (T val, T div)
  {
    return val < 0 ? - ((-val          ) / div) : (val + div - 1) / div;
  }
#else
  #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val)            ) / (div))
  #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val)            ) / (div)) : ((val) + (div) - 1) / (div))
#endif

/* ecb_array_length (arr): number of elements in the array arr */
/* the template variant is only compiled if ecb_cplusplus_does_not_suck is set; otherwise the sizeof macro is used */
#if ecb_cplusplus_does_not_suck
  /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */
  template<typename T, int N>
  static inline int ecb_array_length (const T (&arr)[N])
  {
    return N;
  }
#else
  #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
#endif

/* widens an ieee half/binary16 bit pattern (in the low 16 bits of x) to a binary32 bit pattern */
ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
ecb_function_ ecb_const uint32_t
ecb_binary16_to_binary32 (uint32_t x)
{
  unsigned int sign     = (x & 0x8000) << (31 - 15); /* sign bit, moved to bit 31 */
  int          exponent = (x >> 10) & 0x001f;        /* 5 bit biased exponent */
  unsigned int mantissa =  x        & 0x03ff;        /* 10 bit mantissa */

  if (ecb_expect_false (exponent == 31))
    /* infinity or NaN: ends up as 255 after rebiasing below */
    exponent = 255 - (127 - 15);
  else if (ecb_expect_false (!exponent))
    {
      if (ecb_expect_true (!mantissa))
        /* zero: ends up as exponent 0 after rebiasing below */
        exponent = 0 - (127 - 15);
      else
        {
          /* subnormal: shift the mantissa up until its leading one becomes the implicit bit */
          unsigned int shift = 10 - ecb_ld32 (mantissa);

          mantissa = (mantissa << shift) & 0x3ff; /* mask implicit bit */
          exponent -= shift - 1;
        }
    }

  /* exponent and mantissa are normalised now (or zero, inf, nan): switch to the binary32 bias */
  exponent += 127 - 15;

  return sign | (exponent << 23) | (mantissa << (23 - 10));
}

/*
 * narrows an ieee single/binary32 bit pattern to a half/binary16 bit pattern.
 * normal-range values are rounded to nearest even; values too large for
 * binary16 become infinity, values too small become subnormals or zero,
 * and NaNs stay NaNs.
 */
ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
ecb_function_ ecb_const uint16_t
ecb_binary32_to_binary16 (uint32_t x)
{
  unsigned int s =  (x >> 16) & 0x00008000; /* sign bit, the easy part */
  unsigned int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
  unsigned int m =   x        & 0x007fffff;

  /* drop the sign so that the range checks below can compare magnitudes */
  x &= 0x7fffffff;

  /* if it's within range of binary16 normals, use fast path */
  if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
    {
      /* mantissa round-to-even */
      m += 0x00000fff + ((m >> (23 - 10)) & 1);

      /* handle overflow */
      if (ecb_expect_false (m >= 0x00800000))
        {
          m >>= 1;
          e +=  1;
        }

      return s | (e << 10) | (m >> (23 - 10));
    }

  /* handle large numbers and infinity */
  if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
    return s | 0x7c00;

  /* handle zero, subnormals and small numbers */
  if (ecb_expect_true (x < 0x38800000))
    {
      /* zero */
      if (ecb_expect_true (!x))
        return s;

      /* handle subnormals */

      /* too small, will be zero */
      /* NOTE(review): e is unsigned while (14 - 24) is negative, so this is an unsigned comparison - confirm it behaves as intended */
      if (e < (14 - 24)) /* might not be sharp, but is good enough */
        return s;

      m |= 0x00800000; /* make implicit bit explicit */

      /* very tricky - we need to round to the nearest e (+10) bit value */
      {
        unsigned int bits = 14 - e;
        unsigned int half = (1 << (bits - 1)) - 1;
        unsigned int even = (m >> bits) & 1;

        /* if this overflows, we will end up with a normalised number */
        m = (m + half + even) >> bits;
      }

      return s | m;
    }

  /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
  m >>= 13;

  /* "| !m" sets the lowest mantissa bit when the shifted mantissa became zero */
  return s | 0x7c00 | m | !m;
}

/*******************************************************************************/
/* floating point stuff, can be disabled by defining ECB_NO_LIBM */

/* basically, everything uses "ieee pure-endian" floating point numbers */
/* the only noteworthy exception is ancient armle, which uses order 43218765 */
/* ECB_STDFP is 1 on the platforms listed here; the conversion functions below then use memcpy instead of emulation */
#if 0 \
    || __i386 || __i386__ \
    || ECB_GCC_AMD64 \
    || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \
    || defined __s390__ || defined __s390x__ \
    || defined __mips__ \
    || defined __alpha__ \
    || defined __hppa__ \
    || defined __ia64__ \
    || defined __m68k__ \
    || defined __m88k__ \
    || defined __sh__ \
    || defined _M_IX86 || defined ECB_MSVC_AMD64 || defined _M_IA64 \
    || (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \
    || defined __aarch64__
  #define ECB_STDFP 1
  #include <string.h> /* for memcpy */
#else
  #define ECB_STDFP 0
#endif

#ifndef ECB_NO_LIBM

  #include <math.h> /* for frexp*, ldexp*, INFINITY, NAN */

  /* only the oldest of old doesn't have this one. solaris. */
  /* ECB_INFINITY: INFINITY if <math.h> provides it, otherwise HUGE_VAL */
  #ifdef INFINITY
    #define ECB_INFINITY INFINITY
  #else
    #define ECB_INFINITY HUGE_VAL
  #endif

  /* ECB_NAN: NAN if available; note that the fallback is ECB_INFINITY, not a real NaN */
  #ifdef NAN
    #define ECB_NAN NAN
  #else
    #define ECB_NAN ECB_INFINITY
  #endif

  /* float variants of ldexp/frexp, emulated through the double versions where ldexpf/frexpf are not expected to exist */
  #if ECB_C99 || _XOPEN_VERSION >= 600 || _POSIX_VERSION >= 200112L
    #define ecb_ldexpf(x,e) ldexpf ((x), (e))
    #define ecb_frexpf(x,e) frexpf ((x), (e))
  #else
    #define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e))
    #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e))
  #endif

  /* convert a float to ieee single/binary32 */
  /* with ECB_STDFP the bytes are copied as-is; otherwise the bit pattern is rebuilt using frexp */
  ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
  ecb_function_ ecb_const uint32_t
  ecb_float_to_binary32 (float x)
  {
    uint32_t r;

    #if ECB_STDFP
      memcpy (&r, &x, 4);
    #else
      /* slow emulation, works for anything but -0 */
      uint32_t m;
      int e;

      /* special cases: zero, +/- overflow (mapped to infinity) and NaN */
      if (x == 0e0f                    ) return 0x00000000U;
      if (x > +3.40282346638528860e+38f) return 0x7f800000U;
      if (x < -3.40282346638528860e+38f) return 0xff800000U;
      if (x != x                       ) return 0x7fbfffffU;

      /* scale the fraction returned by frexp by 2**24 to obtain an integer mantissa */
      m = ecb_frexpf (x, &e) * 0x1000000U;

      /* bit 31 of m is taken as the sign; if set, m is negated to get the magnitude */
      r = m & 0x80000000U;

      if (r)
        m = -m;

      /* subnormal range: shift the mantissa down and pin the exponent */
      if (e <= -126)
        {
          m &= 0xffffffU;
          m >>= (-125 - e);
          e = -126;
        }

      r |= (e + 126) << 23;
      r |= m & 0x7fffffU;
    #endif

    return r;
  }

  /* converts an ieee single/binary32 to a float */
  /* with ECB_STDFP the bytes are copied as-is; otherwise the value is rebuilt using ldexp */
  ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x);
  ecb_function_ ecb_const float
  ecb_binary32_to_float (uint32_t x)
  {
    float r;

    #if ECB_STDFP
      memcpy (&r, &x, 4);
    #else
      /* emulation, only works for normals and subnormals and +0 */
      int neg = x >> 31;
      int e = (x >> 23) & 0xffU;

      /* keep only the 23 mantissa bits */
      x &= 0x7fffffU;

      /* a non-zero exponent field means a normal number: restore the implicit leading bit */
      if (e)
        x |= 0x800000U;
      else
        e = 1;

      /* we distrust ldexpf a bit and do the 2**-24 scaling by an extra multiply */
      r = ecb_ldexpf (x * (0.5f / 0x800000U), e - 126);

      r = neg ? -r : r;
    #endif

    return r;
  }

  /* convert a double to ieee double/binary64 */
  /* with ECB_STDFP the bytes are copied as-is; otherwise the bit pattern is rebuilt using frexp */
  ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x);
  ecb_function_ ecb_const uint64_t
  ecb_double_to_binary64 (double x)
  {
    uint64_t r;

    #if ECB_STDFP
      memcpy (&r, &x, 8);
    #else
      /* slow emulation, works for anything but -0 */
      uint64_t m;
      int e;

      /* special cases: zero, +/- overflow (mapped to infinity) and NaN */
      if (x == 0e0                     ) return 0x0000000000000000U;
      if (x > +1.79769313486231470e+308) return 0x7ff0000000000000U;
      if (x < -1.79769313486231470e+308) return 0xfff0000000000000U;
      if (x != x                       ) return 0X7ff7ffffffffffffU;

      /* scale the fraction returned by frexp by 2**53 to obtain an integer mantissa */
      m = frexp (x, &e) * 0x20000000000000U;

      /* bit 63 of m is taken as the sign; if set, m is negated to get the magnitude */
      r = m & 0x8000000000000000;;

      if (r)
        m = -m;

      /* subnormal range: shift the mantissa down and pin the exponent */
      if (e <= -1022)
        {
          m &= 0x1fffffffffffffU;
          m >>= (-1021 - e);
          e = -1022;
        }

      r |= ((uint64_t)(e + 1022)) << 52;
      r |= m & 0xfffffffffffffU;
    #endif

    return r;
  }

  /* converts an ieee double/binary64 to a double */
  /* with ECB_STDFP the bytes are copied as-is; otherwise the value is rebuilt using ldexp */
  ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x);
  ecb_function_ ecb_const double
  ecb_binary64_to_double (uint64_t x)
  {
    double r;

    #if ECB_STDFP
      memcpy (&r, &x, 8);
    #else
      /* emulation, only works for normals and subnormals and +0 */
      int neg = x >> 63;
      int e = (x >> 52) & 0x7ffU;

      /* keep only the 52 mantissa bits */
      x &= 0xfffffffffffffU;

      /* a non-zero exponent field means a normal number: restore the implicit leading bit */
      if (e)
        x |= 0x10000000000000U;
      else
        e = 1;

      /* we distrust ldexp a bit and do the 2**-53 scaling by an extra multiply */
      r = ldexp (x * (0.5 / 0x10000000000000U), e - 1022);

      r = neg ? -r : r;
    #endif

    return r;
  }

  /* convert a float to ieee half/binary16, going through the binary32 bit pattern */
  ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
  ecb_function_ ecb_const uint16_t
  ecb_float_to_binary16 (float x)
  {
    uint32_t single = ecb_float_to_binary32 (x);

    return ecb_binary32_to_binary16 (single);
  }

  /* convert an ieee half/binary16 to float, going through the binary32 bit pattern */
  ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
  ecb_function_ ecb_const float
  ecb_binary16_to_float (uint16_t x)
  {
    uint32_t single = ecb_binary16_to_binary32 (x);

    return ecb_binary32_to_float (single);
  }

#endif

#endif

/* ECB.H END */

/* libev refuses to build with the pthread-mutex fence emulation from ecb.h */
#if ECB_MEMORY_FENCE_NEEDS_PTHREADS
/* if your architecture doesn't need memory fences, e.g. because it is
 * single-cpu/core, or if you use libev in a project that doesn't use libev
 * from multiple threads, then you can define ECB_AVOID_PTHREADS when compiling
 * libev, in which cases the memory fences become nops.
 * alternatively, you can remove this #error and link against libpthread,
 * which will then provide the memory fences.
 */
# error "memory fences not defined for your architecture, please report"
#endif

/* no fence available at all (e.g. ECB_AVOID_PTHREADS was set): make all three fences no-ops */
#ifndef ECB_MEMORY_FENCE
# define ECB_MEMORY_FENCE do { } while (0)
# define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE
# define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
#endif

/* short local aliases for the ecb helpers used throughout this file */
#define expect_false(cond) ecb_expect_false (cond)
#define expect_true(cond)  ecb_expect_true  (cond)
#define noinline           ecb_noinline

/* inline_size: always uses ecb_inline */
#define inline_size        ecb_inline

/* inline_speed: uses ecb_inline only if EV_FEATURE_CODE is set, otherwise a non-inlined static function */
#if EV_FEATURE_CODE
# define inline_speed      ecb_inline
#else
# define inline_speed      noinline static
#endif

/* number of distinct watcher priorities */
#define NUMPRI (EV_MAXPRI - EV_MINPRI + 1)

/* ABSPRI (w): zero-based priority index of watcher w; constant 0 if only one priority exists */
#if EV_MINPRI == EV_MAXPRI
# define ABSPRI(w) (((W)w), 0)
#else
# define ABSPRI(w) (((W)w)->priority - EV_MINPRI)
#endif

#define EMPTY       /* required for microsofts broken pseudo-c compiler */
#define EMPTY2(a,b) /* used to suppress some warnings */

/* short pointer aliases for the three watcher base types */
typedef ev_watcher *W;
typedef ev_watcher_list *WL;
typedef ev_watcher_time *WT;

/* accessors for the "active" member of any watcher and the "at" member of time-based watchers */
#define ev_active(w) ((W)(w))->active
#define ev_at(w) ((WT)(w))->at

#if EV_USE_REALTIME
/* sig_atomic_t is used to avoid per-thread variables or locking but still */
/* giving it a reasonably high chance of working on typical architectures */
static EV_ATOMIC_T have_realtime; /* did clock_gettime (CLOCK_REALTIME) work? */
#endif

#if EV_USE_MONOTONIC
static EV_ATOMIC_T have_monotonic; /* did clock_gettime (CLOCK_MONOTONIC) work? */
#endif

/* defaults for the fd <-> win32 handle conversion hooks; each can be overridden by defining it beforehand */
#ifndef EV_FD_TO_WIN32_HANDLE
# define EV_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd)
#endif
#ifndef EV_WIN32_HANDLE_TO_FD
# define EV_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (handle, 0)
#endif
#ifndef EV_WIN32_CLOSE_FD
# define EV_WIN32_CLOSE_FD(fd) close (fd)
#endif

/* the win32 support code is textually included into this file */
#ifdef _WIN32
# include "ev_win32.c"
#endif

/*****************************************************************************/

/* define a suitable floor function (only used by periodics atm) */

#if EV_USE_FLOOR
# include <math.h>
# define ev_floor(v) floor (v)
#else

#include <float.h>

/* a floor() replacement function, should be independent of ev_tstamp type */
/* works by truncating through unsigned long; larger and negative arguments are reduced to that case first */
noinline
static ev_tstamp
ev_floor (ev_tstamp v)
{
  /* the choice of shift factor is not terribly important */
#if FLT_RADIX != 2 /* assume FLT_RADIX == 10 */
  const ev_tstamp shift = sizeof (unsigned long) >= 8 ? 10000000000000000000. : 1000000000.;
#else
  const ev_tstamp shift = sizeof (unsigned long) >= 8 ? 18446744073709551616. : 4294967296.;
#endif

  /* argument too large for an unsigned long? */
  if (expect_false (v >= shift))
    {
      ev_tstamp f;

      /* subtracting 1 no longer changes v, so it is returned unchanged */
      if (v == v - 1.)
        return v; /* very large number */

      /* split v into a multiple of shift (f) and a remainder, and floor both parts recursively */
      f = shift * ev_floor (v * (1. / shift));
      return f + ev_floor (v - f);
    }

  /* special treatment for negative args? */
  if (expect_false (v < 0.))
    {
      ev_tstamp f = -ev_floor (-v);

      /* f is v truncated towards zero here; step one further down unless v was already integral */
      return f - (f == v ? 0 : 1);
    }

  /* fits into an unsigned long */
  return (unsigned long)v;
}

#endif

/*****************************************************************************/

#ifdef __linux
# include <sys/utsname.h>
#endif

/*
 * returns the running kernel version packed into an integer, 8 bits per
 * component: the first three numeric components of the uname release
 * string a.b.c become (a << 16) | (b << 8) | c.
 * returns 0 when not compiled for linux or when uname fails.
 */
noinline ecb_cold
static unsigned int
ev_linux_version (void)
{
#ifdef __linux
  struct utsname un;
  const char *cur;
  unsigned int version = 0;
  int field;

  if (uname (&un))
    return 0;

  cur = un.release;

  for (field = 0; field < 3; ++field)
    {
      unsigned int num = 0;

      /* accumulate one run of decimal digits */
      while (*cur >= '0' && *cur <= '9')
        num = num * 10 + *cur++ - '0';

      /* step over a single separating dot, if there is one */
      if (*cur == '.')
        ++cur;

      version = (version << 8) | num;
    }

  return version;
#else
  return 0;
#endif
}

/*****************************************************************************/

#if EV_AVOID_STDIO
/* writes msg to stderr with write(2), bypassing stdio; the result of write is ignored */
noinline ecb_cold
static void
ev_printerr (const char *msg)
{
  size_t len = strlen (msg);

  write (STDERR_FILENO, msg, len);
}
#endif

/* user-supplied handler that ev_syserr calls in place of its default behaviour; initially unset */
static void (*syserr_cb)(const char *msg) EV_THROW;

/* installs cb as the system error handler; passing 0 restores the default behaviour of ev_syserr */
ecb_cold
void
ev_set_syserr_cb (void (*cb)(const char *msg) EV_THROW) EV_THROW
{
  syserr_cb = cb;
}

/*
 * reports a failed system call. msg names the failing operation; a
 * generic text is used when msg is 0. if a handler was installed with
 * ev_set_syserr_cb it is called and, should it return, so do we.
 * otherwise the message and the errno text are printed and the process
 * is aborted.
 */
noinline ecb_cold
static void
ev_syserr (const char *msg)
{
  const char *text = msg ? msg : "(libev) system error";

  if (syserr_cb)
    {
      syserr_cb (text);
      return;
    }

#if EV_AVOID_STDIO
  ev_printerr (text);
  ev_printerr (": ");
  ev_printerr (strerror (errno));
  ev_printerr ("\n");
#else
  perror (text);
#endif
  abort ();
}

/*
 * default allocator: behaves like realloc, except that a size of 0
 * always frees ptr and returns 0.
 */
static void *
ev_realloc_emul (void *ptr, long size) EV_THROW
{
  /* some systems, notably openbsd and darwin, fail to properly
   * implement realloc (x, 0) (as required by both ansi c-89 and
   * the single unix specification), so work around them here.
   * recently, also (at least) fedora and debian started breaking it,
   * despite documenting it otherwise.
   */

  if (!size)
    {
      free (ptr);
      return 0;
    }

  return realloc (ptr, size);
}

/* the allocator function used by ev_realloc; defaults to ev_realloc_emul */
static void *(*alloc)(void *ptr, long size) EV_THROW = ev_realloc_emul;

/* replaces the allocator; cb is called with realloc-like arguments (pointer and new size) */
ecb_cold
void
ev_set_allocator (void *(*cb)(void *ptr, long size) EV_THROW) EV_THROW
{
  alloc = cb;
}

/*
 * resizes ptr to size bytes through the configured allocator.
 * a failed non-zero-size request is fatal: a diagnostic is written to
 * stderr and the process is aborted, so callers never see a null result
 * for size != 0. a size of 0 frees the block (see ev_free).
 */
inline_speed void *
ev_realloc (void *ptr, long size)
{
  ptr = alloc (ptr, size);

  if (!ptr && size)
    {
#if EV_AVOID_STDIO
      ev_printerr ("(libev) memory allocation failed, aborting.\n");
#else
      /* terminate the line, as the EV_AVOID_STDIO variant does, so that later output starts on a fresh line */
      fprintf (stderr, "(libev) cannot allocate %ld bytes, aborting.\n", size);
#endif
      abort ();
    }

  return ptr;
}

/* allocation and release are both expressed through ev_realloc: a null pointer allocates, size 0 frees */
#define ev_malloc(size) ev_realloc (0, (size))
#define ev_free(ptr)    ev_realloc ((ptr), 0)

/*****************************************************************************/

/* set in reify when reification needed */
#define EV_ANFD_REIFY 1

/* file descriptor info structure */
typedef struct
{
  WL head;              /* head of the watcher list for this entry */
  unsigned char events; /* the events watched for */
  unsigned char reify;  /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */
  unsigned char emask;  /* the epoll backend stores the actual kernel mask in here */
  unsigned char unused; /* not used, as the name says */
#if EV_USE_EPOLL
  unsigned int egen;    /* generation counter to counter epoll bugs */
#endif
#if EV_SELECT_IS_WINSOCKET || EV_USE_IOCP
  SOCKET handle;        /* windows socket handle, only present in winsocket/iocp builds */
#endif
#if EV_USE_IOCP
  OVERLAPPED or, ow;    /* two OVERLAPPED structures, only present in iocp builds */
#endif
} ANFD;

/* stores the pending event set for a given watcher */
/* entries live in the per-priority pendings arrays; a watcher's pending */
/* member holds its index in that array plus one (see ev_feed_event) */
typedef struct
{
  W w;        /* the watcher the events are pending for */
  int events; /* the pending event set for the given watcher */
} ANPENDING;

#if EV_USE_INOTIFY
/* hash table entry per inotify-id */
/* only compiled in when inotify support is enabled */
typedef struct
{
  WL head; /* watcher list of this hash bucket */
} ANFS;
#endif

/* Heap Entry */
/* with EV_HEAP_CACHE_AT each heap element carries a copy of the watcher's */
/* "at" value next to the watcher pointer, otherwise the heap element is */
/* just the watcher pointer and "at" is read through it */
#if EV_HEAP_CACHE_AT
  /* a heap element */
  typedef struct {
    ev_tstamp at;
    WT w;
  } ANHE;

  #define ANHE_w(he)        (he).w     /* access watcher, read-write */
  #define ANHE_at(he)       (he).at    /* access cached at, read-only */
  #define ANHE_at_cache(he) (he).at = (he).w->at /* update at from watcher */
#else
  /* a heap element */
  typedef WT ANHE;

  #define ANHE_w(he)        (he)       /* the element is the watcher itself */
  #define ANHE_at(he)       (he)->at   /* read at directly from the watcher */
  #define ANHE_at_cache(he)            /* nothing is cached, so nothing to update */
#endif

/*
 * storage for the per-loop state.
 * with EV_MULTIPLICITY the variables listed in ev_vars.h become members of
 * struct ev_loop, without it they become file-level static variables.
 */
#if EV_MULTIPLICITY

  struct ev_loop
  {
    ev_tstamp ev_rt_now;
    #define ev_rt_now ((loop)->ev_rt_now)
    /* each VAR entry of ev_vars.h expands to one member declaration */
    #define VAR(name,decl) decl;
      #include "ev_vars.h"
    #undef VAR
  };
  /* NOTE(review): ev_wrap.h presumably maps the plain variable names to (loop)->name, as done by hand for ev_rt_now above - confirm */
  #include "ev_wrap.h"

  /* backing storage for the default loop, see ev_default_loop */
  static struct ev_loop default_loop_struct;
  EV_API_DECL struct ev_loop *ev_default_loop_ptr = 0; /* needs to be initialised to make it a definition despite extern */

#else

  EV_API_DECL ev_tstamp ev_rt_now = 0; /* needs to be initialised to make it a definition despite extern */
  /* each VAR entry of ev_vars.h expands to one static variable */
  #define VAR(name,decl) static decl;
    #include "ev_vars.h"
  #undef VAR

  /* without multiplicity this is only a flag: non-zero once the default loop exists */
  static int ev_default_loop_ptr;

#endif

/* hooks around the loop: with EV_FEATURE_API the release/acquire callbacks */
/* (when set) and the configurable invoke callback are used, otherwise the */
/* release/acquire hooks are no-ops and ev_invoke_pending is called directly */
#if EV_FEATURE_API
# define EV_RELEASE_CB if (expect_false (release_cb)) release_cb (EV_A)
# define EV_ACQUIRE_CB if (expect_false (acquire_cb)) acquire_cb (EV_A)
# define EV_INVOKE_PENDING invoke_cb (EV_A)
#else
# define EV_RELEASE_CB (void)0
# define EV_ACQUIRE_CB (void)0
# define EV_INVOKE_PENDING ev_invoke_pending (EV_A)
#endif

/* NOTE(review): not used within this part of the file; presumably an internal loop_done flag value - confirm at its users */
#define EVBREAK_RECURSE 0x80

/*****************************************************************************/

#ifndef EV_HAVE_EV_TIME
/*
 * return the current wall-clock time as seconds (with fraction).
 * uses clock_gettime (CLOCK_REALTIME) when that was detected as available,
 * and gettimeofday otherwise.
 */
ev_tstamp
ev_time (void) EV_THROW
{
#if EV_USE_REALTIME
  if (expect_true (have_realtime))
    {
      struct timespec rt;

      clock_gettime (CLOCK_REALTIME, &rt);
      return rt.tv_sec + rt.tv_nsec * 1e-9;
    }
#endif

  /* fallback with microsecond resolution */
  struct timeval tod;

  gettimeofday (&tod, 0);
  return tod.tv_sec + tod.tv_usec * 1e-6;
}
#endif

/*
 * return the time used for internal timekeeping: the monotonic clock
 * when it was detected as available, the wall-clock time (ev_time) otherwise.
 */
inline_size ev_tstamp
get_clock (void)
{
#if EV_USE_MONOTONIC
  if (expect_true (have_monotonic))
    {
      struct timespec mono;

      clock_gettime (CLOCK_MONOTONIC, &mono);
      return mono.tv_sec + mono.tv_nsec * 1e-9;
    }
#endif

  return ev_time ();
}

#if EV_MULTIPLICITY
/* return the ev_rt_now value stored in the given loop, without querying the clock */
/* only compiled as a function when loops are separate objects (EV_MULTIPLICITY) */
ev_tstamp
ev_now (EV_P) EV_THROW
{
  return ev_rt_now;
}
#endif

/*
 * block the calling thread for delay seconds (fractions allowed).
 * delays that are not greater than zero return immediately.
 */
void
ev_sleep (ev_tstamp delay) EV_THROW
{
  if (!(delay > 0.))
    return;

  {
#if EV_USE_NANOSLEEP
    struct timespec req;

    EV_TS_SET (req, delay);
    nanosleep (&req, 0);
#elif defined _WIN32
    /* Sleep takes milliseconds */
    Sleep ((unsigned long)(delay * 1e3));
#else
    struct timeval timeout;

    /* here we rely on sys/time.h + sys/types.h + unistd.h providing select */
    /* something not guaranteed by newer posix versions, but guaranteed */
    /* by older ones */
    EV_TV_SET (timeout, delay);
    select (0, 0, 0, 0, &timeout);
#endif
  }
}

/*****************************************************************************/

#define MALLOC_ROUND 4096 /* prefer to allocate in chunks of this size, must be 2**n and >> 4 longs */

/*
 * compute the new element count for a growing array.
 * elem - size of one element in bytes
 * cur  - current capacity in elements
 * cnt  - number of elements that must fit
 * the capacity is (cur + 1) doubled until it is at least cnt; for large
 * arrays the byte size is then rounded to a malloc-friendly value.
 */
inline_size int
array_nextsize (int elem, int cur, int cnt)
{
  int grown = (cur + 1) << 1;

  while (cnt > grown)
    grown <<= 1;

  /* if size is large, round to MALLOC_ROUND - 4 * longs to accommodate malloc overhead */
  if (elem * grown > MALLOC_ROUND - sizeof (void *) * 4)
    {
      /* work in bytes, round, then convert back to an element count */
      grown *= elem;
      grown = (grown + elem + (MALLOC_ROUND - 1) + sizeof (void *) * 4) & ~(MALLOC_ROUND - 1);
      grown = grown - sizeof (void *) * 4;
      grown /= elem;
    }

  return grown;
}

/*
 * grow the array at base so that it can hold at least cnt elements of
 * elem bytes each. *cur is updated to the new capacity and the (possibly
 * moved) array is returned.
 */
noinline ecb_cold
static void *
array_realloc (int elem, void *base, int *cur, int cnt)
{
  int capacity = array_nextsize (elem, *cur, cnt);

  *cur = capacity;
  return ev_realloc (base, elem * capacity);
}

/* zero-fill count elements starting at base */
#define array_init_zero(base,count)	\
  memset ((void *)(base), 0, sizeof (*(base)) * (count))

/* make sure the array base (capacity cur) has room for cnt elements of type; */
/* when it has to grow, init (first_new_element, number_of_new_elements) is */
/* invoked on the added tail. expands to a bare if statement. */
#define array_needsize(type,base,cur,cnt,init)			\
  if (expect_false ((cnt) > (cur)))				\
    {								\
      ecb_unused int ocur_ = (cur);				\
      (base) = (type *)array_realloc				\
         (sizeof (type), (base), &(cur), (cnt));		\
      init ((base) + (ocur_), (cur) - ocur_);			\
    }

/* disabled: shrinking of oversized arrays */
#if 0
#define array_slim(type,stem)					\
  if (stem ## max < array_roundsize (stem ## cnt >> 2))		\
    {								\
      stem ## max = array_roundsize (stem ## cnt >> 1);		\
      base = (type *)ev_realloc (base, sizeof (type) * (stem ## max));\
      fprintf (stderr, "slimmed down " # stem " to %d\n", stem ## max);/*D*/\
    }
#endif

/* free the array <stem>s and reset <stem>cnt, <stem>max and <stem>s to zero; */
/* idx is an optional subscript appended to each name. expands to several */
/* statements, so it must not be used as an unbraced if/else body. */
#define array_free(stem, idx) \
  ev_free (stem ## s idx); stem ## cnt idx = stem ## max idx = 0; stem ## s idx = 0

/*****************************************************************************/

/* dummy callback for pending events */
/* intentionally empty; installed as the callback of pending_w in loop_init */
noinline
static void
pendingcb (EV_P_ ev_prepare *w, int revents)
{
}

/*
 * mark the watcher w as having the events in revents pending.
 * a watcher that is already pending just gets the new events merged in,
 * otherwise it is appended to the pending array of its priority.
 */
noinline
void
ev_feed_event (EV_P_ void *w, int revents) EV_THROW
{
  W watcher = (W)w;
  int prio = ABSPRI (watcher);

  if (expect_false (watcher->pending))
    /* pending holds index + 1 of the existing entry */
    pendings [prio][watcher->pending - 1].events |= revents;
  else
    {
      ANPENDING *slot;

      watcher->pending = ++pendingcnt [prio];
      array_needsize (ANPENDING, pendings [prio], pendingmax [prio], watcher->pending, EMPTY2);

      slot = pendings [prio] + (watcher->pending - 1);
      slot->w      = watcher;
      slot->events = revents;
    }

  /* reset pendingpri to the highest priority index */
  pendingpri = NUMPRI - 1;
}

/* remember w in the rfeeds array; feed_reverse_done later feeds the */
/* collected watchers in reverse order */
inline_speed void
feed_reverse (EV_P_ W w)
{
  int slot = rfeedcnt;

  array_needsize (W, rfeeds, rfeedmax, slot + 1, EMPTY2);
  rfeeds [slot] = w;
  rfeedcnt = slot + 1;
}

/* feed revents to every watcher collected by feed_reverse, last one first, */
/* emptying the rfeeds array. must only be called when rfeedcnt is non-zero. */
inline_size void
feed_reverse_done (EV_P_ int revents)
{
  do
    {
      W w = rfeeds [--rfeedcnt];

      ev_feed_event (EV_A_ w, revents);
    }
  while (rfeedcnt);
}

/* feed the event type to each of the eventcnt watchers in events, in array order */
inline_speed void
queue_events (EV_P_ W *events, int eventcnt, int type)
{
  int idx = 0;

  while (idx < eventcnt)
    {
      ev_feed_event (EV_A_ events [idx], type);
      ++idx;
    }
}

/*****************************************************************************/

/* feed revents to the io watchers of fd: each watcher receives the */
/* subset of revents it is interested in, watchers with an empty subset */
/* are skipped. the reify state of the fd is not looked at. */
inline_speed void
fd_event_nocheck (EV_P_ int fd, int revents)
{
  ev_io *w = (ev_io *)anfds [fd].head;

  while (w)
    {
      int matched = w->events & revents;

      if (matched)
        ev_feed_event (EV_A_ (W)w, matched);

      w = (ev_io *)((WL)w)->next;
    }
}

/* do not submit kernel events for fds that have reify set */
/* because that means they changed while we were polling for new events */
inline_speed void
fd_event (EV_P_ int fd, int revents)
{
  if (expect_true (!anfds [fd].reify))
    fd_event_nocheck (EV_A_ fd, revents);
}

/* feed revents to the watchers of fd as if the backend had reported them; */
/* fds outside the range of the anfds array are silently ignored */
void
ev_feed_fd_event (EV_P_ int fd, int revents) EV_THROW
{
  if (fd < 0 || fd >= anfdmax)
    return;

  fd_event_nocheck (EV_A_ fd, revents);
}

/* make sure the external fd watch events are in-sync */
/* with the kernel/libev internal state */
/* processes every fd queued in fdchanges (see fd_change) and empties the queue */
inline_size void
fd_reify (EV_P)
{
  int i;

#if EV_SELECT_IS_WINSOCKET || EV_USE_IOCP
  /* first pass (socket-handle configurations only): detect fds whose */
  /* underlying socket handle was replaced while the fd number stayed the same */
  for (i = 0; i < fdchangecnt; ++i)
    {
      int fd = fdchanges [i];
      ANFD *anfd = anfds + fd;

      if (anfd->reify & EV__IOFDSET && anfd->head)
        {
          SOCKET handle = EV_FD_TO_WIN32_HANDLE (fd);

          if (handle != anfd->handle)
            {
              unsigned long arg;

              assert (("libev: only socket fds supported in this configuration", ioctlsocket (handle, FIONREAD, &arg) == 0));

              /* handle changed, but fd didn't - we need to do it in two steps */
              /* step one: tell the backend to stop watching; the second pass below re-adds the events */
              backend_modify (EV_A_ fd, anfd->events, 0);
              anfd->events = 0;
              anfd->handle = handle;
            }
        }
    }
#endif

  /* main pass: recompute the event mask of each changed fd and tell the backend */
  for (i = 0; i < fdchangecnt; ++i)
    {
      int fd = fdchanges [i];
      ANFD *anfd = anfds + fd;
      ev_io *w;

      /* remember the old state, then clear reify so the fd can be queued again */
      unsigned char o_events = anfd->events;
      unsigned char o_reify  = anfd->reify;

      anfd->reify  = 0;

      /*if (expect_true (o_reify & EV_ANFD_REIFY)) probably a deoptimisation */
        {
          anfd->events = 0;

          /* the new mask is the union of the masks of all watchers on this fd */
          for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next)
            anfd->events |= (unsigned char)w->events;

          if (o_events != anfd->events)
            o_reify = EV__IOFDSET; /* actually |= */
        }

      /* only bother the backend when the mask changed or EV__IOFDSET was requested */
      if (o_reify & EV__IOFDSET)
        backend_modify (EV_A_ fd, o_events, anfd->events);
    }

  fdchangecnt = 0;
}

/* something about the given fd changed */
/* flags is or'ed into the reify field of the fd; the fd is appended to the */
/* fdchanges queue unless its reify field was already non-zero (already queued) */
inline_size
void
fd_change (EV_P_ int fd, int flags)
{
  ANFD *anfd = anfds + fd;
  unsigned char queued_before = anfd->reify;

  anfd->reify |= flags;

  if (expect_true (!queued_before))
    {
      int slot = fdchangecnt++;

      array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2);
      fdchanges [slot] = fd;
    }
}

/* the given fd is invalid/unusable, so make sure it doesn't hurt us anymore */
/* every io watcher on the fd is stopped and then fed an error event */
inline_speed ecb_cold void
fd_kill (EV_P_ int fd)
{
  for (;;)
    {
      ev_io *w = (ev_io *)anfds [fd].head;

      if (!w)
        break;

      ev_io_stop (EV_A_ w);
      ev_feed_event (EV_A_ (W)w, EV_ERROR | EV_READ | EV_WRITE);
    }
}

/* check whether the given fd is actually valid, for error recovery */
/* returns non-zero when the fd is usable */
inline_size ecb_cold int
fd_valid (int fd)
{
  int usable;

#ifdef _WIN32
  usable = EV_FD_TO_WIN32_HANDLE (fd) != -1;
#else
  usable = fcntl (fd, F_GETFD) != -1;
#endif

  return usable;
}

/* called on EBADF to verify fds */
/* every watched fd that turns out to be invalid (with errno EBADF) is killed */
noinline ecb_cold
static void
fd_ebadf (EV_P)
{
  int fd;

  for (fd = 0; fd < anfdmax; ++fd)
    {
      if (anfds [fd].events && !fd_valid (fd) && errno == EBADF)
        fd_kill (EV_A_ fd);
    }
}

/* called on ENOMEM in select/poll to kill some fds and retry */
/* kills exactly one fd: the highest-numbered one that is being watched */
noinline ecb_cold
static void
fd_enomem (EV_P)
{
  int fd = anfdmax;

  while (fd--)
    {
      if (!anfds [fd].events)
        continue;

      fd_kill (EV_A_ fd);
      return;
    }
}

/* usually called after fork if backend needs to re-arm all fds from scratch */
/* forgets the registered state of every watched fd and queues it for reification */
noinline
static void
fd_rearm_all (EV_P)
{
  int fd;

  for (fd = 0; fd < anfdmax; ++fd)
    {
      if (!anfds [fd].events)
        continue;

      anfds [fd].events = 0;
      anfds [fd].emask  = 0;
      fd_change (EV_A_ fd, EV__IOFDSET | EV_ANFD_REIFY);
    }
}

/* used to prepare libev internal fd's */
/* this is not fork-safe */
/* the fd is switched to non-blocking mode and, outside windows, marked close-on-exec */
inline_speed void
fd_intern (int fd)
{
#ifdef _WIN32
  unsigned long nonblocking = 1;

  ioctlsocket (EV_FD_TO_WIN32_HANDLE (fd), FIONBIO, &nonblocking);
#else
  fcntl (fd, F_SETFD, FD_CLOEXEC);
  fcntl (fd, F_SETFL, O_NONBLOCK);
#endif
}

/*****************************************************************************/

/*
 * the heap functions want a real array index. array index 0 is guaranteed to not
 * be in-use at any time. the first heap entry is at array [HEAP0]. DHEAP gives
 * the branching factor of the d-tree.
 */

/*
 * at the moment we allow libev the luxury of two heaps,
 * a small-code-size 2-heap one and a ~1.5kb larger 4-heap
 * which is more cache-efficient.
 * the difference is about 5% with 50000+ watchers.
 */
#if EV_USE_4HEAP

#define DHEAP 4
#define HEAP0 (DHEAP - 1) /* index of first element in heap */
/* parent index of element k; for the root (k == HEAP0) this evaluates to k itself */
#define HPARENT(k) ((((k) - HEAP0 - 1) / DHEAP) + HEAP0)
/* upheap terminates when an element is its own parent, i.e. at the root */
#define UPHEAP_DONE(p,k) ((p) == (k))

/* away from the root */
/* sift the element at index k down until none of its children is smaller; */
/* N is the number of elements in the heap */
inline_speed void
downheap (ANHE *heap, int N, int k)
{
  ANHE he = heap [k];
  ANHE *E = heap + N + HEAP0; /* one past the last heap element */

  for (;;)
    {
      ev_tstamp minat;
      ANHE *minpos;
      ANHE *pos = heap + DHEAP * (k - HEAP0) + HEAP0 + 1; /* first child of k */

      /* find minimum child */
      if (expect_true (pos + DHEAP - 1 < E))
        {
          /* fast path */                               (minpos = pos + 0), (minat = ANHE_at (*minpos));
          if (               ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
          if (               ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
          if (               ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
        }
      else if (pos < E)
        {
          /* slow path */                               (minpos = pos + 0), (minat = ANHE_at (*minpos));
          if (pos + 1 < E && ANHE_at (pos [1]) < minat) (minpos = pos + 1), (minat = ANHE_at (*minpos));
          if (pos + 2 < E && ANHE_at (pos [2]) < minat) (minpos = pos + 2), (minat = ANHE_at (*minpos));
          if (pos + 3 < E && ANHE_at (pos [3]) < minat) (minpos = pos + 3), (minat = ANHE_at (*minpos));
        }
      else
        break; /* k has no children */

      /* the element is not larger than its smallest child: final position found */
      if (ANHE_at (he) <= minat)
        break;

      /* move the smallest child up and record its new index in the watcher */
      heap [k] = *minpos;
      ev_active (ANHE_w (*minpos)) = k;

      k = minpos - heap;
    }

  heap [k] = he;
  ev_active (ANHE_w (he)) = k;
}

#else /* 4HEAP */

#define HEAP0 1 /* index of first element in heap */
#define HPARENT(k) ((k) >> 1)
/* upheap terminates when the parent index is 0, which is never a heap element */
#define UPHEAP_DONE(p,k) (!(p))

/* away from the root */
/* sift the element at index k down until neither child is smaller; */
/* N is the number of elements in the heap */
inline_speed void
downheap (ANHE *heap, int N, int k)
{
  ANHE he = heap [k];

  for (;;)
    {
      int c = k << 1; /* first child of k */

      /* no children left */
      if (c >= N + HEAP0)
        break;

      /* pick the second child if it exists and is smaller than the first */
      c += c + 1 < N + HEAP0 && ANHE_at (heap [c]) > ANHE_at (heap [c + 1])
           ? 1 : 0;

      /* the element is not larger than its smallest child: final position found */
      if (ANHE_at (he) <= ANHE_at (heap [c]))
        break;

      /* move the child up and record its new index in the watcher */
      heap [k] = heap [c];
      ev_active (ANHE_w (heap [k])) = k;
      
      k = c;
    }

  heap [k] = he;
  ev_active (ANHE_w (he)) = k;
}
#endif

/* towards the root */
/* sift the element at index k up while its parent is larger, keeping the */
/* active index of every moved watcher in sync with its heap position */
inline_speed void
upheap (ANHE *heap, int k)
{
  ANHE moving = heap [k];

  for (;;)
    {
      int parent = HPARENT (k);

      /* reached the root */
      if (UPHEAP_DONE (parent, k))
        break;

      /* parent is not larger: final position found */
      if (ANHE_at (heap [parent]) <= ANHE_at (moving))
        break;

      /* pull the parent down into the hole */
      heap [k] = heap [parent];
      ev_active (ANHE_w (heap [k])) = k;
      k = parent;
    }

  heap [k] = moving;
  ev_active (ANHE_w (moving)) = k;
}

/* move an element suitably so it is in a correct place */
/* used after the "at" value of the element at index k changed: a non-root */
/* element that is not larger than its parent goes up, everything else goes down */
inline_size void
adjustheap (ANHE *heap, int N, int k)
{
  if (k > HEAP0 && ANHE_at (heap [k]) <= ANHE_at (heap [HPARENT (k)]))
    {
      upheap (heap, k);
      return;
    }

  downheap (heap, N, k);
}

/* rebuild the heap: this function is used only once and executed rarely */
/* heap holds N elements starting at index HEAP0 */
inline_size void
reheap (ANHE *heap, int N)
{
  int pos;

  /* we don't use floyds algorithm, upheap is simpler and is more cache-efficient */
  /* also, this is easy to implement and correct for both 2-heaps and 4-heaps */
  for (pos = HEAP0; pos < N + HEAP0; ++pos)
    upheap (heap, pos);
}

/*****************************************************************************/

/* associates signal watchers with a signal */
typedef struct
{
  EV_ATOMIC_T pending; /* set by ev_feed_signal, cleared by ev_feed_signal_event */
#if EV_MULTIPLICITY
  EV_P;                /* the loop this signal is bound to, null if none */
#endif
  WL head;             /* list of the signal watchers for this signal */
} ANSIG;

/* one entry per signal, signal number signum lives at index signum - 1 */
static ANSIG signals [EV_NSIG - 1];

/*****************************************************************************/

#if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE

/*
 * create the wakeup channel used by signal and async handling and start
 * the pipe_w watcher on it, unless that watcher is already active.
 * the channel is an eventfd when available (then evpipe [0] is -1 and
 * evpipe [1] is the eventfd), otherwise a pipe with evpipe [0] as the read
 * end and evpipe [1] as the write end.
 */
noinline ecb_cold
static void
evpipe_init (EV_P)
{
  if (!ev_is_active (&pipe_w))
    {
      int fds [2];

# if EV_USE_EVENTFD
      fds [0] = -1;
      fds [1] = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
      /* retry without flags if the flags were rejected with EINVAL */
      if (fds [1] < 0 && errno == EINVAL)
        fds [1] = eventfd (0, 0);

      /* no eventfd: the block below creates a pipe instead */
      if (fds [1] < 0)
# endif
        {
          /* keeps retrying for as long as ev_syserr returns (i.e. a user handler is installed) */
          while (pipe (fds))
            ev_syserr ("(libev) error creating signal/async pipe");

          fd_intern (fds [0]);
        }

      evpipe [0] = fds [0];

      if (evpipe [1] < 0)
        evpipe [1] = fds [1]; /* first call, set write fd */
      else
        {
          /* on subsequent calls, do not change evpipe [1] */
          /* so that evpipe_write can always rely on its value. */
          /* this branch does not do anything sensible on windows, */
          /* so must not be executed on windows */

          dup2 (fds [1], evpipe [1]);
          close (fds [1]);
        }

      fd_intern (evpipe [1]);

      /* watch the read side: the eventfd itself when there is no separate read end */
      ev_io_set (&pipe_w, evpipe [0] < 0 ? evpipe [1] : evpipe [0], EV_READ);
      ev_io_start (EV_A_ &pipe_w);
      ev_unref (EV_A); /* watcher should not keep loop alive */
    }
}

/*
 * set *flag and, if the loop asked for it (pipe_write_wanted), wake it up
 * by writing to the wakeup channel. does nothing when *flag is already set.
 * errno is preserved across the write.
 */
inline_speed void
evpipe_write (EV_P_ EV_ATOMIC_T *flag)
{
  ECB_MEMORY_FENCE; /* push out the write before this function was called, acquire flag */

  /* already flagged, a wakeup has been taken care of */
  if (expect_true (*flag))
    return;

  *flag = 1;
  ECB_MEMORY_FENCE_RELEASE; /* make sure flag is visible before the wakeup */

  /* record that a write may have been skipped; cleared again below if we do write */
  pipe_write_skipped = 1;

  ECB_MEMORY_FENCE; /* make sure pipe_write_skipped is visible before we check pipe_write_wanted */

  if (pipe_write_wanted)
    {
      int old_errno;

      pipe_write_skipped = 0;
      ECB_MEMORY_FENCE_RELEASE;

      old_errno = errno; /* save errno because write will clobber it */

#if EV_USE_EVENTFD
      if (evpipe [0] < 0)
        {
          /* eventfd in use: add 1 to its counter */
          uint64_t counter = 1;
          write (evpipe [1], &counter, sizeof (uint64_t));
        }
      else
#endif
        {
#ifdef _WIN32
          WSABUF buf;
          DWORD sent;
          /* the byte value is irrelevant: send one byte taken from the WSABUF's own storage */
          buf.buf = (char *)&buf;
          buf.len = 1;
          WSASend (EV_FD_TO_WIN32_HANDLE (evpipe [1]), &buf, 1, &sent, 0, 0, 0);
#else
          /* the byte value is irrelevant: write one byte taken from evpipe [1] */
          write (evpipe [1], &(evpipe [1]), 1);
#endif
        }

      errno = old_errno;
    }
}

/* called whenever the libev signal pipe */
/* got some events (signal, async) */
/* drains the wakeup channel on EV_READ, then feeds events for all flagged */
/* signals and all async watchers marked as sent */
static void
pipecb (EV_P_ ev_io *iow, int revents)
{
  int i;

  if (revents & EV_READ)
    {
#if EV_USE_EVENTFD
      if (evpipe [0] < 0)
        {
          /* eventfd in use: read its counter */
          uint64_t counter;
          read (evpipe [1], &counter, sizeof (uint64_t));
        }
      else
#endif
        {
          /* pipe in use: read up to sizeof (dummy) bytes, the content is ignored */
          char dummy[4];
#ifdef _WIN32
          WSABUF buf;
          DWORD recvd;
          DWORD flags = 0;
          buf.buf = dummy;
          buf.len = sizeof (dummy);
          WSARecv (EV_FD_TO_WIN32_HANDLE (evpipe [0]), &buf, 1, &recvd, &flags, 0, 0);
#else
          read (evpipe [0], &dummy, sizeof (dummy));
#endif
        }
    }

  pipe_write_skipped = 0;

  ECB_MEMORY_FENCE; /* push out skipped, acquire flags */

#if EV_SIGNAL_ENABLE
  if (sig_pending)
    {
      sig_pending = 0;

      ECB_MEMORY_FENCE;

      /* i is the signals index, the signal number is i + 1 */
      for (i = EV_NSIG - 1; i--; )
        if (expect_false (signals [i].pending))
          ev_feed_signal_event (EV_A_ i + 1);
    }
#endif

#if EV_ASYNC_ENABLE
  if (async_pending)
    {
      async_pending = 0;

      ECB_MEMORY_FENCE;

      for (i = asynccnt; i--; )
        if (asyncs [i]->sent)
          {
            /* clear sent before feeding the event */
            asyncs [i]->sent = 0;
            ECB_MEMORY_FENCE_RELEASE;
            ev_feed_event (EV_A_ asyncs [i], EV_ASYNC);
          }
    }
#endif
}

/*****************************************************************************/

/*
 * mark the signal signum as pending and wake up the loop it is bound to.
 * this is what the signal handler (ev_sighandler) calls.
 * signal numbers outside the signals table are ignored, just as
 * ev_feed_signal_event does; with EV_MULTIPLICITY, signals that are not
 * bound to any loop are ignored as well.
 */
void
ev_feed_signal (int signum) EV_THROW
{
#if EV_MULTIPLICITY
  EV_P;
#endif

  /* signals [] has EV_NSIG - 1 entries for signal numbers 1 .. EV_NSIG - 1, */
  /* anything else would index outside the array */
  if (expect_false (signum <= 0 || signum >= EV_NSIG))
    return;

#if EV_MULTIPLICITY
  ECB_MEMORY_FENCE_ACQUIRE;
  EV_A = signals [signum - 1].loop;

  /* nobody is watching this signal */
  if (!EV_A)
    return;
#endif

  signals [signum - 1].pending = 1;
  evpipe_write (EV_A_ &sig_pending);
}

/* the signal handler: forwards the signal to ev_feed_signal */
static void
ev_sighandler (int signum)
{
#ifdef _WIN32
  /* on windows the handler is installed again on every delivery */
  signal (signum, ev_sighandler);
#endif

  ev_feed_signal (signum);
}

/*
 * feed an EV_SIGNAL event to every watcher registered for signum and
 * clear the signal's pending flag. invalid signal numbers are ignored.
 */
noinline
void
ev_feed_signal_event (EV_P_ int signum) EV_THROW
{
  WL watcher;
  ANSIG *sig;

  if (expect_false (signum <= 0 || signum >= EV_NSIG))
    return;

  /* signal numbers start at 1, the table at index 0 */
  sig = signals + (signum - 1);

#if EV_MULTIPLICITY
  /* it is permissible to try to feed a signal to the wrong loop */
  /* or, likely more useful, feeding a signal nobody is waiting for */

  if (expect_false (sig->loop != EV_A))
    return;
#endif

  sig->pending = 0;
  ECB_MEMORY_FENCE_RELEASE;

  for (watcher = sig->head; watcher; watcher = watcher->next)
    ev_feed_event (EV_A_ (W)watcher, EV_SIGNAL);
}

#if EV_USE_SIGNALFD
/* io callback for the signalfd: reads the queued siginfo records and feeds */
/* a signal event for each of them */
static void
sigfdcb (EV_P_ ev_io *iow, int revents)
{
  struct signalfd_siginfo si[2], *sip; /* these structs are big */

  for (;;)
    {
      ssize_t res = read (sigfd, si, sizeof (si));

      /* not ISO-C, as res might be -1, but works with SuS */
      for (sip = si; (char *)sip < (char *)si + res; ++sip)
        ev_feed_signal_event (EV_A_ sip->ssi_signo);

      /* a short (or failed) read ends the loop, a full buffer makes us read again */
      if (res < (ssize_t)sizeof (si))
        break;
    }
}
#endif

#endif

/*****************************************************************************/

#if EV_CHILD_ENABLE
/* hash table of child watcher lists; child_reap selects the bucket with */
/* chain & (EV_PID_HASHSIZE - 1) */
static WL childs [EV_PID_HASHSIZE];

/* the SIGCHLD watcher that drives childcb, started in ev_default_loop */
static ev_signal childev;

/* systems without WIFCONTINUED never report continued children */
#ifndef WIFCONTINUED
# define WIFCONTINUED(status) 0
#endif

/* handle a single child status event */
/* feeds EV_CHILD to every watcher in hash bucket chain that watches pid */
/* (or any pid); stop/continue notifications only go to watchers that have */
/* bit 0 of their flags set */
inline_speed void
child_reap (EV_P_ int chain, int pid, int status)
{
  ev_child *w = (ev_child *)childs [chain & ((EV_PID_HASHSIZE) - 1)];
  int traced = WIFSTOPPED (status) || WIFCONTINUED (status);

  for (; w; w = (ev_child *)((WL)w)->next)
    {
      /* watcher is for a different, specific pid */
      if (w->pid != pid && w->pid)
        continue;

      /* watcher did not ask for stop/continue events */
      if (traced && !(w->flags & 1))
        continue;

      ev_set_priority (w, EV_MAXPRI); /* need to do it *now*, this *must* be the same prio as the signal watcher itself */
      w->rpid    = pid;
      w->rstatus = status;
      ev_feed_event (EV_A_ (W)w, EV_CHILD);
    }
}

/* systems without WCONTINUED simply do not request continue notifications */
#ifndef WCONTINUED
# define WCONTINUED 0
#endif

/* called on sigchld etc., calls waitpid */
/* reaps at most one child per invocation and re-feeds its own signal event */
/* so that it runs again until no more children are waiting */
static void
childcb (EV_P_ ev_signal *sw, int revents)
{
  int pid, status;

  /* some systems define WCONTINUED but then fail to support it (linux 2.4) */
  /* so when the first waitpid fails with EINVAL, retry without WCONTINUED; */
  /* return if no child status could be obtained either way */
  if (0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED | WCONTINUED)))
    if (!WCONTINUED
        || errno != EINVAL
        || 0 >= (pid = waitpid (-1, &status, WNOHANG | WUNTRACED)))
      return;

  /* make sure we are called again until all children have been reaped */
  /* we need to do it this way so that the callback gets called before we continue */
  ev_feed_event (EV_A_ (W)sw, EV_SIGNAL);

  /* notify the bucket selected by pid, and bucket 0 as well when there are several buckets */
  child_reap (EV_A_ pid, pid, status);
  if ((EV_PID_HASHSIZE) > 1)
    child_reap (EV_A_ 0, pid, status); /* this might trigger a watcher twice, but feed_event catches that */
}

#endif

/*****************************************************************************/

#if EV_USE_IOCP
# include "ev_iocp.c"
#endif
#if EV_USE_PORT
# include "ev_port.c"
#endif
#if EV_USE_KQUEUE
# include "ev_kqueue.c"
#endif
#if EV_USE_EPOLL
# include "ev_epoll.c"
#endif
#if EV_USE_POLL
# include "ev_poll.c"
#endif
#if EV_USE_SELECT
# include "ev_select.c"
#endif

/* return the EV_VERSION_MAJOR value this file was compiled with */
ecb_cold int
ev_version_major (void) EV_THROW
{
  return EV_VERSION_MAJOR;
}

/* return the EV_VERSION_MINOR value this file was compiled with */
ecb_cold int
ev_version_minor (void) EV_THROW
{
  return EV_VERSION_MINOR;
}

/* return true if we are running with elevated privileges and should ignore env variables */
/* "elevated" means the real and effective user or group ids differ; always false on windows */
inline_size ecb_cold int
enable_secure (void)
{
#ifdef _WIN32
  return 0;
#else
  if (getuid () != geteuid ())
    return 1;

  return getgid () != getegid ();
#endif
}

/* return the set of EVBACKEND_* flags for the backends compiled into this build */
ecb_cold
unsigned int
ev_supported_backends (void) EV_THROW
{
  unsigned int compiled_in = 0;

  compiled_in |= EV_USE_PORT   ? EVBACKEND_PORT   : 0;
  compiled_in |= EV_USE_KQUEUE ? EVBACKEND_KQUEUE : 0;
  compiled_in |= EV_USE_EPOLL  ? EVBACKEND_EPOLL  : 0;
  compiled_in |= EV_USE_POLL   ? EVBACKEND_POLL   : 0;
  compiled_in |= EV_USE_SELECT ? EVBACKEND_SELECT : 0;

  return compiled_in;
}

/* return the supported backends minus those considered unreliable on the */
/* platform this file is compiled for */
ecb_cold
unsigned int
ev_recommended_backends (void) EV_THROW
{
  unsigned int usable = ev_supported_backends ();

#ifndef __NetBSD__
  /* kqueue is borked on everything but netbsd apparently */
  /* it usually doesn't work correctly on anything but sockets and pipes */
  usable &= ~EVBACKEND_KQUEUE;
#endif
#ifdef __APPLE__
  /* only select works correctly on that "unix-certified" platform */
  usable &= ~EVBACKEND_KQUEUE; /* horribly broken, even for sockets */
  usable &= ~EVBACKEND_POLL;   /* poll is based on kqueue from 10.5 onwards */
#endif
#ifdef __FreeBSD__
  usable &= ~EVBACKEND_POLL;   /* poll return value is unusable (http://forums.freebsd.org/archive/index.php/t-10270.html) */
#endif

  return usable;
}

/* return the backends that can be embedded into other event loops: */
/* epoll, kqueue and port, with epoll removed on old linux kernels */
ecb_cold
unsigned int
ev_embeddable_backends (void) EV_THROW
{
  int embeddable = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT;

  /* epoll embeddability broken on all linux versions up to at least 2.6.23 */
  if (ev_linux_version () < 0x020620) /* disable it on linux < 2.6.32 */
    embeddable &= ~EVBACKEND_EPOLL;

  return embeddable;
}

/* return the backend flag stored in the loop; it is 0 while the loop has no */
/* backend (loop_init failed to initialise one, or the loop was destroyed) */
unsigned int
ev_backend (EV_P) EV_THROW
{
  return backend;
}

#if EV_FEATURE_API
/* return the loop's loop_count value */
/* NOTE(review): presumably the number of loop iterations so far - the counter is updated outside this part of the file */
unsigned int
ev_iteration (EV_P) EV_THROW
{
  return loop_count;
}

/* return the loop's loop_depth value */
/* NOTE(review): presumably the recursion depth of the loop's run function - the counter is updated outside this part of the file */
unsigned int
ev_depth (EV_P) EV_THROW
{
  return loop_depth;
}

/* store interval as the loop's io_blocktime (loop_init sets it to 0.) */
void
ev_set_io_collect_interval (EV_P_ ev_tstamp interval) EV_THROW
{
  io_blocktime = interval;
}

/* store interval as the loop's timeout_blocktime (loop_init sets it to 0.) */
void
ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval) EV_THROW
{
  timeout_blocktime = interval;
}

/* attach an arbitrary user pointer to the loop; retrieve it with ev_userdata */
void
ev_set_userdata (EV_P_ void *data) EV_THROW
{
  userdata = data;
}

/* return the user pointer last stored with ev_set_userdata */
void *
ev_userdata (EV_P) EV_THROW
{
  return userdata;
}

/* replace the callback that EV_INVOKE_PENDING calls; loop_init installs */
/* ev_invoke_pending as the default */
void
ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending_cb) EV_THROW
{
  invoke_cb = invoke_pending_cb;
}

/* set the callbacks used by EV_RELEASE_CB and EV_ACQUIRE_CB; */
/* a null pointer disables the respective hook */
void
ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_THROW, void (*acquire)(EV_P) EV_THROW) EV_THROW
{
  release_cb = release;
  acquire_cb = acquire;
}
#endif

/* initialise a loop structure, must be zero-initialised */
/* does nothing when the loop already has a backend. on return, backend is */
/* non-zero if one of the requested backends could be initialised. */
noinline ecb_cold
static void
loop_init (EV_P_ unsigned int flags) EV_THROW
{
  if (!backend)
    {
      /* remember the flags as passed by the caller, before the env override below */
      origflags = flags;

#if EV_USE_REALTIME
      /* probe once whether CLOCK_REALTIME works */
      if (!have_realtime)
        {
          struct timespec ts;

          if (!clock_gettime (CLOCK_REALTIME, &ts))
            have_realtime = 1;
        }
#endif

#if EV_USE_MONOTONIC
      /* probe once whether CLOCK_MONOTONIC works */
      if (!have_monotonic)
        {
          struct timespec ts;

          if (!clock_gettime (CLOCK_MONOTONIC, &ts))
            have_monotonic = 1;
        }
#endif

      /* pid check not overridable via env */
#ifndef _WIN32
      if (flags & EVFLAG_FORKCHECK)
        curpid = getpid ();
#endif

      /* LIBEV_FLAGS replaces the flags entirely, unless disabled by */
      /* EVFLAG_NOENV or because we run with elevated privileges */
      if (!(flags & EVFLAG_NOENV)
          && !enable_secure ()
          && getenv ("LIBEV_FLAGS"))
        flags = atoi (getenv ("LIBEV_FLAGS"));

      ev_rt_now          = ev_time ();
      mn_now             = get_clock ();
      now_floor          = mn_now;
      rtmn_diff          = ev_rt_now - mn_now;
#if EV_FEATURE_API
      invoke_cb          = ev_invoke_pending;
#endif

      io_blocktime       = 0.;
      timeout_blocktime  = 0.;
      backend            = 0;
      backend_fd         = -1;
      sig_pending        = 0;
#if EV_ASYNC_ENABLE
      async_pending      = 0;
#endif
      pipe_write_skipped = 0;
      pipe_write_wanted  = 0;
      /* -1 marks the wakeup channel as not created yet, see evpipe_init */
      evpipe [0]         = -1;
      evpipe [1]         = -1;
#if EV_USE_INOTIFY
      fs_fd              = flags & EVFLAG_NOINOTIFY ? -1 : -2;
#endif
#if EV_USE_SIGNALFD
      sigfd              = flags & EVFLAG_SIGNALFD  ? -2 : -1;
#endif

      /* no backend requested explicitly: try all recommended ones */
      if (!(flags & EVBACKEND_MASK))
        flags |= ev_recommended_backends ();

      /* try the requested backends in this fixed order, the first one that initialises wins */
#if EV_USE_IOCP
      if (!backend && (flags & EVBACKEND_IOCP  )) backend = iocp_init   (EV_A_ flags);
#endif
#if EV_USE_PORT
      if (!backend && (flags & EVBACKEND_PORT  )) backend = port_init   (EV_A_ flags);
#endif
#if EV_USE_KQUEUE
      if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags);
#endif
#if EV_USE_EPOLL
      if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init  (EV_A_ flags);
#endif
#if EV_USE_POLL
      if (!backend && (flags & EVBACKEND_POLL  )) backend = poll_init   (EV_A_ flags);
#endif
#if EV_USE_SELECT
      if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags);
#endif

      ev_prepare_init (&pending_w, pendingcb);

#if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
      /* prepare (but do not start) the wakeup watcher, evpipe_init starts it on demand */
      ev_init (&pipe_w, pipecb);
      ev_set_priority (&pipe_w, EV_MAXPRI);
#endif
    }
}

/* free up a loop structure */
/* runs cleanup watchers, closes the fds owned by the loop, destroys the */
/* backend and frees all arrays. a loop other than the default loop is */
/* freed itself as well; the default loop pointer is just reset. */
ecb_cold
void
ev_loop_destroy (EV_P)
{
  int i;

#if EV_MULTIPLICITY
  /* mimic free (0) */
  if (!EV_A)
    return;
#endif

#if EV_CLEANUP_ENABLE
  /* queue cleanup watchers (and execute them) */
  if (expect_false (cleanupcnt))
    {
      queue_events (EV_A_ (W *)cleanups, cleanupcnt, EV_CLEANUP);
      EV_INVOKE_PENDING;
    }
#endif

#if EV_CHILD_ENABLE
  if (ev_is_default_loop (EV_A) && ev_is_active (&childev))
    {
      /* undo the ev_unref done when the watcher was started in ev_default_loop */
      ev_ref (EV_A); /* child watcher */
      ev_signal_stop (EV_A_ &childev);
    }
#endif

  if (ev_is_active (&pipe_w))
    {
      /*ev_ref (EV_A);*/
      /*ev_io_stop (EV_A_ &pipe_w);*/

      /* close both ends of the wakeup channel (evpipe [0] is -1 when an eventfd is used) */
      if (evpipe [0] >= 0) EV_WIN32_CLOSE_FD (evpipe [0]);
      if (evpipe [1] >= 0) EV_WIN32_CLOSE_FD (evpipe [1]);
    }

#if EV_USE_SIGNALFD
  if (ev_is_active (&sigfd_w))
    close (sigfd);
#endif

#if EV_USE_INOTIFY
  if (fs_fd >= 0)
    close (fs_fd);
#endif

  if (backend_fd >= 0)
    close (backend_fd);

  /* let the active backend release its private resources */
#if EV_USE_IOCP
  if (backend == EVBACKEND_IOCP  ) iocp_destroy   (EV_A);
#endif
#if EV_USE_PORT
  if (backend == EVBACKEND_PORT  ) port_destroy   (EV_A);
#endif
#if EV_USE_KQUEUE
  if (backend == EVBACKEND_KQUEUE) kqueue_destroy (EV_A);
#endif
#if EV_USE_EPOLL
  if (backend == EVBACKEND_EPOLL ) epoll_destroy  (EV_A);
#endif
#if EV_USE_POLL
  if (backend == EVBACKEND_POLL  ) poll_destroy   (EV_A);
#endif
#if EV_USE_SELECT
  if (backend == EVBACKEND_SELECT) select_destroy (EV_A);
#endif

  /* free the per-priority arrays */
  for (i = NUMPRI; i--; )
    {
      array_free (pending, [i]);
#if EV_IDLE_ENABLE
      array_free (idle, [i]);
#endif
    }

  ev_free (anfds); anfds = 0; anfdmax = 0;

  /* have to use the microsoft-never-gets-it-right macro */
  array_free (rfeed, EMPTY);
  array_free (fdchange, EMPTY);
  array_free (timer, EMPTY);
#if EV_PERIODIC_ENABLE
  array_free (periodic, EMPTY);
#endif
#if EV_FORK_ENABLE
  array_free (fork, EMPTY);
#endif
#if EV_CLEANUP_ENABLE
  array_free (cleanup, EMPTY);
#endif
  array_free (prepare, EMPTY);
  array_free (check, EMPTY);
#if EV_ASYNC_ENABLE
  array_free (async, EMPTY);
#endif

  /* a zero backend marks the loop as uninitialised, see loop_init */
  backend = 0;

  /* the default loop lives in static storage and is only forgotten, */
  /* any other loop was allocated by ev_loop_new and is freed here */
#if EV_MULTIPLICITY
  if (ev_is_default_loop (EV_A))
#endif
    ev_default_loop_ptr = 0;
#if EV_MULTIPLICITY
  else
    ev_free (EV_A);
#endif
}

#if EV_USE_INOTIFY
inline_size void infy_fork (EV_P);
#endif

/* bring the loop back into a usable state after a fork: lets the backend */
/* and inotify re-initialise themselves and recreates the wakeup channel, */
/* then clears the postfork flag */
inline_size void
loop_fork (EV_P)
{
#if EV_USE_PORT
  if (backend == EVBACKEND_PORT  ) port_fork   (EV_A);
#endif
#if EV_USE_KQUEUE
  if (backend == EVBACKEND_KQUEUE) kqueue_fork (EV_A);
#endif
#if EV_USE_EPOLL
  if (backend == EVBACKEND_EPOLL ) epoll_fork  (EV_A);
#endif
#if EV_USE_INOTIFY
  infy_fork (EV_A);
#endif

#if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
  /* NOTE(review): the meaning of postfork == 2 is not visible in this part of the file (ev_loop_fork sets 1) - confirm */
  if (ev_is_active (&pipe_w) && postfork != 2)
    {
      /* pipe_write_wanted must be false now, so modifying fd vars should be safe */

      /* the ref balances the unref done when pipe_w was started in evpipe_init */
      ev_ref (EV_A);
      ev_io_stop (EV_A_ &pipe_w);

      /* only the read end is closed, evpipe_init keeps the number of the write end stable */
      if (evpipe [0] >= 0)
        EV_WIN32_CLOSE_FD (evpipe [0]);

      evpipe_init (EV_A);
      /* iterate over everything, in case we missed something before */
      ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM);
    }
#endif

  postfork = 0;
}

#if EV_MULTIPLICITY

/*
 * allocate and initialise a new event loop using the given flags.
 * returns the loop, or 0 if no backend could be initialised (the
 * allocation is released again in that case).
 */
ecb_cold
struct ev_loop *
ev_loop_new (unsigned int flags) EV_THROW
{
  EV_P = (struct ev_loop *)ev_malloc (sizeof (struct ev_loop));

  /* loop_init requires a zero-initialised structure */
  memset (EV_A, 0, sizeof (struct ev_loop));
  loop_init (EV_A_ flags);

  if (!ev_backend (EV_A))
    {
      ev_free (EV_A);
      return 0;
    }

  return EV_A;
}

#endif /* multiplicity */

#if EV_VERIFY
/* consistency check for a single watcher: its priority must be in range */
/* and, if it is pending, its slot in the pending array must point back at it */
noinline ecb_cold
static void
verify_watcher (EV_P_ W w)
{
  assert (("libev: watcher has invalid priority", ABSPRI (w) >= 0 && ABSPRI (w) < NUMPRI));

  if (w->pending)
    assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w));
}

/* consistency check for a heap of N elements: every watcher must know its */
/* own index, no element may be smaller than its parent, and the at value */
/* seen through the heap element must match the watcher's */
noinline ecb_cold
static void
verify_heap (EV_P_ ANHE *heap, int N)
{
  int i;

  for (i = HEAP0; i < N + HEAP0; ++i)
    {
      assert (("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i));
      assert (("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i])));
      assert (("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i]))));

      verify_watcher (EV_A_ (W)ANHE_w (heap [i]));
    }
}

/* consistency check for a plain watcher array of cnt elements: the active */
/* member of each watcher must equal its array index plus one */
noinline ecb_cold
static void
array_verify (EV_P_ W *ws, int cnt)
{
  while (cnt--)
    {
      assert (("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1));
      verify_watcher (EV_A_ ws [cnt]);
    }
}
#endif

#if EV_FEATURE_API
/* run the internal consistency checks on the loop's data structures, */
/* failing an assertion on the first problem found. */
/* the function body is empty unless EV_VERIFY is enabled. */
void ecb_cold
ev_verify (EV_P) EV_THROW
{
#if EV_VERIFY
  int i;
  WL w, w2;

  assert (activecnt >= -1);

  assert (fdchangemax >= fdchangecnt);
  for (i = 0; i < fdchangecnt; ++i)
    assert (("libev: negative fd in fdchanges", fdchanges [i] >= 0));

  assert (anfdmax >= 0);
  for (i = 0; i < anfdmax; ++i)
    {
      int j = 0;

      /* w2 advances at half the speed of w: if w ever catches up with it, the list has a cycle */
      for (w = w2 = anfds [i].head; w; w = w->next)
        {
          verify_watcher (EV_A_ (W)w);

          if (j++ & 1)
            {
              assert (("libev: io watcher list contains a loop", w != w2));
              w2 = w2->next;
            }

          assert (("libev: inactive fd watcher on anfd list", ev_active (w) == 1));
          assert (("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i));
        }
    }

  assert (timermax >= timercnt);
  verify_heap (EV_A_ timers, timercnt);

#if EV_PERIODIC_ENABLE
  assert (periodicmax >= periodiccnt);
  verify_heap (EV_A_ periodics, periodiccnt);
#endif

  for (i = NUMPRI; i--; )
    {
      assert (pendingmax [i] >= pendingcnt [i]);
#if EV_IDLE_ENABLE
      assert (idleall >= 0);
      assert (idlemax [i] >= idlecnt [i]);
      array_verify (EV_A_ (W *)idles [i], idlecnt [i]);
#endif
    }

#if EV_FORK_ENABLE
  assert (forkmax >= forkcnt);
  array_verify (EV_A_ (W *)forks, forkcnt);
#endif

#if EV_CLEANUP_ENABLE
  assert (cleanupmax >= cleanupcnt);
  array_verify (EV_A_ (W *)cleanups, cleanupcnt);
#endif

#if EV_ASYNC_ENABLE
  assert (asyncmax >= asynccnt);
  array_verify (EV_A_ (W *)asyncs, asynccnt);
#endif

#if EV_PREPARE_ENABLE
  assert (preparemax >= preparecnt);
  array_verify (EV_A_ (W *)prepares, preparecnt);
#endif

#if EV_CHECK_ENABLE
  assert (checkmax >= checkcnt);
  array_verify (EV_A_ (W *)checks, checkcnt);
#endif

  /* disabled: unfinished checks for child and signal watchers */
# if 0
#if EV_CHILD_ENABLE
  for (w = (ev_child *)childs [chain & ((EV_PID_HASHSIZE) - 1)]; w; w = (ev_child *)((WL)w)->next)
  for (signum = EV_NSIG; signum--; ) if (signals [signum].pending)
#endif
# endif
#endif
}
#endif

/* return the default loop, initialising it with the given flags on first use. */
/* the result is ev_default_loop_ptr, which is 0 when ev_backend reports 0 */
/* after loop_init. with EV_CHILD_ENABLE a SIGCHLD signal watcher (childev) is */
/* started at EV_MAXPRI and unreferenced again */
#if EV_MULTIPLICITY
ecb_cold
struct ev_loop *
#else
int
#endif
ev_default_loop (unsigned int flags) EV_THROW
{
  /* set up by an earlier call: hand back what is already there */
  if (ev_default_loop_ptr)
    return ev_default_loop_ptr;

  {
#if EV_MULTIPLICITY
    EV_P = ev_default_loop_ptr = &default_loop_struct;
#else
    ev_default_loop_ptr = 1;
#endif

    loop_init (EV_A_ flags);

    if (!ev_backend (EV_A))
      ev_default_loop_ptr = 0; /* ev_backend gave 0: the caller gets 0 as well */
#if EV_CHILD_ENABLE
    else
      {
        ev_signal_init (&childev, childcb, SIGCHLD);
        ev_set_priority (&childev, EV_MAXPRI);
        ev_signal_start (EV_A_ &childev);
        ev_unref (EV_A); /* child watcher should not keep loop alive */
      }
#endif
  }

  return ev_default_loop_ptr;
}

/* record that a fork happened by setting the loop's postfork flag. */
/* ev_run tests this flag on each iteration and then calls loop_fork */
void
ev_loop_fork (EV_P) EV_THROW
{
  postfork = 1;
}

/*****************************************************************************/

/* call the callback of watcher w via EV_CB_INVOKE, passing revents */
/* as the event mask */
void
ev_invoke (EV_P_ void *w, int revents)
{
  W target = (W)w;

  EV_CB_INVOKE (target, revents);
}

/* return the total number of pending entries, i.e. the sum of pendingcnt */
/* over all NUMPRI priority levels */
unsigned int
ev_pending_count (EV_P) EV_THROW
{
  unsigned int total = 0;
  int level;

  for (level = 0; level < NUMPRI; ++level)
    total += pendingcnt [level];

  return total;
}

/* invoke the callbacks of all pending watchers. priority levels are drained */
/* from index NUMPRI - 1 down to 0, and within a level the most recently */
/* queued entry is taken first */
noinline
void
ev_invoke_pending (EV_P)
{
  /* pendingpri possibly gets modified in the inner loop, */
  /* so it is re-read from the loop state on every pass */
  for (pendingpri = NUMPRI; pendingpri; )
    {
      --pendingpri;

      while (pendingcnt [pendingpri])
        {
          ANPENDING *p;

          /* pop the last entry of this level */
          --pendingcnt [pendingpri];
          p = pendings [pendingpri] + pendingcnt [pendingpri];

          /* clear the pending mark before running the callback */
          p->w->pending = 0;
          EV_CB_INVOKE (p->w, p->events);
          EV_FREQUENT_CHECK;
        }
    }
}

#if EV_IDLE_ENABLE
/* make idle watchers pending. priority levels are scanned from the highest */
/* index downwards: the scan ends at the first level that already has pending */
/* events, or queues the idle watchers of the first level that has any - this */
/* is the "call idle only when higher priorities are idle" logic */
inline_size void
idle_reify (EV_P)
{
  int pri;

  /* no idle watchers at all: nothing to do */
  if (expect_true (!idleall))
    return;

  for (pri = NUMPRI - 1; pri >= 0; --pri)
    {
      /* something is pending at this level, so idlers here and below stay quiet */
      if (pendingcnt [pri])
        return;

      if (!idlecnt [pri])
        continue;

      queue_events (EV_A_ (W *)idles [pri], idlecnt [pri], EV_IDLE);
      return;
    }
}
#endif

/* make timers pending: while the heap root lies before mn_now, either */
/* reschedule it (repeating timer) or stop it (one-shot), and hand the */
/* watcher to feed_reverse. if at least one timer was handled, */
/* feed_reverse_done is called once with EV_TIMER */
inline_size void
timers_reify (EV_P)
{
  int expired = 0; /* set once any timer has been processed */

  EV_FREQUENT_CHECK;

  while (timercnt && ANHE_at (timers [HEAP0]) < mn_now)
    {
      ev_timer *w = (ev_timer *)ANHE_w (timers [HEAP0]);

      expired = 1;

      /*assert (("libev: inactive timer on timer heap detected", ev_is_active (w)));*/

      /* first reschedule or stop timer */
      if (!w->repeat)
        ev_timer_stop (EV_A_ w); /* nonrepeating: stop timer */
      else
        {
          /* advance by one period, but never schedule before mn_now */
          ev_at (w) += w->repeat;
          if (ev_at (w) < mn_now)
            ev_at (w) = mn_now;

          assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.));

          ANHE_at_cache (timers [HEAP0]);
          downheap (timers, timercnt, HEAP0);
        }

      EV_FREQUENT_CHECK;
      feed_reverse (EV_A_ (W)w);
    }

  if (expired)
    feed_reverse_done (EV_A_ EV_TIMER);
}

#if EV_PERIODIC_ENABLE

/* compute the next trigger time of an interval-based periodic and store it */
/* in ev_at (w). the first estimate uses the interval clamped to at least */
/* MIN_INTERVAL; it is then stepped forward by w->interval until it lies */
/* after ev_rt_now */
noinline
static void
periodic_recalc (EV_P_ ev_periodic *w)
{
  ev_tstamp step = MIN_INTERVAL;
  ev_tstamp at;

  if (w->interval > MIN_INTERVAL)
    step = w->interval;

  at = w->offset + step * ev_floor ((ev_rt_now - w->offset) / step);

  /* the above almost always errs on the low side */
  for (;;)
    {
      ev_tstamp nat;

      if (!(at <= ev_rt_now))
        break;

      nat = at + w->interval;

      /* when resolution fails us (adding the interval changes nothing), */
      /* we use ev_rt_now */
      if (expect_false (nat == at))
        {
          at = ev_rt_now;
          break;
        }

      at = nat;
    }

  ev_at (w) = at;
}

/* make periodics pending: for every periodic at the heap root whose time */
/* lies before ev_rt_now, reschedule it (via its reschedule_cb or its */
/* interval) or stop it, and hand it to feed_reverse. the collected watchers */
/* are flushed with feed_reverse_done (EV_PERIODIC) */
inline_size void
periodics_reify (EV_P)
{
  EV_FREQUENT_CHECK;

  /* NOTE(review): the outer condition is the same as the inner loop's exit */
  /* condition, so unless feed_reverse_done alters the heap the outer body */
  /* runs at most once - confirm */
  while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now)
    {
      do
        {
          ev_periodic *w = (ev_periodic *)ANHE_w (periodics [HEAP0]);

          /*assert (("libev: inactive timer on periodic heap detected", ev_is_active (w)));*/

          /* first reschedule or stop timer */
          if (w->reschedule_cb)
            {
              /* the user callback decides the next trigger time */
              ev_at (w) = w->reschedule_cb (w, ev_rt_now);

              assert (("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now));

              ANHE_at_cache (periodics [HEAP0]);
              downheap (periodics, periodiccnt, HEAP0);
            }
          else if (w->interval)
            {
              /* interval periodic: compute the next slot and re-sort the root */
              periodic_recalc (EV_A_ w);
              ANHE_at_cache (periodics [HEAP0]);
              downheap (periodics, periodiccnt, HEAP0);
            }
          else
            ev_periodic_stop (EV_A_ w); /* nonrepeating: stop timer */

          EV_FREQUENT_CHECK;
          feed_reverse (EV_A_ (W)w);
        }
      while (periodiccnt && ANHE_at (periodics [HEAP0]) < ev_rt_now);

      feed_reverse_done (EV_A_ EV_PERIODIC);
    }
}

/* simply recalculate all periodics: every watcher with a reschedule_cb or an */
/* interval gets a fresh trigger time, the cached heap times are refreshed, */
/* and the whole heap is rebuilt with reheap */
/* TODO: maybe ensure that at least one event happens when jumping forward? */
noinline ecb_cold
static void
periodics_reschedule (EV_P)
{
  int i = HEAP0;

  /* adjust periodics after time jump */
  while (i < periodiccnt + HEAP0)
    {
      ev_periodic *w = (ev_periodic *)ANHE_w (periodics [i]);

      if (!w->reschedule_cb)
        {
          if (w->interval)
            periodic_recalc (EV_A_ w);
        }
      else
        ev_at (w) = w->reschedule_cb (w, ev_rt_now);

      ANHE_at_cache (periodics [i]);
      ++i;
    }

  reheap (periodics, periodiccnt);
}
#endif

/* adjust all timers by a given offset: add adjust to the trigger time of */
/* every watcher on the timer heap and refresh the cached heap time. */
/* the heap order is not touched, as every entry shifts by the same amount */
noinline ecb_cold
static void
timers_reschedule (EV_P_ ev_tstamp adjust)
{
  int pos;
  int end = timercnt + HEAP0;

  for (pos = HEAP0; pos < end; ++pos)
    {
      ANHE_w (timers [pos])->at += adjust;
      ANHE_at_cache (timers [pos]);
    }
}

/* fetch new monotonic and realtime times from the kernel */
/* also detect if there was a timejump, and act accordingly */
/* max_block is only used in the non-monotonic branch: a forward step of the */
/* realtime clock larger than max_block + MIN_TIMEJUMP (or any backward step) */
/* is treated as a time jump there */
inline_speed void
time_update (EV_P_ ev_tstamp max_block)
{
#if EV_USE_MONOTONIC
  if (expect_true (have_monotonic))
    {
      int i;
      ev_tstamp odiff = rtmn_diff; /* realtime/monotonic offset before this update */

      mn_now = get_clock ();

      /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */
      /* interpolate in the meantime */
      if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5))
        {
          ev_rt_now = rtmn_diff + mn_now;
          return;
        }

      now_floor = mn_now;
      ev_rt_now = ev_time ();

      /* loop a few times, before making important decisions.
       * on the choice of "4": one iteration isn't enough,
       * in case we get preempted during the calls to
       * ev_time and get_clock. a second call is almost guaranteed
       * to succeed in that case, though. and looping a few more times
       * doesn't hurt either as we only do this on time-jumps or
       * in the unlikely event of having been preempted here.
       */
      for (i = 4; --i; )
        {
          ev_tstamp diff;
          rtmn_diff = ev_rt_now - mn_now;

          /* change of the offset relative to the value before this call */
          diff = odiff - rtmn_diff;

          if (expect_true ((diff < 0. ? -diff : diff) < MIN_TIMEJUMP))
            return; /* all is well */

          /* offset moved by MIN_TIMEJUMP or more: sample both clocks again */
          ev_rt_now = ev_time ();
          mn_now    = get_clock ();
          now_floor = mn_now;
        }

      /* no timer adjustment, as the monotonic clock doesn't jump */
      /* timers_reschedule (EV_A_ rtmn_diff - odiff) */
# if EV_PERIODIC_ENABLE
      periodics_reschedule (EV_A);
# endif
    }
  else
#endif
    {
      /* without a monotonic clock, mn_now simply follows the realtime clock */
      ev_rt_now = ev_time ();

      if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP))
        {
          /* adjust timers. this is easy, as the offset is the same for all of them */
          timers_reschedule (EV_A_ ev_rt_now - mn_now);
#if EV_PERIODIC_ENABLE
          periodics_reschedule (EV_A);
#endif
        }

      mn_now = ev_rt_now;
    }
}

/* run the event loop. */
/* each iteration: queue and invoke fork and prepare watchers, apply fd */
/* changes, work out how long to block, poll the backend, update the clocks, */
/* queue expired timers/periodics, idle and check watchers, and finally */
/* invoke everything that is pending. */
/* the loop repeats while activecnt is nonzero, loop_done is unset and flags */
/* contains neither EVRUN_ONCE nor EVRUN_NOWAIT. */
/* returns activecnt as it is when the loop ends */
int
ev_run (EV_P_ int flags)
{
#if EV_FEATURE_API
  ++loop_depth;
#endif

  assert (("libev: ev_loop recursion during release detected", loop_done != EVBREAK_RECURSE));

  loop_done = EVBREAK_CANCEL;

  EV_INVOKE_PENDING; /* in case we recurse, ensure ordering stays nice and clean */

  do
    {
#if EV_VERIFY >= 2
      ev_verify (EV_A);
#endif

#ifndef _WIN32
      /* detect a fork by comparing the current pid with the remembered one */
      if (expect_false (curpid)) /* penalise the forking check even more */
        if (expect_false (getpid () != curpid))
          {
            curpid = getpid ();
            postfork = 1;
          }
#endif

#if EV_FORK_ENABLE
      /* we might have forked, so queue fork handlers */
      if (expect_false (postfork))
        if (forkcnt)
          {
            queue_events (EV_A_ (W *)forks, forkcnt, EV_FORK);
            EV_INVOKE_PENDING;
          }
#endif

#if EV_PREPARE_ENABLE
      /* queue prepare watchers (and execute them) */
      if (expect_false (preparecnt))
        {
          queue_events (EV_A_ (W *)prepares, preparecnt, EV_PREPARE);
          EV_INVOKE_PENDING;
        }
#endif

      /* a callback invoked above may have requested a break */
      if (expect_false (loop_done))
        break;

      /* we might have forked, so reify kernel state if necessary */
      if (expect_false (postfork))
        loop_fork (EV_A);

      /* update fd-related kernel structures */
      fd_reify (EV_A);

      /* calculate blocking time */
      {
        ev_tstamp waittime  = 0.;
        ev_tstamp sleeptime = 0.;

        /* remember old timestamp for io_blocktime calculation */
        ev_tstamp prev_mn_now = mn_now;

        /* update time to cancel out callback processing overhead */
        time_update (EV_A_ 1e100);

        /* from now on, we want a pipe-wake-up */
        pipe_write_wanted = 1;

        ECB_MEMORY_FENCE; /* make sure pipe_write_wanted is visible before we check for potential skips */

        /* block only if none of these hold: EVRUN_NOWAIT requested, idle */
        /* watchers exist, nothing is active, or a pipe write was skipped */
        if (expect_true (!(flags & EVRUN_NOWAIT || idleall || !activecnt || pipe_write_skipped)))
          {
            waittime = MAX_BLOCKTIME;

            /* never sleep past the earliest timer */
            if (timercnt)
              {
                ev_tstamp to = ANHE_at (timers [HEAP0]) - mn_now;
                if (waittime > to) waittime = to;
              }

#if EV_PERIODIC_ENABLE
            /* nor past the earliest periodic */
            if (periodiccnt)
              {
                ev_tstamp to = ANHE_at (periodics [HEAP0]) - ev_rt_now;
                if (waittime > to) waittime = to;
              }
#endif

            /* don't let timeouts decrease the waittime below timeout_blocktime */
            if (expect_false (waittime < timeout_blocktime))
              waittime = timeout_blocktime;

            /* at this point, we NEED to wait, so we have to ensure */
            /* to pass a minimum nonzero value to the backend */
            if (expect_false (waittime < backend_mintime))
              waittime = backend_mintime;

            /* extra check because io_blocktime is commonly 0 */
            if (expect_false (io_blocktime))
              {
                /* sleep for what is left of io_blocktime since the previous */
                /* iteration, keeping at least backend_mintime for the poll */
                sleeptime = io_blocktime - (mn_now - prev_mn_now);

                if (sleeptime > waittime - backend_mintime)
                  sleeptime = waittime - backend_mintime;

                if (expect_true (sleeptime > 0.))
                  {
                    ev_sleep (sleeptime);
                    waittime -= sleeptime;
                  }
              }
          }

#if EV_FEATURE_API
        ++loop_count;
#endif
        /* loop_done is set to EVBREAK_RECURSE around the poll; the assert at */
        /* the top of ev_run checks for exactly that value. these assignments */
        /* only happen when assert is compiled in */
        assert ((loop_done = EVBREAK_RECURSE, 1)); /* assert for side effect */
        backend_poll (EV_A_ waittime);
        assert ((loop_done = EVBREAK_CANCEL, 1)); /* assert for side effect */

        pipe_write_wanted = 0; /* just an optimisation, no fence needed */

        ECB_MEMORY_FENCE_ACQUIRE;
        if (pipe_write_skipped)
          {
            /* a wake-up write was skipped: feed the pipe watcher by hand */
            assert (("libev: pipe_w not active, but pipe not written", ev_is_active (&pipe_w)));
            ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM);
          }


        /* update ev_rt_now, do magic */
        time_update (EV_A_ waittime + sleeptime);
      }

      /* queue pending timers and reschedule them */
      timers_reify (EV_A); /* relative timers called last */
#if EV_PERIODIC_ENABLE
      periodics_reify (EV_A); /* absolute timers called first */
#endif

#if EV_IDLE_ENABLE
      /* queue idle watchers unless other events are pending */
      idle_reify (EV_A);
#endif

#if EV_CHECK_ENABLE
      /* queue check watchers, to be executed first */
      if (expect_false (checkcnt))
        queue_events (EV_A_ (W *)checks, checkcnt, EV_CHECK);
#endif

      EV_INVOKE_PENDING;
    }
  while (expect_true (
    activecnt
    && !loop_done
    && !(flags & (EVRUN_ONCE | EVRUN_NOWAIT))
  ));

  /* EVBREAK_ONE is reset here; any other loop_done value is left in place */
  if (loop_done == EVBREAK_ONE)
    loop_done = EVBREAK_CANCEL;

#if EV_FEATURE_API
  --loop_depth;
#endif

  return activecnt;
}

/* request that ev_run stops by storing how in loop_done. */
/* ev_run tests loop_done after the prepare watchers and at the end of each */
/* iteration, and resets EVBREAK_ONE to EVBREAK_CANCEL before returning */
void
ev_break (EV_P_ int how) EV_THROW
{
  loop_done = how;
}

/* add one reference to the loop by raising activecnt. */
/* ev_run keeps iterating only while activecnt is nonzero */
void
ev_ref (EV_P) EV_THROW
{
  activecnt += 1;
}

/* drop one reference from the loop by lowering activecnt - */
/* the counterpart of ev_ref */
void
ev_unref (EV_P) EV_THROW
{
  activecnt -= 1;
}

/* refresh the loop's notion of the current time by calling time_update. */
/* 1e100 is passed as max_block, the same value ev_run uses for its update */
/* before blocking */
void
ev_now_update (EV_P) EV_THROW
{
  time_update (EV_A_ 1e100);
}

/* suspend the loop: this only brings the loop time up to date via */
/* ev_now_update. ev_resume measures how far mn_now has moved on since */
void
ev_suspend (EV_P) EV_THROW
{
  ev_now_update (EV_A);
}

/* resume the loop: update the time, then shift all timers by the amount */
/* mn_now advanced during that update and, with EV_PERIODIC_ENABLE, */
/* recalculate all periodics */
void
ev_resume (EV_P) EV_THROW
{
  ev_tstamp before = mn_now;
  ev_tstamp elapsed;

  ev_now_update (EV_A);

  elapsed = mn_now - before;
  timers_reschedule (EV_A_ elapsed);
#if EV_PERIODIC_ENABLE
  /* TODO: really do this? */
  periodics_reschedule (EV_A);
#endif
}

/*****************************************************************************/
/* singly-linked list management, used when the expected list length is short */

/* push elem onto the front of the singly-linked list rooted at *head */
inline_size void
wlist_add (WL *head, WL elem)
{
  WL old_first = *head;

  elem->next = old_first;
  *head      = elem;
}

/* unlink the first occurrence of elem from the list rooted at *head. */
/* the list is left untouched when elem is not on it */
inline_size void
wlist_del (WL *head, WL elem)
{
  WL *link;

  /* link always points at the pointer that refers to the current node */
  for (link = head; *link; link = &(*link)->next)
    if (expect_true (*link == elem))
      {
        *link = elem->next;
        break;
      }
}

/* internal, faster, version of ev_clear_pending: if w is pending, its slot */
/* in the pendings array is redirected to pending_w and the pending mark is */
/* cleared. unlike ev_clear_pending, nothing is returned */
inline_speed void
clear_pending (EV_P_ W w)
{
  if (!w->pending)
    return;

  pendings [ABSPRI (w)][w->pending - 1].w = (W)&pending_w;
  w->pending = 0;
}

/* if watcher w is pending, cancel that: its pendings slot is redirected to */
/* pending_w, the pending mark is cleared, and the events stored in the slot */
/* are returned. returns 0 when the watcher was not pending */
int
ev_clear_pending (EV_P_ void *w) EV_THROW
{
  W w_ = (W)w;
  int slot = w_->pending;
  ANPENDING *entry;

  if (expect_false (!slot))
    return 0;

  /* pending is stored one-based, hence the - 1 */
  entry = &pendings [ABSPRI (w_)][slot - 1];
  entry->w = (W)&pending_w;
  w_->pending = 0;

  return entry->events;
}

/* clamp the priority of watcher w into the range [EV_MINPRI, EV_MAXPRI] */
inline_size void
pri_adjust (EV_P_ W w)
{
  int pri = ev_priority (w);

  if (pri < EV_MINPRI)
    pri = EV_MINPRI;

  if (pri > EV_MAXPRI)
    pri = EV_MAXPRI;

  ev_set_priority (w, pri);
}

/* common part of starting any watcher: store the given active value in the */
/* watcher, clamp its priority and take one loop reference */
inline_speed void
ev_start (EV_P_ W w, int active)
{
  w->active = active;
  pri_adjust (EV_A_ w);
  ev_ref (EV_A);
}

/* common part of stopping any watcher: mark it inactive and give back the */
/* loop reference taken by ev_start */
inline_size void
ev_stop (EV_P_ W w)
{
  w->active = 0;
  ev_unref (EV_A);
}

/*****************************************************************************/

/* start io watcher w: mark it active, make sure anfds covers its fd, put it */
/* on that fd's watcher list and record an fd change so the fd gets reified. */
/* does nothing if the watcher is already active */
noinline
void
ev_io_start (EV_P_ ev_io *w) EV_THROW
{
  int fd;

  if (expect_false (ev_is_active (w)))
    return;

  fd = w->fd;

  assert (("libev: ev_io_start called with negative fd", fd >= 0));
  assert (("libev: ev_io_start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE))));

  EV_FREQUENT_CHECK;

  ev_start (EV_A_ (W)w, 1);
  array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero);
  wlist_add (&anfds[fd].head, (WL)w);

  /* common bug, apparently */
  assert (("libev: ev_io_start called with corrupted watcher", ((WL)w)->next != (WL)w));

  /* the EV__IOFDSET bit is passed on to fd_change once, then cleared on the watcher */
  fd_change (EV_A_ fd, (w->events & EV__IOFDSET) | EV_ANFD_REIFY);
  w->events = w->events & ~EV__IOFDSET;

  EV_FREQUENT_CHECK;
}

/* stop io watcher w: clear any pending event, take the watcher off its fd's */
/* list, mark it inactive and record an fd change. only the pending state is */
/* cleared if the watcher is not active */
noinline
void
ev_io_stop (EV_P_ ev_io *w) EV_THROW
{
  int fd;

  clear_pending (EV_A_ (W)w);
  if (expect_false (!ev_is_active (w)))
    return;

  assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax));

  EV_FREQUENT_CHECK;

  fd = w->fd;

  wlist_del (&anfds[fd].head, (WL)w);
  ev_stop (EV_A_ (W)w);

  fd_change (EV_A_ fd, EV_ANFD_REIFY);

  EV_FREQUENT_CHECK;
}

/* start timer w: its relative timeout is turned into an absolute time by */
/* adding mn_now, then the watcher is appended to the timer heap and sifted */
/* up into place. does nothing if the watcher is already active */
noinline
void
ev_timer_start (EV_P_ ev_timer *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  ev_at (w) += mn_now;

  assert (("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.));

  EV_FREQUENT_CHECK;

  /* the new entry goes into the first free heap position, */
  /* which also becomes the watcher's active value */
  ++timercnt;
  slot = timercnt + HEAP0 - 1;

  ev_start (EV_A_ (W)w, slot);
  array_needsize (ANHE, timers, timermax, slot + 1, EMPTY2);
  ANHE_w (timers [slot]) = (WT)w;
  ANHE_at_cache (timers [slot]);
  upheap (timers, slot);

  EV_FREQUENT_CHECK;

  /*assert (("libev: internal timer heap corruption", timers [ev_active (w)] == (WT)w));*/
}

/* stop timer w: clear any pending event, remove the watcher from the timer */
/* heap, convert its time back to a value relative to mn_now and mark it */
/* inactive. only the pending state is cleared if the watcher is not active */
noinline
void
ev_timer_stop (EV_P_ ev_timer *w) EV_THROW
{
  int active;

  clear_pending (EV_A_ (W)w);
  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  active = ev_active (w);

  assert (("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w));

  timercnt -= 1;

  /* unless w occupied the last position, move the last entry into the */
  /* hole and restore the heap order from there */
  if (expect_true (active < timercnt + HEAP0))
    {
      timers [active] = timers [timercnt + HEAP0];
      adjustheap (timers, timercnt, active);
    }

  ev_at (w) -= mn_now;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}

/* restart timer w according to its repeat value, after clearing any pending */
/* event: */
/*  - inactive with repeat set: start it with a timeout of repeat */
/*  - active without repeat:    stop it */
/*  - active with repeat:       move its trigger time to mn_now + repeat */
/*  - inactive without repeat:  nothing further */
noinline
void
ev_timer_again (EV_P_ ev_timer *w) EV_THROW
{
  EV_FREQUENT_CHECK;

  clear_pending (EV_A_ (W)w);

  if (!ev_is_active (w))
    {
      if (w->repeat)
        {
          ev_at (w) = w->repeat;
          ev_timer_start (EV_A_ w);
        }
    }
  else if (!w->repeat)
    ev_timer_stop (EV_A_ w);
  else
    {
      ev_at (w) = mn_now + w->repeat;
      ANHE_at_cache (timers [ev_active (w)]);
      adjustheap (timers, timercnt, ev_active (w));
    }

  EV_FREQUENT_CHECK;
}

/* return the watcher's stored time minus mn_now while it is active, */
/* or the stored time unchanged while it is inactive */
ev_tstamp
ev_timer_remaining (EV_P_ ev_timer *w) EV_THROW
{
  ev_tstamp reference = 0.;

  /* an active timer holds an absolute time, see ev_timer_start */
  if (ev_is_active (w))
    reference = mn_now;

  return ev_at (w) - reference;
}

#if EV_PERIODIC_ENABLE
/* start periodic w: work out its first trigger time (from reschedule_cb if */
/* set, else from the interval if nonzero, else the plain offset), then */
/* append it to the periodic heap and sift it up into place. */
/* does nothing if the watcher is already active */
noinline
void
ev_periodic_start (EV_P_ ev_periodic *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  if (!w->reschedule_cb)
    {
      if (!w->interval)
        ev_at (w) = w->offset;
      else
        {
          assert (("libev: ev_periodic_start called with negative interval value", w->interval >= 0.));
          periodic_recalc (EV_A_ w);
        }
    }
  else
    ev_at (w) = w->reschedule_cb (w, ev_rt_now);

  EV_FREQUENT_CHECK;

  /* the new entry goes into the first free heap position, */
  /* which also becomes the watcher's active value */
  ++periodiccnt;
  slot = periodiccnt + HEAP0 - 1;

  ev_start (EV_A_ (W)w, slot);
  array_needsize (ANHE, periodics, periodicmax, slot + 1, EMPTY2);
  ANHE_w (periodics [slot]) = (WT)w;
  ANHE_at_cache (periodics [slot]);
  upheap (periodics, slot);

  EV_FREQUENT_CHECK;

  /*assert (("libev: internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/
}

/* stop periodic w: clear any pending event, remove the watcher from the */
/* periodic heap and mark it inactive. only the pending state is cleared if */
/* the watcher is not active */
noinline
void
ev_periodic_stop (EV_P_ ev_periodic *w) EV_THROW
{
  int active;

  clear_pending (EV_A_ (W)w);
  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  active = ev_active (w);

  assert (("libev: internal periodic heap corruption", ANHE_w (periodics [active]) == (WT)w));

  periodiccnt -= 1;

  /* unless w occupied the last position, move the last entry into the */
  /* hole and restore the heap order from there */
  if (expect_true (active < periodiccnt + HEAP0))
    {
      periodics [active] = periodics [periodiccnt + HEAP0];
      adjustheap (periodics, periodiccnt, active);
    }

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}

/* restart periodic w by stopping and starting it again, which makes */
/* ev_periodic_start recompute its trigger time */
noinline
void
ev_periodic_again (EV_P_ ev_periodic *w) EV_THROW
{
  /* TODO: use adjustheap and recalculation */
  ev_periodic_stop (EV_A_ w);
  ev_periodic_start (EV_A_ w);
}
#endif

#ifndef SA_RESTART
# define SA_RESTART 0
#endif

#if EV_SIGNAL_ENABLE

/* start signal watcher w. does nothing if the watcher is already active. */
/* the watcher is put on the list signals [signum - 1]. with EV_USE_SIGNALFD */
/* and a usable sigfd the signal is blocked and added to the signalfd set; */
/* otherwise, when w is the first watcher on its list, ev_sighandler is */
/* installed for the signal (sigaction, or signal on _WIN32) */
noinline
void
ev_signal_start (EV_P_ ev_signal *w) EV_THROW
{
  if (expect_false (ev_is_active (w)))
    return;

  assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG));

#if EV_MULTIPLICITY
  assert (("libev: a signal must not be attached to two different loops",
           !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop));

  /* remember which loop this signal belongs to */
  signals [w->signum - 1].loop = EV_A;
  ECB_MEMORY_FENCE_RELEASE;
#endif

  EV_FREQUENT_CHECK;

#if EV_USE_SIGNALFD
  /* a sigfd of -2 triggers the one-time attempt to create the signalfd */
  if (sigfd == -2)
    {
      sigfd = signalfd (-1, &sigfd_set, SFD_NONBLOCK | SFD_CLOEXEC);
      if (sigfd < 0 && errno == EINVAL)
        sigfd = signalfd (-1, &sigfd_set, 0); /* retry without flags */

      if (sigfd >= 0)
        {
          fd_intern (sigfd); /* doing it twice will not hurt */

          sigemptyset (&sigfd_set);

          /* watch the signalfd with an internal io watcher at top priority */
          ev_io_init (&sigfd_w, sigfdcb, sigfd, EV_READ);
          ev_set_priority (&sigfd_w, EV_MAXPRI);
          ev_io_start (EV_A_ &sigfd_w);
          ev_unref (EV_A); /* signalfd watcher should not keep loop alive */
        }
    }

  if (sigfd >= 0)
    {
      /* TODO: check .head */
      /* block the signal and update the signalfd with the enlarged set */
      sigaddset (&sigfd_set, w->signum);
      sigprocmask (SIG_BLOCK, &sigfd_set, 0);

      signalfd (sigfd, &sigfd_set, 0);
    }
#endif

  ev_start (EV_A_ (W)w, 1);
  wlist_add (&signals [w->signum - 1].head, (WL)w);

  /* w was added at the list head, so an empty next means it is the only */
  /* watcher for this signal and the handler still has to be installed */
  if (!((WL)w)->next)
# if EV_USE_SIGNALFD
    if (sigfd < 0) /*TODO*/
# endif
      {
# ifdef _WIN32
        evpipe_init (EV_A);

        signal (w->signum, ev_sighandler);
# else
        struct sigaction sa;

        evpipe_init (EV_A);

        sa.sa_handler = ev_sighandler;
        sigfillset (&sa.sa_mask);
        sa.sa_flags = SA_RESTART; /* if restarting works we save one iteration */
        sigaction (w->signum, &sa, 0);

        /* with EVFLAG_NOSIGMASK in origflags, the signal is unblocked explicitly */
        if (origflags & EVFLAG_NOSIGMASK)
          {
            sigemptyset (&sa.sa_mask);
            sigaddset (&sa.sa_mask, w->signum);
            sigprocmask (SIG_UNBLOCK, &sa.sa_mask, 0);
          }
#endif
      }

  EV_FREQUENT_CHECK;
}

/* stop signal watcher w: clear any pending event, take the watcher off its */
/* signal's list and mark it inactive. when that list becomes empty, the */
/* signal is removed from the signalfd set and unblocked (signalfd in use), */
/* or its disposition is reset to SIG_DFL. only the pending state is cleared */
/* if the watcher is not active */
noinline
void
ev_signal_stop (EV_P_ ev_signal *w) EV_THROW
{
  int idx;

  clear_pending (EV_A_ (W)w);
  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  /* the signals array is indexed by signal number minus one */
  idx = w->signum - 1;

  wlist_del (&signals [idx].head, (WL)w);
  ev_stop (EV_A_ (W)w);

  if (!signals [idx].head)
    {
#if EV_MULTIPLICITY
      signals [idx].loop = 0; /* unattach from signal */
#endif
#if EV_USE_SIGNALFD
      if (sigfd >= 0)
        {
          sigset_t ss;

          /* first shrink the set the signalfd listens for ... */
          sigdelset (&sigfd_set, w->signum);
          signalfd (sigfd, &sigfd_set, 0);

          /* ... then unblock just this one signal */
          sigemptyset (&ss);
          sigaddset (&ss, w->signum);
          sigprocmask (SIG_UNBLOCK, &ss, 0);
        }
      else
#endif
        signal (w->signum, SIG_DFL);
    }

  EV_FREQUENT_CHECK;
}

#endif

#if EV_CHILD_ENABLE

/* start child watcher w: mark it active and add it to the childs hash chain */
/* selected by the low bits of its pid. does nothing if already active */
void
ev_child_start (EV_P_ ev_child *w) EV_THROW
{
  WL *bucket;

#if EV_MULTIPLICITY
  assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr));
#endif
  if (expect_false (ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  bucket = &childs [w->pid & ((EV_PID_HASHSIZE) - 1)];

  ev_start (EV_A_ (W)w, 1);
  wlist_add (bucket, (WL)w);

  EV_FREQUENT_CHECK;
}

/* stop child watcher w: clear any pending event, remove the watcher from */
/* its childs hash chain and mark it inactive. only the pending state is */
/* cleared if the watcher is not active */
void
ev_child_stop (EV_P_ ev_child *w) EV_THROW
{
  WL *bucket;

  clear_pending (EV_A_ (W)w);
  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  bucket = &childs [w->pid & ((EV_PID_HASHSIZE) - 1)];

  wlist_del (bucket, (WL)w);
  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}

#endif

#if EV_STAT_ENABLE

# ifdef _WIN32
#  undef lstat
#  define lstat(a,b) _stati64 (a,b)
# endif

#define DEF_STAT_INTERVAL  5.0074891
#define NFS_STAT_INTERVAL 30.1074891 /* for filesystems potentially failing inotify */
#define MIN_STAT_INTERVAL  0.1074891

noinline static void stat_timer_cb (EV_P_ ev_timer *w_, int revents);

#if EV_USE_INOTIFY

/* the * 2 is to allow for alignment padding, which for some reason is >> 8 */
# define EV_INOTIFY_BUFSIZE (sizeof (struct inotify_event) * 2 + NAME_MAX)

/* try to watch w->path with inotify and choose the polling interval of the */
/* watcher's timer to match: */
/*  - watch added: no polling on known-local filesystems (when fs_2625 is */
/*    set), otherwise polling at the user, default or NFS interval */
/*  - watch failed: keep polling and, for ENOENT/EACCES, try to watch a parent */
/*    directory instead */
/* a resulting watch descriptor is entered into fs_hash, and the timer is */
/* re-armed with the chosen repeat value */
noinline
static void
infy_add (EV_P_ ev_stat *w)
{
  w->wd = inotify_add_watch (fs_fd, w->path,
                             IN_ATTRIB | IN_DELETE_SELF | IN_MOVE_SELF | IN_MODIFY
                             | IN_CREATE | IN_DELETE | IN_MOVED_FROM | IN_MOVED_TO
                             | IN_DONT_FOLLOW | IN_MASK_ADD);

  if (w->wd >= 0)
    {
      struct statfs sfs;

      /* now local changes will be tracked by inotify, but remote changes won't */
      /* unless the filesystem is known to be local, we therefore still poll */
      /* also do poll on <2.6.25, but with normal frequency */

      if (!fs_2625)
        w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL;
      else if (!statfs (w->path, &sfs)
               && (sfs.f_type == 0x1373 /* devfs */
                   || sfs.f_type == 0x4006 /* fat */
                   || sfs.f_type == 0x4d44 /* msdos */
                   || sfs.f_type == 0xEF53 /* ext2/3 */
                   || sfs.f_type == 0x72b6 /* jffs2 */
                   || sfs.f_type == 0x858458f6 /* ramfs */
                   || sfs.f_type == 0x5346544e /* ntfs */
                   || sfs.f_type == 0x3153464a /* jfs */
                   || sfs.f_type == 0x9123683e /* btrfs */
                   || sfs.f_type == 0x52654973 /* reiser3 */
                   || sfs.f_type == 0x01021994 /* tmpfs */
                   || sfs.f_type == 0x58465342 /* xfs */))
        w->timer.repeat = 0.; /* filesystem is local, kernel new enough */
      else
        w->timer.repeat = w->interval ? w->interval : NFS_STAT_INTERVAL; /* remote, use reduced frequency */
    }
  else
    {
      /* can't use inotify, continue to stat */
      w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL;

      /* if path is not there, monitor some parent directory for speedup hints */
      /* note that exceeding the hardcoded path limit is not a correctness issue, */
      /* but an efficiency issue only */
      if ((errno == ENOENT || errno == EACCES) && strlen (w->path) < 4096)
        {
          char path [4096];
          strcpy (path, w->path);

          /* strip one trailing path component per round and try to watch */
          /* what is left, until a watch succeeds, the error is something */
          /* other than ENOENT/EACCES, or no further '/' can be stripped */
          do
            {
              /* errno still holds the error of the preceding inotify_add_watch */
              int mask = IN_MASK_ADD | IN_DELETE_SELF | IN_MOVE_SELF
                       | (errno == EACCES ? IN_ATTRIB : IN_CREATE | IN_MOVED_TO);

              char *pend = strrchr (path, '/');

              /* no slash left, or only the leading one: give up */
              if (!pend || pend == path)
                break;

              *pend = 0;
              w->wd = inotify_add_watch (fs_fd, path, mask);
            }
          while (w->wd < 0 && (errno == ENOENT || errno == EACCES));
        }
    }

  /* make the watcher findable by its watch descriptor */
  if (w->wd >= 0)
    wlist_add (&fs_hash [w->wd & ((EV_INOTIFY_HASHSIZE) - 1)].head, (WL)w);

  /* now re-arm timer, if required */
  /* the ref before and unref after keep the timer from holding a loop */
  /* reference whether it was or becomes active */
  if (ev_is_active (&w->timer)) ev_ref (EV_A);
  ev_timer_again (EV_A_ &w->timer);
  if (ev_is_active (&w->timer)) ev_unref (EV_A);
}

/* drop the inotify watch of stat watcher w, if it has one: the watcher is */
/* taken out of fs_hash, its wd is set to -2 and the watch descriptor is */
/* removed from the inotify fd */
noinline
static void
infy_del (EV_P_ ev_stat *w)
{
  int old_wd = w->wd;

  /* negative values mean there is no watch to remove */
  if (old_wd < 0)
    return;

  w->wd = -2;
  wlist_del (&fs_hash [old_wd & ((EV_INOTIFY_HASHSIZE) - 1)].head, (WL)w);

  /* remove this watcher, if others are watching it, they will rearm */
  inotify_rm_watch (fs_fd, old_wd);
}

/* dispatch one inotify event to the stat watchers registered for watch */
/* descriptor wd in hash slot "slot". a negative slot means every hash slot */
/* has to be searched; a wd of -1 matches every watcher. matching watchers */
/* are re-added first if the event says the watch is gone, and then get */
/* their stat check run */
noinline
static void
infy_wd (EV_P_ int slot, int wd, struct inotify_event *ev)
{
  WL w_, succ;

  if (slot < 0)
    {
      int all;

      /* overflow, need to check for all hash slots */
      for (all = 0; all < (EV_INOTIFY_HASHSIZE); ++all)
        infy_wd (EV_A_ all, wd, ev);

      return;
    }

  for (w_ = fs_hash [slot & ((EV_INOTIFY_HASHSIZE) - 1)].head; w_; w_ = succ)
    {
      ev_stat *w = (ev_stat *)w_;

      /* fetched up front: lets us remove this watcher and all before it */
      succ = w_->next;

      if (!(w->wd == wd || wd == -1))
        continue;

      if (ev->mask & (IN_IGNORED | IN_UNMOUNT | IN_DELETE_SELF))
        {
          wlist_del (&fs_hash [slot & ((EV_INOTIFY_HASHSIZE) - 1)].head, (WL)w);
          w->wd = -1;
          infy_add (EV_A_ w); /* re-add, no matter what */
        }

      stat_timer_cb (EV_A_ &w->timer, 0);
    }
}

/* io callback for the inotify fd: read a batch of events into a local */
/* buffer and pass each one to infy_wd. nothing is processed when read */
/* returns zero or a negative value */
static void
infy_cb (EV_P_ ev_io *w, int revents)
{
  char buf [EV_INOTIFY_BUFSIZE];
  int len = read (fs_fd, buf, sizeof (buf));
  int ofs = 0;

  while (ofs < len)
    {
      struct inotify_event *ev = (struct inotify_event *)(buf + ofs);

      infy_wd (EV_A_ ev->wd, ev->wd, ev);

      /* each record is the fixed header followed by ev->len name bytes */
      ofs += sizeof (struct inotify_event) + ev->len;
    }
}

/* set fs_2625 when ev_linux_version reports at least 0x020619 (2.6.25) */
inline_size ecb_cold
void
ev_check_2625 (EV_P)
{
  /* kernels < 2.6.25 are borked
   * http://www.ussg.indiana.edu/hypermail/linux/kernel/0711.3/1208.html
   */
  if (ev_linux_version () >= 0x020619)
    fs_2625 = 1;
}

/* create a new inotify fd and return it (negative on failure). */
/* inotify_init1 with IN_CLOEXEC | IN_NONBLOCK is tried first where those */
/* flags are defined; plain inotify_init is the fallback */
inline_size int
infy_newfd (void)
{
  int fd = -1;

#if defined IN_CLOEXEC && defined IN_NONBLOCK
  fd = inotify_init1 (IN_CLOEXEC | IN_NONBLOCK);
#endif

  if (fd < 0)
    fd = inotify_init ();

  return fd;
}

/* one-time inotify setup, performed only while fs_fd still has the value -2: */
/* run the kernel version check, create the inotify fd and, on success, */
/* start the internal io watcher fs_w on it without holding a loop reference */
inline_size void
infy_init (EV_P)
{
  if (fs_fd != -2)
    return;

  /* -1 from here on, so a failed setup is not attempted again */
  fs_fd = -1;

  ev_check_2625 (EV_A);

  fs_fd = infy_newfd ();

  if (fs_fd < 0)
    return;

  fd_intern (fs_fd);
  ev_io_init (&fs_w, infy_cb, fs_fd, EV_READ);
  ev_set_priority (&fs_w, EV_MAXPRI);
  ev_io_start (EV_A_ &fs_w);
  ev_unref (EV_A);
}

/* rebuild the inotify state after a fork: the old inotify fd is closed and */
/* replaced by a new one, then every stat watcher found in fs_hash is either */
/* re-added to inotify or, if no new fd could be created, switched to plain */
/* polling. does nothing when inotify was not in use (fs_fd < 0) */
inline_size void
infy_fork (EV_P)
{
  int slot;

  if (fs_fd < 0)
    return;

  /* fs_w was unreferenced when it was started, so take the reference back */
  /* before stopping it */
  ev_ref (EV_A);
  ev_io_stop (EV_A_ &fs_w);
  close (fs_fd);
  fs_fd = infy_newfd ();

  if (fs_fd >= 0)
    {
      fd_intern (fs_fd);
      ev_io_set (&fs_w, fs_fd, EV_READ);
      ev_io_start (EV_A_ &fs_w);
      ev_unref (EV_A);
    }

  for (slot = 0; slot < (EV_INOTIFY_HASHSIZE); ++slot)
    {
      WL w_, succ;

      /* detach the whole chain first; infy_add inserts into fs_hash again */
      w_ = fs_hash [slot].head;
      fs_hash [slot].head = 0;

      for (; w_; w_ = succ)
        {
          ev_stat *w = (ev_stat *)w_;

          succ = w_->next; /* fetched up front: lets us add this watcher */

          w->wd = -1;

          if (fs_fd < 0)
            {
              /* no inotify any more: poll at the user or default interval */
              w->timer.repeat = w->interval ? w->interval : DEF_STAT_INTERVAL;
              if (ev_is_active (&w->timer)) ev_ref (EV_A);
              ev_timer_again (EV_A_ &w->timer);
              if (ev_is_active (&w->timer)) ev_unref (EV_A);
            }
          else
            infy_add (EV_A_ w); /* re-add, no matter what */
        }
    }
}

#endif

#ifdef _WIN32
# define EV_LSTAT(p,b) _stati64 (p, b)
#else
# define EV_LSTAT(p,b) lstat (p, b)
#endif

/* refresh w->attr by calling lstat on w->path. st_nlink doubles as the */
/* "exists" indicator: it is set to 0 when lstat fails and forced to 1 when */
/* lstat succeeds but reports a link count of 0 */
void
ev_stat_stat (EV_P_ ev_stat *w) EV_THROW
{
  int rc = lstat (w->path, &w->attr);

  if (rc < 0)
    {
      w->attr.st_nlink = 0;
      return;
    }

  if (w->attr.st_nlink == 0)
    w->attr.st_nlink = 1;
}

/* timer callback of a stat watcher (also called directly by infy_wd): */
/* re-stat the path and, if any of the compared fields differs from the */
/* previous result, store that previous result in w->prev, refresh the */
/* inotify watch where one is in use, and feed an EV_STAT event */
noinline
static void
stat_timer_cb (EV_P_ ev_timer *w_, int revents)
{
  /* w_ is the timer embedded in an ev_stat: step back to the enclosing struct */
  ev_stat *w = (ev_stat *)(((char *)w_) - offsetof (ev_stat, timer));

  ev_statdata before = w->attr;
  ev_stat_stat (EV_A_ w);

  /* memcmp doesn't work on netbsd, they.... do stuff to their struct stat */
  if (
    before.st_dev      == w->attr.st_dev
    && before.st_ino   == w->attr.st_ino
    && before.st_mode  == w->attr.st_mode
    && before.st_nlink == w->attr.st_nlink
    && before.st_uid   == w->attr.st_uid
    && before.st_gid   == w->attr.st_gid
    && before.st_rdev  == w->attr.st_rdev
    && before.st_size  == w->attr.st_size
    && before.st_atime == w->attr.st_atime
    && before.st_mtime == w->attr.st_mtime
    && before.st_ctime == w->attr.st_ctime
  )
    return; /* nothing changed, nothing to report */

  /* we only update w->prev on actual differences */
  /* in case we test more often than invoke the callback, */
  /* to ensure that prev is always different to attr */
  w->prev = before;

#if EV_USE_INOTIFY
  if (fs_fd >= 0)
    {
      infy_del (EV_A_ w);
      infy_add (EV_A_ w);
      ev_stat_stat (EV_A_ w); /* avoid race... */
    }
#endif

  ev_feed_event (EV_A_ w, EV_STAT);
}

/* start stat watcher w: take an initial stat, raise a nonzero interval to at */
/* least MIN_STAT_INTERVAL, set up the embedded timer and then either */
/* register with inotify (where available and usable) or start the polling */
/* timer without holding a loop reference. does nothing if already active */
void
ev_stat_start (EV_P_ ev_stat *w) EV_THROW
{
  ev_tstamp period;
  int use_polling = 1;

  if (expect_false (ev_is_active (w)))
    return;

  ev_stat_stat (EV_A_ w);

  /* an interval of 0 means "use the default" and is left alone */
  if (w->interval && w->interval < MIN_STAT_INTERVAL)
    w->interval = MIN_STAT_INTERVAL;

  period = w->interval ? w->interval : DEF_STAT_INTERVAL;

  ev_timer_init (&w->timer, stat_timer_cb, 0., period);
  ev_set_priority (&w->timer, ev_priority (w));

#if EV_USE_INOTIFY
  infy_init (EV_A);

  if (fs_fd >= 0)
    {
      infy_add (EV_A_ w);
      use_polling = 0;
    }
#endif

  if (use_polling)
    {
      ev_timer_again (EV_A_ &w->timer);
      ev_unref (EV_A);
    }

  ev_start (EV_A_ (W)w, 1);

  EV_FREQUENT_CHECK;
}

/* stops a stat watcher: removes the inotify registration (when compiled in) */
/* and the polling timer. the reference dropped in ev_stat_start after */
/* starting the timer is re-added before the timer is stopped. */
void
ev_stat_stop (EV_P_ ev_stat *w) EV_THROW
{
  ev_timer *poll_timer = &w->timer;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

#if EV_USE_INOTIFY
  infy_del (EV_A_ w);
#endif

  if (ev_is_active (poll_timer))
    {
      ev_ref (EV_A);
      ev_timer_stop (EV_A_ poll_timer);
    }

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_IDLE_ENABLE
/* starts an idle watcher by appending it to the idle array of its priority; */
/* the watcher's active value becomes its 1-based position in that array */
void
ev_idle_start (EV_P_ ev_idle *w) EV_THROW
{
  int pri, slot;

  if (expect_false (ev_is_active (w)))
    return;

  /* the priority is adjusted first, as it selects the array used below */
  pri_adjust (EV_A_ (W)w);

  EV_FREQUENT_CHECK;

  pri  = ABSPRI (w);
  slot = ++idlecnt [pri];

  ++idleall;
  ev_start (EV_A_ (W)w, slot);

  array_needsize (ev_idle *, idles [pri], idlemax [pri], slot, EMPTY2);
  idles [pri][slot - 1] = w;

  EV_FREQUENT_CHECK;
}

/* stops an idle watcher: the last entry of its priority's idle array is */
/* moved into the vacated position and renumbered */
void
ev_idle_stop (EV_P_ ev_idle *w) EV_THROW
{
  int pri, slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  pri  = ABSPRI (w);
  slot = ev_active (w);

  --idlecnt [pri];
  idles [pri][slot - 1] = idles [pri][idlecnt [pri]];
  ev_active (idles [pri][slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);
  --idleall;

  EV_FREQUENT_CHECK;
}
#endif

#if EV_PREPARE_ENABLE
/* starts a prepare watcher by appending it to the prepares array; the */
/* watcher's active value is its 1-based position in that array */
void
ev_prepare_start (EV_P_ ev_prepare *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ++preparecnt;
  ev_start (EV_A_ (W)w, slot);
  array_needsize (ev_prepare *, prepares, preparemax, slot, EMPTY2);
  prepares [slot - 1] = w;

  EV_FREQUENT_CHECK;
}

/* stops a prepare watcher: the last entry of the prepares array is moved */
/* into the vacated position and renumbered */
void
ev_prepare_stop (EV_P_ ev_prepare *w) EV_THROW
{
  int slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ev_active (w);

  --preparecnt;
  prepares [slot - 1] = prepares [preparecnt];
  ev_active (prepares [slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_CHECK_ENABLE
/* starts a check watcher by appending it to the checks array; the */
/* watcher's active value is its 1-based position in that array */
void
ev_check_start (EV_P_ ev_check *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ++checkcnt;
  ev_start (EV_A_ (W)w, slot);
  array_needsize (ev_check *, checks, checkmax, slot, EMPTY2);
  checks [slot - 1] = w;

  EV_FREQUENT_CHECK;
}

/* stops a check watcher: the last entry of the checks array is moved into */
/* the vacated position and renumbered */
void
ev_check_stop (EV_P_ ev_check *w) EV_THROW
{
  int slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ev_active (w);

  --checkcnt;
  checks [slot - 1] = checks [checkcnt];
  ev_active (checks [slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_EMBED_ENABLE
/* runs one non-blocking iteration of the loop embedded by w */
noinline
void
ev_embed_sweep (EV_P_ ev_embed *w) EV_THROW
{
  struct ev_loop *embedded = w->other;

  ev_run (embedded, EVRUN_NOWAIT);
}

/* io callback of an embed watcher: without a user callback the embedded */
/* loop is swept right away, otherwise an EV_EMBED event is fed to the */
/* embed watcher so its callback can decide */
static void
embed_io_cb (EV_P_ ev_io *io, int revents)
{
  /* step back from the embedded io member to the enclosing ev_embed */
  ev_embed *embed = (ev_embed *)((char *)io - offsetof (ev_embed, io));

  if (!ev_cb (embed))
    {
      ev_run (embed->other, EVRUN_NOWAIT);
      return;
    }

  ev_feed_event (EV_A_ (W)embed, EV_EMBED);
}

/* prepare callback of an embed watcher: repeatedly applies outstanding fd */
/* changes of the embedded loop and sweeps it until none are left */
static void
embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents)
{
  /* step back from the embedded prepare member to the enclosing ev_embed */
  ev_embed *w = (ev_embed *)(((char *)prepare) - offsetof (ev_embed, prepare));

  {
    /* EV_P declares a new "loop" variable that shadows the parameter, so */
    /* inside this block EV_A means the embedded loop */
    EV_P = w->other;

    /* presumably fdchangecnt counts the not yet applied fd changes of */
    /* "loop" (i.e. the embedded loop here) - TODO confirm */
    while (fdchangecnt)
      {
        fd_reify (EV_A);
        ev_run (EV_A_ EVRUN_NOWAIT);
      }
  }
}

/* fork callback of an embed watcher: stops the embed watcher, forwards the */
/* fork to the embedded loop, sweeps it once, and starts the watcher again */
static void
embed_fork_cb (EV_P_ ev_fork *fork_w, int revents)
{
  /* step back from the embedded fork member to the enclosing ev_embed */
  ev_embed *w = (ev_embed *)(((char *)fork_w) - offsetof (ev_embed, fork));

  /* stopped on the outer loop (EV_A still is the parameter here) */
  ev_embed_stop (EV_A_ w);

  {
    /* shadows "loop": inside this block EV_A means the embedded loop */
    EV_P = w->other;

    ev_loop_fork (EV_A);
    ev_run (EV_A_ EVRUN_NOWAIT);
  }

  /* back on the outer loop; ev_embed_start re-reads the backend fd of */
  /* the embedded loop when it re-initialises the io watcher */
  ev_embed_start (EV_A_ w);
}

/* compiled out: idle callback for embed watchers that would merely stop */
/* the idle watcher it is called for */
#if 0
static void
embed_idle_cb (EV_P_ ev_idle *idle, int revents)
{
  ev_idle_stop (EV_A_ idle);
}
#endif

/* starts an embed watcher: the outer loop watches the backend fd of the */
/* embedded loop (w->other) for readability and additionally runs a */
/* prepare and a fork watcher on its behalf */
void
ev_embed_start (EV_P_ ev_embed *w) EV_THROW
{
  if (expect_false (ev_is_active (w)))
    return;

  {
    /* shadows "loop": backend and backend_fd below presumably resolve to */
    /* members of the embedded loop - TODO confirm against ev_wrap.h */
    EV_P = w->other;
    assert (("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ()));
    ev_io_init (&w->io, embed_io_cb, backend_fd, EV_READ);
  }

  EV_FREQUENT_CHECK;

  /* the io watcher inherits the priority of the embed watcher */
  ev_set_priority (&w->io, ev_priority (w));
  ev_io_start (EV_A_ &w->io);

  /* the prepare watcher runs at the lowest priority */
  ev_prepare_init (&w->prepare, embed_prepare_cb);
  ev_set_priority (&w->prepare, EV_MINPRI);
  ev_prepare_start (EV_A_ &w->prepare);

  ev_fork_init (&w->fork, embed_fork_cb);
  ev_fork_start (EV_A_ &w->fork);

  /*ev_idle_init (&w->idle, embed_idle_cb);*/

  ev_start (EV_A_ (W)w, 1);

  EV_FREQUENT_CHECK;
}

/* stops an embed watcher together with the three helper watchers that */
/* ev_embed_start started for it */
void
ev_embed_stop (EV_P_ ev_embed *w) EV_THROW
{
  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  /* helpers first, in the order they were started */
  ev_io_stop (EV_A_ &w->io);
  ev_prepare_stop (EV_A_ &w->prepare);
  ev_fork_stop (EV_A_ &w->fork);

  /* then the embed watcher itself */
  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_FORK_ENABLE
/* starts a fork watcher by appending it to the forks array; the watcher's */
/* active value is its 1-based position in that array */
void
ev_fork_start (EV_P_ ev_fork *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ++forkcnt;
  ev_start (EV_A_ (W)w, slot);
  array_needsize (ev_fork *, forks, forkmax, slot, EMPTY2);
  forks [slot - 1] = w;

  EV_FREQUENT_CHECK;
}

/* stops a fork watcher: the last entry of the forks array is moved into */
/* the vacated position and renumbered */
void
ev_fork_stop (EV_P_ ev_fork *w) EV_THROW
{
  int slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ev_active (w);

  --forkcnt;
  forks [slot - 1] = forks [forkcnt];
  ev_active (forks [slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_CLEANUP_ENABLE
/* starts a cleanup watcher by appending it to the cleanups array, then */
/* gives back the loop reference taken by starting it */
void
ev_cleanup_start (EV_P_ ev_cleanup *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ++cleanupcnt;
  ev_start (EV_A_ (W)w, slot);
  array_needsize (ev_cleanup *, cleanups, cleanupmax, slot, EMPTY2);
  cleanups [slot - 1] = w;

  /* cleanup watchers should never keep a refcount on the loop */
  ev_unref (EV_A);
  EV_FREQUENT_CHECK;
}

/* stops a cleanup watcher: re-adds the loop reference that */
/* ev_cleanup_start dropped, then removes the watcher from the cleanups */
/* array by moving the last entry into its position */
void
ev_cleanup_stop (EV_P_ ev_cleanup *w) EV_THROW
{
  int slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;
  ev_ref (EV_A);

  slot = ev_active (w);

  --cleanupcnt;
  cleanups [slot - 1] = cleanups [cleanupcnt];
  ev_active (cleanups [slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}
#endif

#if EV_ASYNC_ENABLE
/* starts an async watcher: clears its sent flag, initialises the event */
/* pipe and appends the watcher to the asyncs array */
void
ev_async_start (EV_P_ ev_async *w) EV_THROW
{
  int slot;

  if (expect_false (ev_is_active (w)))
    return;

  w->sent = 0;

  evpipe_init (EV_A);

  EV_FREQUENT_CHECK;

  slot = ++asynccnt;
  ev_start (EV_A_ (W)w, slot);
  array_needsize (ev_async *, asyncs, asyncmax, slot, EMPTY2);
  asyncs [slot - 1] = w;

  EV_FREQUENT_CHECK;
}

/* stops an async watcher: the last entry of the asyncs array is moved into */
/* the vacated position and renumbered */
void
ev_async_stop (EV_P_ ev_async *w) EV_THROW
{
  int slot;

  clear_pending (EV_A_ (W)w);

  if (expect_false (!ev_is_active (w)))
    return;

  EV_FREQUENT_CHECK;

  slot = ev_active (w);

  --asynccnt;
  asyncs [slot - 1] = asyncs [asynccnt];
  ev_active (asyncs [slot - 1]) = slot;

  ev_stop (EV_A_ (W)w);

  EV_FREQUENT_CHECK;
}

/* marks the watcher as sent and wakes the loop up via evpipe_write. */
/* NOTE(review): the flag is stored before the wakeup is written, presumably */
/* so that the loop already finds it set when it wakes up - keep this order. */
void
ev_async_send (EV_P_ ev_async *w) EV_THROW
{
  w->sent = 1;
  evpipe_write (EV_A_ &async_pending);
}
#endif

/*****************************************************************************/

/* heap-allocated state of one ev_once request; allocated in ev_once and */
/* freed in once_cb */
struct ev_once
{
  ev_io io;                            /* started when ev_once got an fd >= 0 */
  ev_timer to;                         /* started when ev_once got a timeout >= 0 */
  void (*cb)(int revents, void *arg);  /* user callback */
  void *arg;                           /* passed through to cb */
};

/* common completion path of an ev_once request: stops both watchers, */
/* releases the request and only then calls the user callback */
static void
once_cb (EV_P_ struct ev_once *once, int revents)
{
  /* copied out first, as the request is freed before the call */
  void *user_arg = once->arg;
  void (*user_cb)(int revents, void *arg) = once->cb;

  ev_io_stop (EV_A_ &once->io);
  ev_timer_stop (EV_A_ &once->to);
  ev_free (once);

  user_cb (revents, user_arg);
}

/* io side of an ev_once request: merges in whatever is pending on the */
/* timer, so the user callback is invoked only once */
static void
once_cb_io (EV_P_ ev_io *w, int revents)
{
  struct ev_once *req = (struct ev_once *)((char *)w - offsetof (struct ev_once, io));
  int timer_revents = ev_clear_pending (EV_A_ &req->to);

  once_cb (EV_A_ req, revents | timer_revents);
}

/* timer side of an ev_once request: merges in whatever is pending on the */
/* io watcher, so the user callback is invoked only once */
static void
once_cb_to (EV_P_ ev_timer *w, int revents)
{
  struct ev_once *req = (struct ev_once *)((char *)w - offsetof (struct ev_once, to));
  int io_revents = ev_clear_pending (EV_A_ &req->io);

  once_cb (EV_A_ req, revents | io_revents);
}

/* waits for a single event: fd (when >= 0) becoming ready for events, */
/* and/or timeout (when >= 0) elapsing; cb is then called with arg. */
/* if the request cannot be allocated, cb is called immediately with */
/* EV_ERROR and all of EV_READ, EV_WRITE and EV_TIMER set. */
void
ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_THROW
{
  struct ev_once *req = (struct ev_once *)ev_malloc (sizeof (struct ev_once));

  if (expect_false (!req))
    {
      cb (EV_ERROR | EV_READ | EV_WRITE | EV_TIMER, arg);
      return;
    }

  req->arg = arg;
  req->cb  = cb;

  /* both watchers get initialised even if only one of them is started, */
  /* as once_cb stops both of them */
  ev_init (&req->io, once_cb_io);
  ev_init (&req->to, once_cb_to);

  if (fd >= 0)
    {
      ev_io_set (&req->io, fd, events);
      ev_io_start (EV_A_ &req->io);
    }

  if (timeout >= 0.)
    {
      ev_timer_set (&req->to, timeout, 0.);
      ev_timer_start (EV_A_ &req->to);
    }
}

/*****************************************************************************/

#if EV_WALK_ENABLE
/* invokes cb for every active watcher whose type bit is set in types. */
/* watchers libev starts on behalf of other watchers are not reported as */
/* such: the io member of an embed watcher is reported as EV_EMBED and the */
/* timer member of a stat watcher as EV_STAT (in both cases with a pointer */
/* to the enclosing watcher), while the inotify and pipe io watchers and */
/* the prepare/fork members of embed watchers are skipped. */
/* list successors are fetched before cb is called, so cb may stop the */
/* watcher it is handed. */
ecb_cold
void
ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)) EV_THROW
{
  int i, j;
  ev_watcher_list *wl, *wn;

  if (types & (EV_IO | EV_EMBED))
    for (i = 0; i < anfdmax; ++i)
      for (wl = anfds [i].head; wl; )
        {
          wn = wl->next;

#if EV_EMBED_ENABLE
          if (ev_cb ((ev_io *)wl) == embed_io_cb)
            {
              if (types & EV_EMBED)
                cb (EV_A_ EV_EMBED, ((char *)wl) - offsetof (struct ev_embed, io));
            }
          else
#endif
#if EV_USE_INOTIFY
          if (ev_cb ((ev_io *)wl) == infy_cb)
            ;
          else
#endif
          if ((ev_io *)wl != &pipe_w)
            if (types & EV_IO)
              cb (EV_A_ EV_IO, wl);

          wl = wn;
        }

  if (types & (EV_TIMER | EV_STAT))
    for (i = timercnt + HEAP0; i-- > HEAP0; )
#if EV_STAT_ENABLE
      /*TODO: timer is not always active*/
      if (ev_cb ((ev_timer *)ANHE_w (timers [i])) == stat_timer_cb)
        {
          if (types & EV_STAT)
            cb (EV_A_ EV_STAT, ((char *)ANHE_w (timers [i])) - offsetof (struct ev_stat, timer));
        }
      else
#endif
      if (types & EV_TIMER)
        cb (EV_A_ EV_TIMER, ANHE_w (timers [i]));

#if EV_PERIODIC_ENABLE
  if (types & EV_PERIODIC)
    for (i = periodiccnt + HEAP0; i-- > HEAP0; )
      cb (EV_A_ EV_PERIODIC, ANHE_w (periodics [i]));
#endif

#if EV_IDLE_ENABLE
  if (types & EV_IDLE)
    for (j = NUMPRI; j--; )
      for (i = idlecnt [j]; i--; )
        cb (EV_A_ EV_IDLE, idles [j][i]);
#endif

#if EV_FORK_ENABLE
  if (types & EV_FORK)
    for (i = forkcnt; i--; )
# if EV_EMBED_ENABLE
      /* embed_fork_cb only exists when embed watchers are compiled in */
      if (ev_cb (forks [i]) != embed_fork_cb)
# endif
        cb (EV_A_ EV_FORK, forks [i]);
#endif

#if EV_ASYNC_ENABLE
  if (types & EV_ASYNC)
    for (i = asynccnt; i--; )
      cb (EV_A_ EV_ASYNC, asyncs [i]);
#endif

#if EV_PREPARE_ENABLE
  if (types & EV_PREPARE)
    for (i = preparecnt; i--; )
# if EV_EMBED_ENABLE
      if (ev_cb (prepares [i]) != embed_prepare_cb)
# endif
        cb (EV_A_ EV_PREPARE, prepares [i]);
#endif

#if EV_CHECK_ENABLE
  if (types & EV_CHECK)
    for (i = checkcnt; i--; )
      cb (EV_A_ EV_CHECK, checks [i]);
#endif

#if EV_SIGNAL_ENABLE
  if (types & EV_SIGNAL)
    for (i = 0; i < EV_NSIG - 1; ++i)
      for (wl = signals [i].head; wl; )
        {
          wn = wl->next;
          cb (EV_A_ EV_SIGNAL, wl);
          wl = wn;
        }
#endif

#if EV_CHILD_ENABLE
  if (types & EV_CHILD)
    for (i = (EV_PID_HASHSIZE); i--; )
      for (wl = childs [i]; wl; )
        {
          wn = wl->next;
          cb (EV_A_ EV_CHILD, wl);
          wl = wn;
        }
#endif
/* EV_STAT     0x00001000    stat data changed */
/* EV_EMBED    0x00010000    embedded event loop needs sweep */
}
#endif

#if EV_MULTIPLICITY
  #include "ev_wrap.h"
#endif


================================================
FILE: libev/ev.h
================================================
/*
 * libev native API header
 *
 * Copyright (c) 2007,2008,2009,2010,2011,2012,2015 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifndef EV_H_
#define EV_H_

/* EV_CPP (x) expands to x under C++ and to nothing under C. */
/* EV_THROW is the "does not throw" specifier appended to the declarations */
/* below: noexcept from C++11 on, throw () for older C++, empty in C. */
#ifdef __cplusplus
# define EV_CPP(x) x
# if __cplusplus >= 201103L
#  define EV_THROW noexcept
# else
#  define EV_THROW throw ()
# endif
#else
# define EV_CPP(x)
# define EV_THROW
#endif

EV_CPP(extern "C" {)

/*****************************************************************************/

/* pre-4.0 compatibility */
#ifndef EV_COMPAT3
# define EV_COMPAT3 1
#endif

/* EV_FEATURES is a bitmask providing the defaults for the settings below; */
/* when optimising for size, the CODE (1) and DATA (2) bits are left out */
#ifndef EV_FEATURES
# if defined __OPTIMIZE_SIZE__
#  define EV_FEATURES 0x7c
# else
#  define EV_FEATURES 0x7f
# endif
#endif

/* one test macro per feature bit, non-zero when the bit is set */
#define EV_FEATURE_CODE     ((EV_FEATURES) &  1)
#define EV_FEATURE_DATA     ((EV_FEATURES) &  2)
#define EV_FEATURE_CONFIG   ((EV_FEATURES) &  4)
#define EV_FEATURE_API      ((EV_FEATURES) &  8)
#define EV_FEATURE_WATCHERS ((EV_FEATURES) & 16)
#define EV_FEATURE_BACKENDS ((EV_FEATURES) & 32)
#define EV_FEATURE_OS       ((EV_FEATURES) & 64)

/* these priorities are inclusive, higher priorities will be invoked earlier */
/* without EV_FEATURE_CONFIG both collapse to 0, i.e. a single priority */
#ifndef EV_MINPRI
# define EV_MINPRI (EV_FEATURE_CONFIG ? -2 : 0)
#endif
#ifndef EV_MAXPRI
# define EV_MAXPRI (EV_FEATURE_CONFIG ? +2 : 0)
#endif

/* defaults for the per-feature switches; each one can be predefined by the */
/* embedder and otherwise follows the corresponding EV_FEATURES bit */

/* multiple event loops follow EV_FEATURE_CONFIG */
#ifndef EV_MULTIPLICITY
# define EV_MULTIPLICITY EV_FEATURE_CONFIG
#endif

/* the optional watcher types all follow EV_FEATURE_WATCHERS */
#ifndef EV_PERIODIC_ENABLE
# define EV_PERIODIC_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_STAT_ENABLE
# define EV_STAT_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_PREPARE_ENABLE
# define EV_PREPARE_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_CHECK_ENABLE
# define EV_CHECK_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_IDLE_ENABLE
# define EV_IDLE_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_FORK_ENABLE
# define EV_FORK_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_CLEANUP_ENABLE
# define EV_CLEANUP_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_SIGNAL_ENABLE
# define EV_SIGNAL_ENABLE EV_FEATURE_WATCHERS
#endif

/* child watchers are off by default on windows */
#ifndef EV_CHILD_ENABLE
# ifdef _WIN32
#  define EV_CHILD_ENABLE 0
# else
#  define EV_CHILD_ENABLE EV_FEATURE_WATCHERS
#endif
#endif

#ifndef EV_ASYNC_ENABLE
# define EV_ASYNC_ENABLE EV_FEATURE_WATCHERS
#endif

#ifndef EV_EMBED_ENABLE
# define EV_EMBED_ENABLE EV_FEATURE_WATCHERS
#endif

/* ev_walk is off unless explicitly requested */
#ifndef EV_WALK_ENABLE
# define EV_WALK_ENABLE 0 /* not yet */
#endif

/*****************************************************************************/

/* enabling child watchers forces signal watchers on as well */
#if EV_CHILD_ENABLE && !EV_SIGNAL_ENABLE
# undef EV_SIGNAL_ENABLE
# define EV_SIGNAL_ENABLE 1
#endif

/*****************************************************************************/

/* type of every time value in this API (timestamps, timeouts, intervals) */
typedef double ev_tstamp;

#include <string.h> /* for memmove */

/* type of the "sent" flag of ev_async; unless predefined by the embedder */
/* it is a volatile sig_atomic_t */
#ifndef EV_ATOMIC_T
# include <signal.h>
# define EV_ATOMIC_T sig_atomic_t volatile
#endif

#if EV_STAT_ENABLE
# ifdef _WIN32
#  include <time.h>
#  include <sys/types.h>
# endif
# include <sys/stat.h>
#endif

/* support multiple event loops? */
/* with multiplicity every function takes the loop as (first) parameter, */
/* without it the macros expand to nothing (EV_P to void) */
#if EV_MULTIPLICITY
struct ev_loop;
# define EV_P  struct ev_loop *loop               /* a loop as sole parameter in a declaration */
# define EV_P_ EV_P,                              /* a loop as first of multiple parameters */
# define EV_A  loop                               /* a loop as sole argument to a function call */
# define EV_A_ EV_A,                              /* a loop as first of multiple arguments */
# define EV_DEFAULT_UC  ev_default_loop_uc_ ()    /* the default loop, if initialised, as sole arg */
# define EV_DEFAULT_UC_ EV_DEFAULT_UC,            /* the default loop as first of multiple arguments */
# define EV_DEFAULT  ev_default_loop (0)          /* the default loop as sole arg */
# define EV_DEFAULT_ EV_DEFAULT,                  /* the default loop as first of multiple arguments */
#else
# define EV_P void
# define EV_P_
# define EV_A
# define EV_A_
# define EV_DEFAULT
# define EV_DEFAULT_
# define EV_DEFAULT_UC
# define EV_DEFAULT_UC_
/* embed watchers need a second loop, so they are unavailable here */
# undef EV_EMBED_ENABLE
#endif

/* EV_INLINE is used for functions in header files */
#if __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
# define EV_INLINE static inline
#else
# define EV_INLINE static
#endif

#ifdef EV_API_STATIC
# define EV_API_DECL static
#else
# define EV_API_DECL extern
#endif

/* EV_PROTOTYPES can be used to switch of prototype declarations */
#ifndef EV_PROTOTYPES
# define EV_PROTOTYPES 1
#endif

/*****************************************************************************/

/* version numbers carried by this header */
#define EV_VERSION_MAJOR 4
#define EV_VERSION_MINOR 24

/* eventmask, revents, events... */
/* one bit per event type, so values can be ored together */
enum {
  EV_UNDEF    = (int)0xFFFFFFFF, /* guaranteed to be invalid */
  EV_NONE     =            0x00, /* no events */
  EV_READ     =            0x01, /* ev_io detected read will not block */
  EV_WRITE    =            0x02, /* ev_io detected write will not block */
  EV__IOFDSET =            0x80, /* internal use only */
  EV_IO       =         EV_READ, /* alias for type-detection */
  EV_TIMER    =      0x00000100, /* timer timed out */
#if EV_COMPAT3
  EV_TIMEOUT  =        EV_TIMER, /* pre 4.0 API compatibility */
#endif
  EV_PERIODIC =      0x00000200, /* periodic timer timed out */
  EV_SIGNAL   =      0x00000400, /* signal was received */
  EV_CHILD    =      0x00000800, /* child/pid had status change */
  EV_STAT     =      0x00001000, /* stat data changed */
  EV_IDLE     =      0x00002000, /* event loop is idling */
  EV_PREPARE  =      0x00004000, /* event loop about to poll */
  EV_CHECK    =      0x00008000, /* event loop finished poll */
  EV_EMBED    =      0x00010000, /* embedded event loop needs sweep */
  EV_FORK     =      0x00020000, /* event loop resumed in child */
  EV_CLEANUP  =      0x00040000, /* event loop about to be destroyed */
  EV_ASYNC    =      0x00080000, /* async intra-loop signal */
  EV_CUSTOM   =      0x01000000, /* for use by user code */
  EV_ERROR    = (int)0x80000000  /* sent when an error occurs */
};

/* can be used to add custom fields to all watchers, while losing binary compatibility */
/* the default is a single user data pointer */
#ifndef EV_COMMON
# define EV_COMMON void *data;
#endif

/* declaration of the callback member inside a watcher struct */
#ifndef EV_CB_DECLARE
# define EV_CB_DECLARE(type) void (*cb)(EV_P_ struct type *w, int revents);
#endif
/* how a watcher's callback is called; relies on EV_A being in scope */
#ifndef EV_CB_INVOKE
# define EV_CB_INVOKE(watcher,revents) (watcher)->cb (EV_A_ (watcher), (revents))
#endif

/* not official, do not use */
/* (declares a callback named name for watcher type ev_<type>) */
#define EV_CB(type,name) void name (EV_P_ struct ev_ ## type *w, int revents)

/*
 * struct member types:
 * private: you may look at them, but not change them,
 *          and they might not mean anything to you.
 * ro: can be read anytime, but only changed when the watcher isn't active.
 * rw: can be read and modified anytime, even when the watcher is active.
 *
 * some internal details that might be helpful for debugging:
 *
 * active is either 0, which means the watcher is not active,
 *           or the array index of the watcher (periodics, timers)
 *           or the array index + 1 (most other watchers)
 *           or simply 1 for watchers that aren't in some array.
 * pending is either 0, in which case the watcher isn't,
 *           or the array index + 1 in the pendings array.
 */

#if EV_MINPRI == EV_MAXPRI
# define EV_DECL_PRIORITY
#elif !defined (EV_DECL_PRIORITY)
# define EV_DECL_PRIORITY int priority;
#endif

/* shared by all watchers */
#define EV_WATCHER(type)			\
  int active; /* private */			\
  int pending; /* private */			\
  EV_DECL_PRIORITY /* private */		\
  EV_COMMON /* rw */				\
  EV_CB_DECLARE (type) /* private */

#define EV_WATCHER_LIST(type)			\
  EV_WATCHER (type)				\
  struct ev_watcher_list *next; /* private */

#define EV_WATCHER_TIME(type)			\
  EV_WATCHER (type)				\
  ev_tstamp at;     /* private */

/* base class, nothing to see here unless you subclass */
/* (contains exactly the members every watcher starts with) */
typedef struct ev_watcher
{
  EV_WATCHER (ev_watcher)
} ev_watcher;

/* base class, nothing to see here unless you subclass */
/* (common prefix of all watchers built with EV_WATCHER_LIST) */
typedef struct ev_watcher_list
{
  EV_WATCHER_LIST (ev_watcher_list)
} ev_watcher_list;

/* base class, nothing to see here unless you subclass */
/* (common prefix of all watchers built with EV_WATCHER_TIME) */
typedef struct ev_watcher_time
{
  EV_WATCHER_TIME (ev_watcher_time)
} ev_watcher_time;

/* invoked when fd is either EV_READable or EV_WRITEable */
/* revent EV_READ, EV_WRITE */
typedef struct ev_io
{
  EV_WATCHER_LIST (ev_io)

  int fd;     /* ro */
  int events; /* ro */ /* ev_io_set additionally ors in EV__IOFDSET */
} ev_io;

/* invoked after a specific time, repeatable (based on monotonic clock) */
/* revent EV_TIMER (EV_TIMEOUT is the pre-4.0 alias, see EV_COMPAT3) */
typedef struct ev_timer
{
  EV_WATCHER_TIME (ev_timer)

  ev_tstamp repeat; /* rw */
} ev_timer;

/* invoked at some specific time, possibly repeating at regular intervals (based on UTC) */
/* revent EV_PERIODIC */
/* all three members below are assigned together by ev_periodic_set */
typedef struct ev_periodic
{
  EV_WATCHER_TIME (ev_periodic)

  ev_tstamp offset; /* rw */
  ev_tstamp interval; /* rw */
  ev_tstamp (*reschedule_cb)(struct ev_periodic *w, ev_tstamp now) EV_THROW; /* rw */
} ev_periodic;

/* invoked when the given signal has been received */
/* revent EV_SIGNAL */
typedef struct ev_signal
{
  EV_WATCHER_LIST (ev_signal)

  int signum; /* ro */ /* assigned by ev_signal_set */
} ev_signal;

/* invoked when sigchld is received and waitpid indicates the given pid */
/* revent EV_CHILD */
/* does not support priorities */
typedef struct ev_child
{
  EV_WATCHER_LIST (ev_child)

  int flags;   /* private */ /* ev_child_set stores !!trace here */
  int pid;     /* ro */
  int rpid;    /* rw, holds the received pid */
  int rstatus; /* rw, holds the exit status, use the macros from sys/wait.h */
} ev_child;

#if EV_STAT_ENABLE
/* st_nlink = 0 means missing file or other error */
/* the stat buffer type matches the stat call used on each platform */
# ifdef _WIN32
typedef struct _stati64 ev_statdata;
# else
typedef struct stat ev_statdata;
# endif

/* invoked each time the stat data changes for a given path */
/* revent EV_STAT */
typedef struct ev_stat
{
  EV_WATCHER_LIST (ev_stat)

  ev_timer timer;     /* private */
  ev_tstamp interval; /* ro */
  const char *path;   /* ro */ /* the pointer is stored, the string is not copied */
  ev_statdata prev;   /* ro */ /* snapshot before the most recent detected change */
  ev_statdata attr;   /* ro */ /* most recent snapshot */

  int wd; /* wd for inotify, fd for kqueue */ /* ev_stat_set resets it to -2 */
} ev_stat;
#endif

#if EV_IDLE_ENABLE
/* invoked when nothing else needs to be done, keeps the process from blocking */
/* revent EV_IDLE */
typedef struct ev_idle
{
  EV_WATCHER (ev_idle)
} ev_idle;
#endif

/* invoked for each run of the mainloop, just before the blocking call */
/* you can still change events in any way you like */
/* revent EV_PREPARE */
/* (declared unconditionally, unlike most other optional watcher types) */
typedef struct ev_prepare
{
  EV_WATCHER (ev_prepare)
} ev_prepare;

/* invoked for each run of the mainloop, just after the blocking call */
/* revent EV_CHECK */
/* (declared unconditionally, unlike most other optional watcher types) */
typedef struct ev_check
{
  EV_WATCHER (ev_check)
} ev_check;

#if EV_FORK_ENABLE
/* the callback gets invoked before check in the child process when a fork was detected */
/* revent EV_FORK */
/* has no members beyond the common watcher ones */
typedef struct ev_fork
{
  EV_WATCHER (ev_fork)
} ev_fork;
#endif

#if EV_CLEANUP_ENABLE
/* is invoked just before the loop gets destroyed */
/* revent EV_CLEANUP */
/* has no members beyond the common watcher ones */
typedef struct ev_cleanup
{
  EV_WATCHER (ev_cleanup)
} ev_cleanup;
#endif

#if EV_EMBED_ENABLE
/* used to embed an event loop inside another */
/* the callback gets invoked when the event loop has handled events, and can be 0 */
/* revent EV_EMBED */
/* the members marked unused are not touched by ev_embed_start/stop; */
/* presumably they are kept for binary layout reasons - do not remove */
typedef struct ev_embed
{
  EV_WATCHER (ev_embed)

  struct ev_loop *other; /* ro */ /* the embedded loop */
  ev_io io;              /* private */
  ev_prepare prepare;    /* private */
  ev_check check;        /* unused */
  ev_timer timer;        /* unused */
  ev_periodic periodic;  /* unused */
  ev_idle idle;          /* unused */
  ev_fork fork;          /* private */
#if EV_CLEANUP_ENABLE
  ev_cleanup cleanup;    /* unused */
#endif
} ev_embed;
#endif

#if EV_ASYNC_ENABLE
/* invoked when somebody calls ev_async_send on the watcher */
/* revent EV_ASYNC */
typedef struct ev_async
{
  EV_WATCHER (ev_async)

  EV_ATOMIC_T sent; /* private */
} ev_async;

/* current value of the sent flag, as an rvalue */
# define ev_async_pending(w) (+(w)->sent)
#endif

/* the presence of this union forces similar struct layout */
/* it has one member per watcher type that is compiled in */
union ev_any_watcher
{
  struct ev_watcher w;
  struct ev_watcher_list wl;

  struct ev_io io;
  struct ev_timer timer;
  struct ev_periodic periodic;
  struct ev_signal signal;
  struct ev_child child;
#if EV_STAT_ENABLE
  struct ev_stat stat;
#endif
#if EV_IDLE_ENABLE
  struct ev_idle idle;
#endif
  struct ev_prepare prepare;
  struct ev_check check;
#if EV_FORK_ENABLE
  struct ev_fork fork;
#endif
#if EV_CLEANUP_ENABLE
  struct ev_cleanup cleanup;
#endif
#if EV_EMBED_ENABLE
  struct ev_embed embed;
#endif
#if EV_ASYNC_ENABLE
  struct ev_async async;
#endif
};

/* flag bits for ev_default_loop and ev_loop_new */
/* they occupy bits above EVBACKEND_MASK, so both kinds can be ored together */
enum {
  /* the default */
  EVFLAG_AUTO      = 0x00000000U, /* not quite a mask */
  /* flag bits */
  EVFLAG_NOENV     = 0x01000000U, /* do NOT consult environment */
  EVFLAG_FORKCHECK = 0x02000000U, /* check for a fork in each iteration */
  /* debugging/feature disable */
  EVFLAG_NOINOTIFY = 0x00100000U, /* do not attempt to use inotify */
#if EV_COMPAT3
  EVFLAG_NOSIGFD   = 0, /* compatibility to pre-3.9 */ /* zero, so it has no effect */
#endif
  EVFLAG_SIGNALFD  = 0x00200000U, /* attempt to use signalfd */
  EVFLAG_NOSIGMASK = 0x00400000U  /* avoid modifying the signal mask */
};

/* method bits to be ored together */
enum {
  EVBACKEND_SELECT  = 0x00000001U, /* available just about anywhere */
  EVBACKEND_POLL    = 0x00000002U, /* !win, !aix, broken on osx */
  EVBACKEND_EPOLL   = 0x00000004U, /* linux */
  EVBACKEND_KQUEUE  = 0x00000008U, /* bsd, broken on osx */
  EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */
  EVBACKEND_PORT    = 0x00000020U, /* solaris 10 */
  EVBACKEND_ALL     = 0x0000003FU, /* all known backends */ /* the six bits above */
  EVBACKEND_MASK    = 0x0000FFFFU  /* all future backends */
};

#if EV_PROTOTYPES
/* library version numbers; presumably the runtime counterparts of */
/* EV_VERSION_MAJOR/EV_VERSION_MINOR - TODO confirm */
EV_API_DECL int ev_version_major (void) EV_THROW;
EV_API_DECL int ev_version_minor (void) EV_THROW;

/* each returns a bitmask, presumably of EVBACKEND_* values */
EV_API_DECL unsigned int ev_supported_backends (void) EV_THROW;
EV_API_DECL unsigned int ev_recommended_backends (void) EV_THROW;
EV_API_DECL unsigned int ev_embeddable_backends (void) EV_THROW;

EV_API_DECL ev_tstamp ev_time (void) EV_THROW;
EV_API_DECL void ev_sleep (ev_tstamp delay) EV_THROW; /* sleep for a while */

/* Sets the allocation function to use, works like realloc.
 * It is used to allocate and free memory.
 * If it returns zero when memory needs to be allocated, the library might abort
 * or take some potentially destructive action.
 * The default is your system realloc function.
 */
EV_API_DECL void ev_set_allocator (void *(*cb)(void *ptr, long size) EV_THROW) EV_THROW;

/* set the callback function to call on a
 * retryable syscall error
 * (such as failed select, poll, epoll_wait)
 */
EV_API_DECL void ev_set_syserr_cb (void (*cb)(const char *msg) EV_THROW) EV_THROW;

#if EV_MULTIPLICITY

/* the default loop is the only one that handles signals and child watchers */
/* you can call this as often as you like */
EV_API_DECL struct ev_loop *ev_default_loop (unsigned int flags EV_CPP (= 0)) EV_THROW;

/* with a static API the pointer is declared at file scope as well */
#ifdef EV_API_STATIC
EV_API_DECL struct ev_loop *ev_default_loop_ptr;
#endif

/* returns the default loop pointer as-is, without calling ev_default_loop */
EV_INLINE struct ev_loop *
ev_default_loop_uc_ (void) EV_THROW
{
  extern struct ev_loop *ev_default_loop_ptr;

  return ev_default_loop_ptr;
}

/* true when the given loop is the default loop */
EV_INLINE int
ev_is_default_loop (EV_P) EV_THROW
{
  return EV_A == EV_DEFAULT_UC;
}

/* create and destroy alternative loops that don't handle signals */
EV_API_DECL struct ev_loop *ev_loop_new (unsigned int flags EV_CPP (= 0)) EV_THROW;

EV_API_DECL ev_tstamp ev_now (EV_P) EV_THROW; /* time w.r.t. timers and the eventloop, updated after each poll */

#else

EV_API_DECL int ev_default_loop (unsigned int flags EV_CPP (= 0)) EV_THROW; /* returns true when successful */

/* without multiplicity the loop time is a plain global */
EV_API_DECL ev_tstamp ev_rt_now;

/* inline accessor for ev_rt_now */
EV_INLINE ev_tstamp
ev_now (void) EV_THROW
{
  return ev_rt_now;
}

/* looks weird, but ev_is_default_loop (EV_A) still works if this exists */
EV_INLINE int
ev_is_default_loop (void) EV_THROW
{
  return 1;
}

#endif /* multiplicity */

/* destroy event loops, also works for the default loop */
/* (not declared EV_THROW, unlike most other functions here) */
EV_API_DECL void ev_loop_destroy (EV_P);

/* this needs to be called after fork, to duplicate the loop */
/* when you want to re-use it in the child */
/* you can call it in either the parent or the child */
/* you can actually call it at any time, anywhere :) */
EV_API_DECL void ev_loop_fork (EV_P) EV_THROW;

EV_API_DECL unsigned int ev_backend (EV_P) EV_THROW; /* backend in use by loop */

EV_API_DECL void ev_now_update (EV_P) EV_THROW; /* update event loop time */

#if EV_WALK_ENABLE
/* walk (almost) all watchers in the loop of a given type, invoking the */
/* callback on every such watcher. The callback might stop the watcher, */
/* but do nothing else with the loop */
/* types is a mask of the EV_* event type bits */
EV_API_DECL void ev_walk (EV_P_ int types, void (*cb)(EV_P_ int type, void *w)) EV_THROW;
#endif

#endif /* prototypes */

/* ev_run flags values */
/* (passed as the flags argument of ev_run, which defaults to 0 in C++) */
enum {
  EVRUN_NOWAIT = 1, /* do not block/wait */
  EVRUN_ONCE   = 2  /* block *once* only */
};

/* ev_break how values */
enum {
  EVBREAK_CANCEL = 0, /* undo unloop */
  EVBREAK_ONE    = 1, /* unloop once */
  EVBREAK_ALL    = 2  /* unloop all loops */
};

#if EV_PROTOTYPES
/* runs the loop; flags is 0 or one of the EVRUN_* values */
/* (not declared EV_THROW, unlike most other functions here) */
EV_API_DECL int  ev_run (EV_P_ int flags EV_CPP (= 0));
EV_API_DECL void ev_break (EV_P_ int how EV_CPP (= EVBREAK_ONE)) EV_THROW; /* break out of the loop */

/*
 * ref/unref can be used to add or remove a refcount on the mainloop. every watcher
 * keeps one reference. if you have a long-running watcher you never unregister that
 * should not keep ev_loop from running, unref() after starting, and ref() before stopping.
 */
EV_API_DECL void ev_ref   (EV_P) EV_THROW;
EV_API_DECL void ev_unref (EV_P) EV_THROW;

/*
 * convenience function, wait for a single event, without registering an event watcher
 * if timeout is < 0, do wait indefinitely
 * if fd is < 0, only the timeout is waited for
 */
EV_API_DECL void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_THROW;

# if EV_FEATURE_API
EV_API_DECL unsigned int ev_iteration (EV_P) EV_THROW; /* number of loop iterations */
EV_API_DECL unsigned int ev_depth     (EV_P) EV_THROW; /* #ev_loop enters - #ev_loop leaves */
EV_API_DECL void         ev_verify    (EV_P) EV_THROW; /* abort if loop data corrupted */

EV_API_DECL void ev_set_io_collect_interval (EV_P_ ev_tstamp interval) EV_THROW; /* sleep at least this time, default 0 */
EV_API_DECL void ev_set_timeout_collect_interval (EV_P_ ev_tstamp interval) EV_THROW; /* sleep at least this time, default 0 */

/* advanced stuff for threading etc. support, see docs */
EV_API_DECL void ev_set_userdata (EV_P_ void *data) EV_THROW;
EV_API_DECL void *ev_userdata (EV_P) EV_THROW;
typedef void (*ev_loop_callback)(EV_P);
EV_API_DECL void ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending_cb) EV_THROW;
/* C++ doesn't allow the use of the ev_loop_callback typedef here, so we need to spell it out */
EV_API_DECL void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_THROW, void (*acquire)(EV_P) EV_THROW) EV_THROW;

EV_API_DECL unsigned int ev_pending_count (EV_P) EV_THROW; /* number of pending events, if any */
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */

/*
 * stop/start the timer handling.
 */
EV_API_DECL void ev_suspend (EV_P) EV_THROW;
EV_API_DECL void ev_resume  (EV_P) EV_THROW;
#endif /* EV_FEATURE_API */

#endif /* EV_PROTOTYPES */

/*
 * note for all watcher macros below: the ev argument may be evaluated
 * multiple times, every other argument at most once.
 *
 * a watcher must be initialised before first use, either with ev_init
 * followed by ev_TYPE_set, or with the combined ev_TYPE_init macros below.
 *
 * ev_init is the type-independent part: it marks the watcher as neither
 * pending nor active, resets its priority to 0 and installs the callback.
 */
#define ev_init(ev,cb_) do {                       \
  ((ev_watcher *)(void *)(ev))->pending = 0;       \
  ((ev_watcher *)(void *)(ev))->active  = 0;       \
  ev_set_priority ((ev), 0);                       \
  ev_set_cb ((ev), cb_);                           \
} while (0)

/*
 * ev_TYPE_set: fill in the type-specific fields of a watcher.
 * macro parameters carry a trailing underscore so they cannot collide with
 * the struct member names they are assigned to (fd, events, repeat, ...).
 *
 * ev_io_set       - additionally ORs EV__IOFDSET into the stored event mask
 *                   (NOTE(review): presumably an internal "fd was set" marker - confirm).
 * ev_timer_set    - writes the start time through the ev_watcher_time view of the watcher.
 * ev_child_set    - normalises trace_ to 0 or 1 via !!.
 * ev_stat_set     - also resets wd to -2 (NOTE(review): looks like a "not yet
 *                   registered" sentinel - confirm in ev.c).
 * watcher types without type-specific data get an empty _set macro so that
 * the ev_TYPE_init macros can be written uniformly.
 */
#define ev_io_set(ev,fd_,events_)            do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0)
#define ev_timer_set(ev,after_,repeat_)      do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0)
#define ev_periodic_set(ev,ofs_,ival_,rcb_)  do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0)
#define ev_signal_set(ev,signum_)            do { (ev)->signum = (signum_); } while (0)
#define ev_child_set(ev,pid_,trace_)         do { (ev)->pid = (pid_); (ev)->flags = !!(trace_); } while (0)
#define ev_stat_set(ev,path_,interval_)      do { (ev)->path = (path_); (ev)->interval = (interval_); (ev)->wd = -2; } while (0)
#define ev_idle_set(ev)                      /* nop, yes, this is a serious in-joke */
#define ev_prepare_set(ev)                   /* nop, yes, this is a serious in-joke */
#define ev_check_set(ev)                     /* nop, yes, this is a serious in-joke */
#define ev_embed_set(ev,other_)              do { (ev)->other = (other_); } while (0)
#define ev_fork_set(ev)                      /* nop, yes, this is a serious in-joke */
#define ev_cleanup_set(ev)                   /* nop, yes, this is a serious in-joke */
#define ev_async_set(ev)                     /* nop, yes, this is a serious in-joke */

/*
 * ev_TYPE_init: one-step initialisation, equivalent to calling ev_init (ev, cb)
 * followed by the matching ev_TYPE_set with the remaining arguments.
 * as with ev_init, the ev argument is expanded more than once.
 */
#define ev_io_init(ev,cb,fd,events)          do { ev_init ((ev), (cb)); ev_io_set ((ev),(fd),(events)); } while (0)
#define ev_timer_init(ev,cb,after,repeat)    do { ev_init ((ev), (cb)); ev_timer_set ((ev),(after),(repeat)); } while (0)
#define ev_periodic_init(ev,cb,ofs,ival,rcb) do { ev_init ((ev), (cb)); ev_periodic_set ((ev),(ofs),(ival),(rcb)); } while (0)
#define ev_signal_init(ev,cb,signum)         do { ev_init ((ev), (cb)); ev_signal_set ((ev), (signum)); } while (0)
#define ev_child_init(ev,cb,pid,trace)       do { ev_init ((ev), (cb)); ev_child_set ((ev),(pid),(trace)); } while (0)
#define ev_stat_init(ev,cb,path,interval)    do { ev_init ((ev), (cb)); ev_stat_set ((ev),(path),(interval)); } while (0)
#define ev_idle_init(ev,cb)                  do { ev_init ((ev), (cb)); ev_idle_set ((ev)); } while (0)
#define ev_prepare_init(ev,cb)               do { ev_init ((ev), (cb)); ev_prepare_set ((ev)); } while (0)
#define ev_check_init(ev,cb)                 do { ev_init ((ev), (cb)); ev_check_set ((ev)); } while (0)
#define ev_embed_init(ev,cb,other)           do { ev_init ((ev), (cb)); ev_embed_set ((ev),(other)); } while (0)
#define ev_fork_init(ev,cb)                  do { ev_init ((ev), (cb)); ev_fork_set ((ev)); } while (0)
#define ev_cleanup_init(ev,cb)               do { ev_init ((ev), (cb)); ev_cleanup_set ((ev)); } while (0)
#define ev_async_init(ev,cb)                 do { ev_init ((ev), (cb)); ev_async_set ((ev)); } while (0)

/*
 * state queries, usable on any watcher type (the argument is viewed as an
 * ev_watcher). the "0 +" turns the member access into a plain value, so the
 * macros cannot be used as assignment targets.
 */
#define ev_is_pending(ev)                    (0 + ((ev_watcher *)(void *)(ev))->pending) /* ro, true when watcher is waiting for callback invocation */
#define ev_is_active(ev)                     (0 + ((ev_watcher *)(void *)(ev))->active) /* ro, true when the watcher has been started */

/*
 * callback access. ev_cb_ is the raw, typed cb member of the watcher.
 * ev_cb first copies the cb member of the generic ev_watcher view into the
 * typed cb member with memmove, then yields the typed member.
 * NOTE(review): the memmove round-trip is presumably there to keep the
 * differently-typed views of cb consistent under strict aliasing - confirm.
 */
#define ev_cb_(ev)                           (ev)->cb /* rw */
#define ev_cb(ev)                            (memmove (&ev_cb_ (ev), &((ev_watcher *)(ev))->cb, sizeof (ev_cb_ (ev))), (ev)->cb)

/*
 * priority access. when only a single priority level is configured
 * (EV_MINPRI == EV_MAXPRI) no priority field is touched: ev_priority always
 * yields EV_MINPRI and ev_set_priority merely evaluates its arguments.
 * otherwise both macros access the priority member of the ev_watcher view.
 */
#if EV_MINPRI == EV_MAXPRI
# define ev_priority(ev)                     ((ev), EV_MINPRI)
# define ev_set_priority(ev,pri)             ((ev), (pri))
#else
# define ev_priority(ev)                     (+(((ev_watcher *)(void *)(ev))->priority))
# define ev_set_priority(ev,pri)             (   (ev_watcher *)(void *)(ev))->priority = (pri)
#endif

/* read-only access to the at member, via the ev_watcher_time view of the watcher */
#define ev_periodic_at(ev)                   (+((ev_watcher_time *)(ev))->at)

/*
 * ev_set_cb stores cb_ in the typed cb member and then copies it into the cb
 * member of the generic ev_watcher view. it can be overridden by defining
 * ev_set_cb before this point.
 */
#ifndef ev_set_cb
# define ev_set_cb(ev,cb_)                   (ev_cb_ (ev) = (cb_), memmove (&((ev_watcher *)(ev))->cb, &ev_cb_ (ev), sizeof (ev_cb_ (ev))))
#endif

/* starting (enabling, adding) a watcher does nothing if it is already running */
/* stopping (disabling, deleting) a watcher does nothing unless it's already running */
#if EV_PROTOTYPES

/* feeds an event into a watcher as if the event actually occurred */
/* accepts any ev_watcher type */
EV_API_DECL void ev_feed_event     (EV_P_ void *w, int revents) EV_THROW;
EV_API_DECL void ev_feed_fd_event  (EV_P_ int fd, int revents) EV_THROW;
#if EV_SIGNAL_ENABLE
/* simulates receipt of signal signum; takes no loop argument and may be called */
/* at any time from any context, including signal handlers and other threads */
EV_API_DECL void ev_feed_signal    (int signum) EV_THROW;
EV_API_DECL void ev_feed_signal_event (EV_P_ int signum) EV_THROW;
#endif
/* note: ev_invoke is declared without EV_THROW, unlike its neighbours */
EV_API_DECL void ev_invoke         (EV_P_ void *w, int revents);
EV_API_DECL int  ev_clear_pending  (EV_P_ void *w) EV_THROW;

/* ev_io: file descriptor events */
EV_API_DECL void ev_io_start       (EV_P_ ev_io *w) EV_THROW;
EV_API_DECL void ev_io_stop        (EV_P_ ev_io *w) EV_THROW;

/* ev_timer: relative timers */
EV_API_DECL void ev_timer_start    (EV_P_ ev_timer *w) EV_THROW;
EV_API_DECL void ev_timer_stop     (EV_P_ ev_timer *w) EV_THROW;
/* stops if active and no repeat, restarts if active and repeating, starts if inactive and repeating */
EV_API_DECL void ev_timer_again    (EV_P_ ev_timer *w) EV_THROW;
/* return remaining time */
EV_API_DECL ev_tstamp ev_timer_remaining (EV_P_ ev_timer *w) EV_THROW;

/*
 * start/stop functions for the optional watcher types. each group is only
 * declared when the corresponding EV_*_ENABLE feature macro is set.
 */

/* ev_periodic: absolute timers with customised rescheduling */
#if EV_PERIODIC_ENABLE
EV_API_DECL void ev_periodic_start (EV_P_ ev_periodic *w) EV_THROW;
EV_API_DECL void ev_periodic_stop  (EV_P_ ev_periodic *w) EV_THROW;
EV_API_DECL void ev_periodic_again (EV_P_ ev_periodic *w) EV_THROW;
#endif

/* ev_signal: synchronous signals */
/* only supported in the default loop */
#if EV_SIGNAL_ENABLE
EV_API_DECL void ev_signal_start   (EV_P_ ev_signal *w) EV_THROW;
EV_API_DECL void ev_signal_stop    (EV_P_ ev_signal *w) EV_THROW;
#endif

/* ev_child: process status change events */
/* only supported in the default loop */
# if EV_CHILD_ENABLE
EV_API_DECL void ev_child_start    (EV_P_ ev_child *w) EV_THROW;
EV_API_DECL void ev_child_stop     (EV_P_ ev_child *w) EV_THROW;
# endif

/* ev_stat: file watchers */
# if EV_STAT_ENABLE
EV_API_DECL void ev_stat_start     (EV_P_ ev_stat *w) EV_THROW;
EV_API_DECL void ev_stat_stop      (EV_P_ ev_stat *w) EV_THROW;
EV_API_DECL void ev_stat_stat      (EV_P_ ev_stat *w) EV_THROW;
# endif

/* ev_idle, ev_prepare, ev_check: watchers dealing with the event loop mechanism itself */
# if EV_IDLE_ENABLE
EV_API_DECL void ev_idle_start     (EV_P_ ev_idle *w) EV_THROW;
EV_API_DECL void ev_idle_stop      (EV_P_ ev_idle *w) EV_THROW;
# endif

#if EV_PREPARE_ENABLE
EV_API_DECL void ev_prepare_start  (EV_P_ ev_prepare *w) EV_THROW;
EV_API_DECL void ev_prepare_stop   (EV_P_ ev_prepare *w) EV_THROW;
#endif

#if EV_CHECK_ENABLE
EV_API_DECL void ev_check_start    (EV_P_ ev_check *w) EV_THROW;
EV_API_DECL void ev_check_stop     (EV_P_ ev_check *w) EV_THROW;
#endif

/* ev_fork: (limited) support for fork events */
# if EV_FORK_ENABLE
EV_API_DECL void ev_fork_start     (EV_P_ ev_fork *w) EV_THROW;
EV_API_DECL void ev_fork_stop      (EV_P_ ev_fork *w) EV_THROW;
# endif

# if EV_CLEANUP_ENABLE
EV_API_DECL void ev_cleanup_start  (EV_P_ ev_cleanup *w) EV_THROW;
EV_API_DECL void ev_cleanup_stop   (EV_P_ ev_cleanup *w) EV_THROW;
# endif

/* ev_embed: embedding one event loop into another */
# if EV_EMBED_ENABLE
/* only supported when loop to be embedded is in fact embeddable */
EV_API_DECL void ev_embed_start    (EV_P_ ev_embed *w) EV_THROW;
EV_API_DECL void ev_embed_stop     (EV_P_ ev_embed *w) EV_THROW;
EV_API_DECL void ev_embed_sweep    (EV_P_ ev_embed *w) EV_THROW;
# endif

/* ev_async: inter-thread wakeup */
# if EV_ASYNC_ENABLE
EV_API_DECL void ev_async_start    (EV_P_ ev_async *w) EV_THROW;
EV_API_DECL void ev_async_stop     (EV_P_ ev_async *w) EV_THROW;
EV_API_DECL void ev_async_send     (EV_P_ ev_async *w) EV_THROW;
# endif

/*
 * libev 3 compatibility layer.
 * with EV_COMPAT3 the old EVLOOP_ and EVUNLOOP_ constants are aliases for the
 * current EVRUN_ and EVBREAK_ ones, and the old function names are thin
 * inline wrappers around their current equivalents.
 * because the old API has a function called ev_loop, the name can only be
 * made a typedef for struct ev_loop when the compatibility layer is disabled.
 */
#if EV_COMPAT3
  #define EVLOOP_NONBLOCK EVRUN_NOWAIT
  #define EVLOOP_ONESHOT  EVRUN_ONCE
  #define EVUNLOOP_CANCEL EVBREAK_CANCEL
  #define EVUNLOOP_ONE    EVBREAK_ONE
  #define EVUNLOOP_ALL    EVBREAK_ALL
  #if EV_PROTOTYPES
    EV_INLINE void ev_loop   (EV_P_ int flags) { ev_run   (EV_A_ flags); }
    EV_INLINE void ev_unloop (EV_P_ int how  ) { ev_break (EV_A_ how  ); }
    /* the two wrappers below always operate on the default loop */
    EV_INLINE void ev_default_destroy (void) { ev_loop_destroy (EV_DEFAULT); }
    EV_INLINE void ev_default_fork    (void) { ev_loop_fork    (EV_DEFAULT); }
    #if EV_FEATURE_API
      EV_INLINE unsigned int ev_loop_count  (EV_P) { return ev_iteration  (EV_A); }
      EV_INLINE unsigned int ev_loop_depth  (EV_P) { return ev_depth      (EV_A); }
      EV_INLINE void         ev_loop_verify (EV_P) {        ev_verify     (EV_A); }
    #endif
  #endif
#else
  typedef struct ev_loop ev_loop;
#endif

#endif

EV_CPP(})

#endif


================================================
FILE: libev/ev.pod
================================================
=encoding utf-8

=head1 NAME

libev - a high performance full-featured event loop written in C

=head1 SYNOPSIS

   #include <ev.h>

=head2 EXAMPLE PROGRAM

   // a single header file is required
   #include <ev.h>

   #include <stdio.h> // for puts

   // every watcher type has its own typedef'd struct
   // with the name ev_TYPE
   ev_io stdin_watcher;
   ev_timer timeout_watcher;

   // all watcher callbacks have a similar signature
   // this callback is called when data is readable on stdin
   static void
   stdin_cb (EV_P_ ev_io *w, int revents)
   {
     puts ("stdin ready");
     // for one-shot events, one must manually stop the watcher
     // with its corresponding stop function.
     ev_io_stop (EV_A_ w);

     // this causes all nested ev_run's to stop iterating
     ev_break (EV_A_ EVBREAK_ALL);
   }

   // another callback, this time for a time-out
   static void
   timeout_cb (EV_P_ ev_timer *w, int revents)
   {
     puts ("timeout");
     // this causes the innermost ev_run to stop iterating
     ev_break (EV_A_ EVBREAK_ONE);
   }

   int
   main (void)
   {
     // use the default event loop unless you have special needs
     struct ev_loop *loop = EV_DEFAULT;

     // initialise an io watcher, then start it
     // this one will watch for stdin to become readable
     ev_io_init (&stdin_watcher, stdin_cb, /*STDIN_FILENO*/ 0, EV_READ);
     ev_io_start (loop, &stdin_watcher);

     // initialise a timer watcher, then start it
     // simple non-repeating 5.5 second timeout
     ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.);
     ev_timer_start (loop, &timeout_watcher);

     // now wait for events to arrive
     ev_run (loop, 0);

     // break was called, so exit
     return 0;
   }

=head1 ABOUT THIS DOCUMENT

This document documents the libev software package.

The newest version of this document is also available as an html-formatted
web page you might find easier to navigate when reading it for the first
time: L<http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod>.

While this document tries to be as complete as possible in documenting
libev, its usage and the rationale behind its design, it is not a tutorial
on event-based programming, nor will it introduce event-based programming
with libev.

Familiarity with event based programming techniques in general is assumed
throughout this document.

=head1 WHAT TO READ WHEN IN A HURRY

This manual tries to be very detailed, but unfortunately, this also makes
it very long. If you just want to know the basics of libev, I suggest
reading L</ANATOMY OF A WATCHER>, then the L</EXAMPLE PROGRAM> above and
look up the missing functions in L</GLOBAL FUNCTIONS> and the C<ev_io> and
C<ev_timer> sections in L</WATCHER TYPES>.

=head1 ABOUT LIBEV

Libev is an event loop: you register interest in certain events (such as a
file descriptor being readable or a timeout occurring), and it will manage
these event sources and provide your program with events.

To do this, it must take more or less complete control over your process
(or thread) by executing the I<event loop> handler, and will then
communicate events via a callback mechanism.

You register interest in certain events by registering so-called I<event
watchers>, which are relatively small C structures you initialise with the
details of the event, and then hand it over to libev by I<starting> the
watcher.

=head2 FEATURES

Libev supports C<select>, C<poll>, the Linux-specific C<epoll>, the
BSD-specific C<kqueue> and the Solaris-specific event port mechanisms
for file descriptor events (C<ev_io>), the Linux C<inotify> interface
(for C<ev_stat>), Linux eventfd/signalfd (for faster and cleaner
inter-thread wakeup (C<ev_async>)/signal handling (C<ev_signal>)) relative
timers (C<ev_timer>), absolute timers with customised rescheduling
(C<ev_periodic>), synchronous signals (C<ev_signal>), process status
change events (C<ev_child>), and event watchers dealing with the event
loop mechanism itself (C<ev_idle>, C<ev_embed>, C<ev_prepare> and
C<ev_check> watchers) as well as file watchers (C<ev_stat>) and even
limited support for fork events (C<ev_fork>).

It also is quite fast (see this
L<benchmark|http://libev.schmorp.de/bench.html> comparing it to libevent
for example).

=head2 CONVENTIONS

Libev is very configurable. In this manual the default (and most common)
configuration will be described, which supports multiple event loops. For
more info about various configuration options please have a look at
B<EMBED> section in this manual. If libev was configured without support
for multiple event loops, then all functions taking an initial argument of
name C<loop> (which is always of type C<struct ev_loop *>) will not have
this argument.

=head2 TIME REPRESENTATION

Libev represents time as a single floating point number, representing
the (fractional) number of seconds since the (POSIX) epoch (in practice
somewhere near the beginning of 1970, details are complicated, don't
ask). This type is called C<ev_tstamp>, which is what you should use
too. It usually aliases to the C<double> type in C. When you need to do
any calculations on it, you should treat it as some floating point value.

Unlike the name component C<stamp> might indicate, it is also used for
time differences (e.g. delays) throughout libev.

=head1 ERROR HANDLING

Libev knows three classes of errors: operating system errors, usage errors
and internal errors (bugs).

When libev catches an operating system error it cannot handle (for example
a system call indicating a condition libev cannot fix), it calls the callback
set via C<ev_set_syserr_cb>, which is supposed to fix the problem or
abort. The default is to print a diagnostic message and to call C<abort
()>.

When libev detects a usage error such as a negative timer interval, then
it will print a diagnostic message and abort (via the C<assert> mechanism,
so C<NDEBUG> will disable this checking): these are programming errors in
the libev caller and need to be fixed there.

Libev also has a few internal error-checking C<assert>ions, and also has
extensive consistency checking code. These do not trigger under normal
circumstances, as they indicate either a bug in libev or worse.


=head1 GLOBAL FUNCTIONS

These functions can be called anytime, even before initialising the
library in any way.

=over 4

=item ev_tstamp ev_time ()

Returns the current time as libev would use it. Please note that the
C<ev_now> function is usually faster and also often returns the timestamp
you actually want to know. Also interesting is the combination of
C<ev_now_update> and C<ev_now>.

=item ev_sleep (ev_tstamp interval)

Sleep for the given interval: The current thread will be blocked
until either it is interrupted or the given time interval has
passed (approximately - it might return a bit earlier even if not
interrupted). Returns immediately if C<< interval <= 0 >>.

Basically this is a sub-second-resolution C<sleep ()>.

The range of the C<interval> is limited - libev only guarantees to work
with sleep times of up to one day (C<< interval <= 86400 >>).

=item int ev_version_major ()

=item int ev_version_minor ()

You can find out the major and minor ABI version numbers of the library
you linked against by calling the functions C<ev_version_major> and
C<ev_version_minor>. If you want, you can compare against the global
symbols C<EV_VERSION_MAJOR> and C<EV_VERSION_MINOR>, which specify the
version of the library your program was compiled against.

These version numbers refer to the ABI version of the library, not the
release version.

Usually, it's a good idea to terminate if the major versions mismatch,
as this indicates an incompatible change. Minor versions are usually
compatible to older versions, so a larger minor version alone is usually
not a problem.

Example: Make sure we haven't accidentally been linked against the wrong
version (note, however, that this will not detect other ABI mismatches,
such as LFS or reentrancy).

   assert (("libev version mismatch",
            ev_version_major () == EV_VERSION_MAJOR
            && ev_version_minor () >= EV_VERSION_MINOR));

=item unsigned int ev_supported_backends ()

Return the set of all backends (i.e. their corresponding C<EV_BACKEND_*>
value) compiled into this binary of libev (independent of their
availability on the system you are running on). See C<ev_default_loop> for
a description of the set values.

Example: make sure we have the epoll method, because yeah this is cool and
a must have and can we have a torrent of it please!!!11

   assert (("sorry, no epoll, no sex",
            ev_supported_backends () & EVBACKEND_EPOLL));

=item unsigned int ev_recommended_backends ()

Return the set of all backends compiled into this binary of libev and
also recommended for this platform, meaning it will work for most file
descriptor types. This set is often smaller than the one returned by
C<ev_supported_backends>, as for example kqueue is broken on most BSDs
and will not be auto-detected unless you explicitly request it (assuming
you know what you are doing). This is the set of backends that libev will
probe for if you specify no backends explicitly.

=item unsigned int ev_embeddable_backends ()

Returns the set of backends that are embeddable in other event loops. This
value is platform-specific but can include backends not available on the
current system. To find which embeddable backends might be supported on
the current system, you would need to look at C<ev_embeddable_backends ()
& ev_supported_backends ()>, likewise for recommended ones.

See the description of C<ev_embed> watchers for more info.

=item ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())

Sets the allocation function to use (the prototype is similar - the
semantics are identical to the C<realloc> C89/SuS/POSIX function). It is
used to allocate and free memory (no surprises here). If it returns zero
when memory needs to be allocated (C<size != 0>), the library might abort
or take some potentially destructive action.

Since some systems (at least OpenBSD and Darwin) fail to implement
correct C<realloc> semantics, libev will use a wrapper around the system
C<realloc> and C<free> functions by default.

You could override this function in high-availability programs to, say,
free some memory if it cannot allocate memory, to use a special allocator,
or even to sleep a while and retry until some memory is available.

Example: Replace the libev allocator with one that waits a bit and then
retries (example requires a standards-compliant C<realloc>).

   // allocator that never reports failure: it keeps retrying until
   // the requested memory becomes available.
   // ptr/size have realloc semantics; a size of 0 frees ptr and returns 0.
   static void *
   persistent_realloc (void *ptr, size_t size)
   {
     // a size of zero is a request to free the block. realloc may
     // return 0 for it, which must not be mistaken for an allocation
     // failure, or we would sleep and retry forever.
     if (!size)
       {
         free (ptr);
         return 0;
       }

     for (;;)
       {
         void *newptr = realloc (ptr, size);

         if (newptr)
           return newptr;

         // out of memory: wait a while, then try again
         sleep (60);
       }
   }

   ...
   ev_set_allocator (persistent_realloc);

=item ev_set_syserr_cb (void (*cb)(const char *msg) throw ())

Set the callback function to call on a retryable system call error (such
as failed select, poll, epoll_wait). The message is a printable string
indicating the system call or subsystem causing the problem. If this
callback is set, then libev will expect it to remedy the situation, no
matter what, when it returns. That is, libev will generally retry the
requested operation, or, if the condition doesn't go away, do bad stuff
(such as abort).

Example: This is basically the same thing that libev does internally, too.

   static void
   fatal_error (const char *msg)
   {
     perror (msg);
     abort ();
   }

   ...
   ev_set_syserr_cb (fatal_error);

=item ev_feed_signal (int signum)

This function can be used to "simulate" a signal receive. It is completely
safe to call this function at any time, from any context, including signal
handlers or random threads.

Its main use is to customise signal handling in your process, especially
in the presence of threads. For example, you could block signals
by default in all threads (and specifying C<EVFLAG_NOSIGMASK> when
creating any loops), and in one thread, use C<sigwait> or any other
mechanism to wait for signals, then "deliver" them to libev by calling
C<ev_feed_signal>.

=back

=head1 FUNCTIONS CONTROLLING EVENT LOOPS

An event loop is described by a C<struct ev_loop *> (the C<struct> is
I<not> optional in this case unless libev 3 compatibility is disabled, as
libev 3 had an C<ev_loop> function colliding with the struct name).

The library knows two types of such loops, the I<default> loop, which
supports child process events, and dynamically created event loops which
do not.

=over 4

=item struct ev_loop *ev_default_loop (unsigned int flags)

This returns the "default" event loop object, which is what you should
normally use when you just need "the event loop". Event loop objects and
the C<flags> parameter are described in more detail in the entry for
C<ev_loop_new>.

If the default loop is already initialised then this function simply
returns it (and ignores the flags. If that is troubling you, check
C<ev_backend ()> afterwards). Otherwise it will create it with the given
flags, which should almost always be C<0>, unless the caller is also the
one calling C<ev_run> or otherwise qualifies as "the main program".

If you don't know what event loop to use, use the one returned from this
function (or via the C<EV_DEFAULT> macro).

Note that this function is I<not> thread-safe, so if you want to use it
from multiple threads, you have to employ some kind of mutex (note also
that this case is unlikely, as loops cannot be shared easily between
threads anyway).

The default loop is the only loop that can handle C<ev_child> watchers,
and to do this, it always registers a handler for C<SIGCHLD>. If this is
a problem for your application you can either create a dynamic loop with
C<ev_loop_new> which doesn't do that, or you can simply overwrite the
C<SIGCHLD> signal handler I<after> calling C<ev_default_loop>.

Example: This is the most typical usage.

   if (!ev_default_loop (0))
     fatal ("could not initialise libev, bad $LIBEV_FLAGS in environment?");

Example: Restrict libev to the select and poll backends, and do not allow
environment settings to be taken into account:

   ev_default_loop (EVBACKEND_POLL | EVBACKEND_SELECT | EVFLAG_NOENV);

=item struct ev_loop *ev_loop_new (unsigned int flags)

This will create and initialise a new event loop object. If the loop
could not be initialised, returns false.

This function is thread-safe, and one common way to use libev with
threads is indeed to create one loop per thread, and using the default
loop in the "main" or "initial" thread.

The flags argument can be used to specify special behaviour or specific
backends to use, and is usually specified as C<0> (or C<EVFLAG_AUTO>).

The following flags are supported:

=over 4

=item C<EVFLAG_AUTO>

The default flags value. Use this if you have no clue (it's the right
thing, believe me).

=item C<EVFLAG_NOENV>

If this flag bit is or'ed into the flag value (or the program runs setuid
or setgid) then libev will I<not> look at the environment variable
C<LIBEV_FLAGS>. Otherwise (the default), this environment variable will
override the flags completely if it is found in the environment. This is
useful to try out specific backends to test their performance, to work
around bugs, or to make libev threadsafe (accessing environment variables
cannot be done in a threadsafe way, but usually it works if no other
thread modifies them).

=item C<EVFLAG_FORKCHECK>

Instead of calling C<ev_loop_fork> manually after a fork, you can also
make libev check for a fork in each iteration by enabling this flag.

This works by calling C<getpid ()> on every iteration of the loop,
and thus this might slow down your event loop if you do a lot of loop
iterations and little real work, but is usually not noticeable (on my
GNU/Linux system for example, C<getpid> is actually a simple 5-insn
sequence without a system call and thus I<very> fast, but my GNU/Linux
system also has C<pthread_atfork> which is even faster). (Update: glibc
version 2.25 apparently removed the C<getpid> optimisation again).

The big advantage of this flag is that you can forget about fork (and
forget about forgetting to tell libev about forking, although you still
have to ignore C<SIGPIPE>) when you use this flag.

This flag setting cannot be overridden or specified in the C<LIBEV_FLAGS>
environment variable.

=item C<EVFLAG_NOINOTIFY>

When this flag is specified, then libev will not attempt to use the
I<inotify> API for its C<ev_stat> watchers. Apart from debugging and
testing, this flag can be useful to conserve inotify file descriptors, as
otherwise each loop using C<ev_stat> watchers consumes one inotify handle.

=item C<EVFLAG_SIGNALFD>

When this flag is specified, then libev will attempt to use the
I<signalfd> API for its C<ev_signal> (and C<ev_child>) watchers. This API
delivers signals synchronously, which makes it both faster and might make
it possible to get the queued signal data. It can also simplify signal
handling with threads, as long as you properly block signals in your
threads that are not interested in handling them.

Signalfd will not be used by default as this changes your signal mask, and
there are a lot of shoddy libraries and programs (glib's threadpool for
example) that can't properly initialise their signal masks.

=item C<EVFLAG_NOSIGMASK>

When this flag is specified, then libev will avoid modifying the signal
mask. Specifically, this means you have to make sure signals are unblocked
when you want to receive them.

This behaviour is useful when you want to do your own signal handling, or
want to handle signals only in specific threads and want to avoid libev
unblocking the signals.

It's also required by POSIX in a threaded program, as libev calls
C<sigprocmask>, whose behaviour is officially unspecified.

This flag's behaviour will become the default in future versions of libev.

=item C<EVBACKEND_SELECT>  (value 1, portable select backend)

This is your standard select(2) backend. Not I<completely> standard, as
libev tries to roll its own fd_set with no limits on the number of fds,
but if that fails, expect a fairly low limit on the number of fds when
using this backend. It doesn't scale too well (O(highest_fd)), but it's
usually the fastest backend for a low number of (low-numbered :) fds.

To get good performance out of this backend you need a high amount of
parallelism (most of the file descriptors should be busy). If you are
writing a server, you should C<accept ()> in a loop to accept as many
connections as possible during one iteration. You might also want to have
a look at C<ev_set_io_collect_interval ()> to increase the amount of
readiness notifications you get per iteration.

This backend maps C<EV_READ> to the C<readfds> set and C<EV_WRITE> to the
C<writefds> set (and to work around Microsoft Windows bugs, also onto the
C<exceptfds> set on that platform).

=item C<EVBACKEND_POLL>    (value 2, poll backend, available everywhere except on windows)

And this is your standard poll(2) backend. It's more complicated
than select, but handles sparse fds better and has no artificial
limit on the number of fds you can use (except it will slow down
considerably with a lot of inactive fds). It scales similarly to select,
i.e. O(total_fds). See the entry for C<EVBACKEND_SELECT>, above, for
performance tips.

This backend maps C<EV_READ> to C<POLLIN | POLLERR | POLLHUP>, and
C<EV_WRITE> to C<POLLOUT | POLLERR | POLLHUP>.

=item C<EVBACKEND_EPOLL>   (value 4, Linux)

Use the linux-specific epoll(7) interface (for both pre- and post-2.6.9
kernels).

For few fds, this backend is a little bit slower than poll and select, but
it scales phenomenally better. While poll and select usually scale like
O(total_fds) where total_fds is the total number of fds (or the highest
fd), epoll scales either O(1) or O(active_fds).

The epoll mechanism deserves honorable mention as the most misdesigned
of the more advanced event mechanisms: mere annoyances include silently
dropping file descriptors, requiring a system call per change per file
descriptor (and unnecessary guessing of parameters), problems with dup,
returning before the timeout value, resulting in additional iterations
(and only giving 5ms accuracy while select on the same platform gives
0.1ms) and so on. The biggest issue is fork races, however - if a program
forks then I<both> parent and child process have to recreate the epoll
set, which can take considerable time (one syscall per file descriptor)
and is of course hard to detect.

Epoll is also notoriously buggy - embedding epoll fds I<should> work,
but of course I<doesn't>, and epoll just loves to report events for
totally I<different> file descriptors (even already closed ones, so
one cannot even remove them from the set) than registered in the set
(especially on SMP systems). Libev tries to counter these spurious
notifications by employing an additional generation counter and comparing
that against the events to filter out spurious ones, recreating the set
when required. Epoll also erroneously rounds down timeouts, but gives you
no way to know when and by how much, so sometimes you have to busy-wait
because epoll returns immediately despite a nonzero timeout. And last
but not least, it also refuses to work with some file descriptors which work
perfectly fine with C<select> (files, many character devices...).

Epoll is truly the train wreck among event poll mechanisms, a frankenpoll,
cobbled together in a hurry, no thought to design or interaction with
others. Oh, the pain, will it ever stop...

While stopping, setting and starting an I/O watcher in the same iteration
will result in some caching, there is still a system call per such
incident (because the same I<file descriptor> could point to a different
I<file description> now), so it's best to avoid that. Also, C<dup ()>'ed
file descriptors might not work very well if you register events for both
file descriptors.

Best performance from this backend is achieved by not unregistering all
watchers for a file descriptor until it has been closed, if possible,
i.e. keep at least one watcher active per fd at all times. Stopping and
starting a watcher (without re-setting it) also usually doesn't cause
extra overhead. A fork can both result in spurious notifications as well
as in libev having to destroy and recreate the epoll object, which can
take considerable time and thus should be avoided.

All this means that, in practice, C<EVBACKEND_SELECT> can be as fast or
faster than epoll for maybe up to a hundred file descriptors, depending on
the usage. So sad.

While nominally embeddable in other event loops, this feature is broken in
all kernel versions tested so far.

This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
C<EVBACKEND_POLL>.

=item C<EVBACKEND_KQUEUE>  (value 8, most BSD clones)

Kqueue deserves special mention, as at the time of this writing, it
was broken on all BSDs except NetBSD (usually it doesn't work reliably
with anything but sockets and pipes, except on Darwin, where of course
it's completely useless). Unlike epoll, however, whose brokenness
is by design, these kqueue bugs can (and eventually will) be fixed
without API changes to existing programs. For this reason it's not being
"auto-detected" unless you explicitly specify it in the flags (i.e. using
C<EVBACKEND_KQUEUE>) or libev was compiled on a known-to-be-good (-enough)
system like NetBSD.

You still can embed kqueue into a normal poll or select backend and use it
only for sockets (after having made sure that sockets work with kqueue on
the target platform). See C<ev_embed> watchers for more info.

It scales in the same way as the epoll backend, but the interface to the
kernel is more efficient (which says nothing about its actual speed, of
course). While stopping, setting and starting an I/O watcher never
causes an extra system call as with C<EVBACKEND_EPOLL>, it still adds up to
two event changes per incident. Support for C<fork ()> is very bad (you
might have to leak fd's on fork, but it's more sane than epoll) and it
drops fds silently in similarly hard-to-detect cases.

This backend usually performs well under most conditions.

While nominally embeddable in other event loops, this doesn't work
everywhere, so you might need to test for this. And since it is broken
almost everywhere, you should only use it when you have a lot of sockets
(for which it usually works), by embedding it into another event loop
(e.g. C<EVBACKEND_SELECT> or C<EVBACKEND_POLL> (but C<poll> is of course
also broken on OS X)) and, did I mention it, using it only for sockets.

This backend maps C<EV_READ> into an C<EVFILT_READ> kevent with
C<NOTE_EOF>, and C<EV_WRITE> into an C<EVFILT_WRITE> kevent with
C<NOTE_EOF>.

=item C<EVBACKEND_DEVPOLL> (value 16, Solaris 8)

This is not implemented yet (and might never be, unless you send me an
implementation). According to reports, C</dev/poll> only supports sockets
and is not embeddable, which would limit the usefulness of this backend
immensely.

=item C<EVBACKEND_PORT>    (value 32, Solaris 10)

This uses the Solaris 10 event port mechanism. As with everything on Solaris,
it's really slow, but it still scales very well (O(active_fds)).

While this backend scales well, it requires one system call per active
file descriptor per loop iteration. For small and medium numbers of file
descriptors a "slow" C<EVBACKEND_SELECT> or C<EVBACKEND_POLL> backend
might perform better.

On the positive side, this backend actually performed fully to
specification in all tests and is fully embeddable, which is a rare feat
among the OS-specific backends (I vastly prefer correctness over speed
hacks).

On the negative side, the interface is I<bizarre> - so bizarre that
even sun itself gets it wrong in their code examples: The event polling
function sometimes returns events to the caller even though an error
occurred, but with no indication whether it has done so or not (yes, it's
even documented that way) - deadly for edge-triggered interfaces where you
absolutely have to know whether an event occurred or not because you have
to re-arm the watcher.

Fortunately libev seems to be able to work around these idiocies.

This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
C<EVBACKEND_POLL>.

=item C<EVBACKEND_ALL>

Try all backends (even potentially broken ones that wouldn't be tried
with C<EVFLAG_AUTO>). Since this is a mask, you can do stuff such as
C<EVBACKEND_ALL & ~EVBACKEND_KQUEUE>.

It is definitely not recommended to use this flag, use whatever
C<ev_recommended_backends ()> returns, or simply do not specify a backend
at all.

=item C<EVBACKEND_MASK>

Not a backend at all, but a mask to select all backend bits from a
C<flags> value, in case you want to mask out any backends from a flags
value (e.g. when modifying the C<LIBEV_FLAGS> environment variable).

=back

If one or more of the backend flags are or'ed into the flags value,
then only these backends will be tried (in the reverse order as listed
here). If none are specified, all backends in C<ev_recommended_backends
()> will be tried.

Example: Try to create an event loop that uses epoll and nothing else.

   struct ev_loop *epoller = ev_loop_new (EVBACKEND_EPOLL | EVFLAG_NOENV);
   if (!epoller)
     fatal ("no epoll found here, maybe it hides under your chair");

Example: Use whatever libev has to offer, but make sure that kqueue is
used if available.

   struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_KQUEUE);

=item ev_loop_destroy (loop)

Destroys an event loop object (frees all memory and kernel state
etc.). None of the active event watchers will be stopped in the normal
sense, so e.g. C<ev_is_active> might still return true. It is your
responsibility to either stop all watchers cleanly yourself I<before>
calling this function, or cope with the fact afterwards (which is usually
the easiest thing, you can just ignore the watchers and/or C<free ()> them
for example).

Note that certain global state, such as signal state (and installed signal
handlers), will not be freed by this function, and related watchers (such
as signal and child watchers) would need to be stopped manually.

This function is normally used on loop objects allocated by
C<ev_loop_new>, but it can also be used on the default loop returned by
C<ev_default_loop>, in which case it is not thread-safe.

Note that it is not advisable to call this function on the default loop
except in the rare occasion where you really need to free its resources.
If you need dynamically allocated loops it is better to use C<ev_loop_new>
and C<ev_loop_destroy>.

=item ev_loop_fork (loop)

This function sets a flag that causes subsequent C<ev_run> iterations
to reinitialise the kernel state for backends that have one. Despite
the name, you can call it anytime you are allowed to start or stop
watchers (except inside an C<ev_prepare> callback), but it makes most
sense after forking, in the child process. You I<must> call it (or use
C<EVFLAG_FORKCHECK>) in the child before resuming or calling C<ev_run>.

In addition, if you want to reuse a loop (via this function or
C<EVFLAG_FORKCHECK>), you I<also> have to ignore C<SIGPIPE>.

Again, you I<have> to call it on I<any> loop that you want to re-use after
a fork, I<even if you do not plan to use the loop in the parent>. This is
because some kernel interfaces *cough* I<kqueue> *cough* do funny things
during fork.

On the other hand, you need to call this function in the child
process if and only if you want to use the event loop in the child. If
you just fork+exec or create a new loop in the child, you don't have to
call it at all (in fact, C<epoll> is so badly broken that it makes a
difference, but libev will usually detect this case on its own and do a
costly reset of the backend).

The function itself is quite fast and it's usually not a problem to call
it just in case after a fork.

Example: Automate calling C<ev_loop_fork> on the default loop when
using pthreads.

   static void
   post_fork_child (void)
   {
     ev_loop_fork (EV_DEFAULT);
   }

   ...
   pthread_atfork (0, 0, post_fork_child);

=item int ev_is_default_loop (loop)

Returns true when the given loop is, in fact, the default loop, and false
otherwise.

=item unsigned int ev_iteration (loop)

Returns the current iteration count for the event loop, which is identical
to the number of times libev did poll for new events. It starts at C<0>
and happily wraps around with enough iterations.

This value can sometimes be useful as a generation counter of sorts (it
"ticks" the number of loop iterations), as it roughly corresponds with
C<ev_prepare> and C<ev_check> calls - and is incremented between the
prepare and check phases.

=item unsigned int ev_depth (loop)

Returns the number of times C<ev_run> was entered minus the number of
times C<ev_run> was exited normally, in other words, the recursion depth.

Outside C<ev_run>, this number is zero. In a callback, this number is
C<1>, unless C<ev_run> was invoked recursively (or from another thread),
in which case it is higher.

Leaving C<ev_run> abnormally (setjmp/longjmp, cancelling the thread,
throwing an exception etc.), doesn't count as "exit" - consider this
as a hint to avoid such ungentleman-like behaviour unless it's really
convenient, in which case it is fully supported.

=item unsigned int ev_backend (loop)

Returns one of the C<EVBACKEND_*> flags indicating the event backend in
use.

=item ev_tstamp ev_now (loop)

Returns the current "event loop time", which is the time the event loop
received events and started processing them. This timestamp does not
change as long as callbacks are being processed, and this is also the base
time used for relative timers. You can treat it as the timestamp of the
event occurring (or more correctly, libev finding out about it).

=item ev_now_update (loop)

Establishes the current time by querying the kernel, updating the time
returned by C<ev_now ()> in the process. This is a costly operation and
is usually done automatically within C<ev_run ()>.

This function is rarely useful, but when some event callback runs for a
very long time without entering the event loop, updating libev's idea of
the current time is a good idea.

See also L</The special problem of time updates> in the C<ev_timer> section.

=item ev_suspend (loop)

=item ev_resume (loop)

These two functions suspend and resume an event loop, for use when the
loop is not used for a while and timeouts should not be processed.

A typical use case would be an interactive program such as a game:  When
the user presses C<^Z> to suspend the game and resumes it an hour later it
would be best to handle timeouts as if no time had actually passed while
the program was suspended. This can be achieved by calling C<ev_suspend>
in your C<SIGTSTP> handler, sending yourself a C<SIGSTOP> and calling
C<ev_resume> directly afterwards to resume timer processing.

Effectively, all C<ev_timer> watchers will be delayed by the time spent
between C<ev_suspend> and C<ev_resume>, and all C<ev_periodic> watchers
will be rescheduled (that is, they will lose any events that would have
occurred while suspended).

After calling C<ev_suspend> you B<must not> call I<any> function on the
given loop other than C<ev_resume>, and you B<must not> call C<ev_resume>
without a previous call to C<ev_suspend>.

Calling C<ev_suspend>/C<ev_resume> has the side effect of updating the
event loop time (see C<ev_now_update>).

=item bool ev_run (loop, int flags)

Finally, this is it, the event handler. This function usually is called
after you have initialised all your watchers and you want to start
handling events. It will ask the operating system for any new events, call
the watcher callbacks, and then repeat the whole process indefinitely: This
is why event loops are called I<loops>.

If the flags argument is specified as C<0>, it will keep handling events
until either no event watchers are active anymore or C<ev_break> was
called.

The return value is false if there are no more active watchers (which
usually means "all jobs done" or "deadlock"), and true in all other cases
(which usually means "you should call C<ev_run> again").

Please note that an explicit C<ev_break> is usually better than
relying on all watchers to be stopped when deciding when a program has
finished (especially in interactive programs), but having a program
that automatically loops as long as it has to and no longer by virtue
of relying on its watchers stopping correctly, that is truly a thing of
beauty.

This function is I<mostly> exception-safe - you can break out of a
C<ev_run> call by calling C<longjmp> in a callback, throwing a C++
exception and so on. This does not decrement the C<ev_depth> value, nor
will it clear any outstanding C<EVBREAK_ONE> breaks.

A flags value of C<EVRUN_NOWAIT> will look for new events, will handle
those events and any already outstanding ones, but will not wait and
block your process in case there are no events and will return after one
iteration of the loop. This is sometimes useful to poll and handle new
events while doing lengthy calculations, to keep the program responsive.

A flags value of C<EVRUN_ONCE> will look for new events (waiting if
necessary) and will handle those and any already outstanding ones. It
will block your process until at least one new event arrives (which could
be an event internal to libev itself, so there is no guarantee that a
user-registered callback will be called), and will return after one
iteration of the loop.

This is useful if you are waiting for some external event in conjunction
with something not expressible using other libev watchers (i.e. "roll your
own C<ev_run>"). However, a pair of C<ev_prepare>/C<ev_check> watchers is
usually a better approach for this kind of thing.

Here are the gory details of what C<ev_run> does (this is for your
understanding, not a guarantee that things will work exactly like this in
future versions):

   - Increment loop depth.
   - Reset the ev_break status.
   - Before the first iteration, call any pending watchers.
   LOOP:
   - If EVFLAG_FORKCHECK was used, check for a fork.
   - If a fork was detected (by any means), queue and call all fork watchers.
   - Queue and call all prepare watchers.
   - If ev_break was called, goto FINISH.
   - If we have been forked, detach and recreate the kernel state
     as to not disturb the other process.
   - Update the kernel state with all outstanding changes.
   - Update the "event loop time" (ev_now ()).
   - Calculate for how long to sleep or block, if at all
     (active idle watchers, EVRUN_NOWAIT or not having
     any active watchers at all will result in not sleeping).
   - Sleep if the I/O and timer collect interval say so.
   - Increment loop iteration counter.
   - Block the process, waiting for any events.
   - Queue all outstanding I/O (fd) events.
   - Update the "event loop time" (ev_now ()), and do time jump adjustments.
   - Queue all expired timers.
   - Queue all expired periodics.
   - Queue all idle watchers with priority higher than that of pending events.
   - Queue all check watchers.
   - Call all queued watchers in reverse order (i.e. check watchers first).
     Signals and child watchers are implemented as I/O watchers, and will
     be handled here by queueing them when their watcher gets executed.
   - If ev_break has been called, or EVRUN_ONCE or EVRUN_NOWAIT
     were used, or there are no active watchers, goto FINISH, otherwise
     continue with step LOOP.
   FINISH:
   - Reset the ev_break status iff it was EVBREAK_ONE.
   - Decrement the loop depth.
   - Return.

Example: Queue some jobs and then loop until no events are outstanding
anymore.

   ... queue jobs here, make sure they register event watchers as long
   ... as they still have work to do (even an idle watcher will do..)
   ev_run (my_loop, 0);
   ... jobs done or somebody called break. yeah!

=item ev_break (loop, how)

Can be used to make a call to C<ev_run> return early (but only after it
has processed all outstanding events). The C<how> argument must be either
C<EVBREAK_ONE>, which will make the innermost C<ev_run> call return, or
C<EVBREAK_ALL>, which will make all nested C<ev_run> calls return.

This "break state" will be cleared on the next call to C<ev_run>.

It is safe to call C<ev_break> from outside any C<ev_run> calls, too, in
which case it will have no effect.

=item ev_ref (loop)

=item ev_unref (loop)

Ref/unref can be used to add or remove a reference count on the event
loop: Every watcher keeps one reference, and as long as the reference
count is nonzero, C<ev_run> will not return on its own.

This is useful when you have a watcher that you never intend to
unregister, but that nevertheless should not keep C<ev_run> from
returning. In such a case, call C<ev_unref> after starting, and C<ev_ref>
before stopping it.

As an example, libev itself uses this for its internal signal pipe: It
is not visible to the libev user and should not keep C<ev_run> from
exiting if no event watchers registered by it are active. It is also an
excellent way to do this for generic recurring timers or from within
third-party libraries. Just remember to I<unref after start> and I<ref
before stop> (but only if the watcher wasn't active before, or was active
before, respectively. Note also that libev might stop watchers itself
(e.g. non-repeating timers) in which case you have to C<ev_ref>
in the callback).

Example: Create a signal watcher, but keep it from keeping C<ev_run>
running when nothing else is active.

   ev_signal exitsig;
   ev_signal_init (&exitsig, sig_cb, SIGINT);
   ev_signal_start (loop, &exitsig);
   ev_unref (loop);

Example: For some weird reason, unregister the above signal handler again.

   ev_ref (loop);
   ev_signal_stop (loop, &exitsig);

=item ev_set_io_collect_interval (loop, ev_tstamp interval)

=item ev_set_timeout_collect_interval (loop, ev_tstamp interval)

These advanced functions influence the time that libev will spend waiting
for events. Both time intervals are by default C<0>, meaning that libev
will try to invoke timer/periodic callbacks and I/O callbacks with minimum
latency.

Setting these to a higher value (the C<interval> I<must> be >= C<0>)
allows libev to delay invocation of I/O and timer/periodic callbacks
to increase efficiency of loop iterations (or to increase power-saving
opportunities).

The idea is that sometimes your program runs just fast enough to handle
one (or very few) event(s) per loop iteration. While this makes the
program responsive, it also wastes a lot of CPU time to poll for new
events, especially with backends like C<select ()> which have a high
overhead for the actual polling but can deliver many events at once.

By setting a higher I<io collect interval> you allow libev to spend more
time collecting I/O events, so you can handle more events per iteration,
at the cost of increasing latency. Timeouts (both C<ev_periodic> and
C<ev_timer>) will not be affected. Setting this to a non-null value will
introduce an additional C<ev_sleep ()> call into most loop iterations. The
sleep time ensures that libev will not poll for I/O events more often than
once per this interval, on average (as long as the host time resolution is
good enough).

Likewise, by setting a higher I<timeout collect interval> you allow libev
to spend more time collecting timeouts, at the expense of increased
latency/jitter/inexactness (the watcher callback will be called
later). C<ev_io> watchers will not be affected. Setting this to a non-null
value will not introduce any overhead in libev.

Many (busy) programs can usually benefit by setting the I/O collect
interval to a value near C<0.1> or so, which is often enough for
interactive servers (of course not for games), likewise for timeouts. It
usually doesn't make much sense to set it to a lower value than C<0.01>,
as this approaches the timing granularity of most systems. Note that if
you do transactions with the outside world and you can't increase the
parallelism, then this setting will limit your transaction rate (if you
need to poll once per transaction and the I/O collect interval is 0.01,
then you can't do more than 100 transactions per second).

Setting the I<timeout collect interval> can improve the opportunity for
saving power, as the program will "bundle" timer callback invocations that
are "near" in time together, by delaying some, thus reducing the number of
times the process sleeps and wakes up again. Another useful technique to
reduce iterations/wake-ups is to use C<ev_periodic> watchers and make sure
they fire on, say, one-second boundaries only.

Example: we only need 0.1s timeout granularity, and we wish not to poll
more often than 100 times per second:

   ev_set_timeout_collect_interval (EV_DEFAULT_UC_ 0.1);
   ev_set_io_collect_interval (EV_DEFAULT_UC_ 0.01);

=item ev_invoke_pending (loop)

This call will simply invoke all pending watchers while resetting their
pending state. Normally, C<ev_run> does this automatically when required,
but when overriding the invoke callback this call comes in handy. This
function can be invoked from a watcher - this can be useful for example
when you want to do some lengthy calculation and want to pass further
event handling to another thread (you still have to make sure only one
thread executes within C<ev_invoke_pending> or C<ev_run> of course).

=item int ev_pending_count (loop)

Returns the number of pending watchers - zero indicates that no watchers
are pending.

=item ev_set_invoke_pending_cb (loop, void (*invoke_pending_cb)(EV_P))

This overrides the invoke pending functionality of the loop: Instead of
invoking all pending watchers when there are any, C<ev_run> will call
this callback instead. This is useful, for example, when you want to
invoke the actual watchers inside another context (another thread etc.).

If you want to reset the callback, use C<ev_invoke_pending> as new
callback.

=item ev_set_loop_release_cb (loop, void (*release)(EV_P) throw (), void (*acquire)(EV_P) throw ())

Sometimes you want to share the same loop between multiple threads. This
can be done relatively simply by putting mutex_lock/unlock calls around
each call to a libev function.

However, C<ev_run> can run an indefinite time, so it is not feasible
to wait for it to return. One way around this is to wake up the event
loop via C<ev_break> and C<ev_async_send>, another way is to set these
I<release> and I<acquire> callbacks on the loop.

When set, then C<release> will be called just before the thread is
suspended waiting for new events, and C<acquire> is called just
afterwards.

Ideally, C<release> will just call your mutex_unlock function, and
C<acquire> will just call the mutex_lock function again.

While event loop modifications are allowed between invocations of
C<release> and C<acquire> (that's their only purpose after all), no
modifications done will affect the event loop, i.e. adding watchers will
have no effect on the set of file descriptors being watched, or the time
waited. Use an C<ev_async> watcher to wake up C<ev_run> when you want it
to take note of any changes you made.

In theory, threads executing C<ev_run> will be async-cancel safe between
invocations of C<release> and C<acquire>.

See also the locking example in the C<THREADS> section later in this
document.

=item ev_set_userdata (loop, void *data)

=item void *ev_userdata (loop)

Set and retrieve a single C<void *> associated with a loop. When
C<ev_set_userdata> has never been called, then C<ev_userdata> returns
C<0>.

These two functions can be used to associate arbitrary data with a loop,
and are intended solely for the C<invoke_pending_cb>, C<release> and
C<acquire> callbacks described above, but of course can be (ab-)used for
any other purpose as well.

=item ev_verify (loop)

This function only does something when C<EV_VERIFY> support has been
compiled in, which is the default for non-minimal builds. It tries to go
through all internal structures and checks them for validity. If anything
is found to be inconsistent, it will print an error message to standard
error and call C<abort ()>.

This can be used to catch bugs inside libev itself: under normal
circumstances, this function will never abort as of course libev keeps its
data structures consistent.

=back


=head1 ANATOMY OF A WATCHER

In the following description, uppercase C<TYPE> in names stands for the
watcher type, e.g. C<ev_TYPE_start> can mean C<ev_timer_start> for timer
watchers and C<ev_io_start> for I/O watchers.

A watcher is an opaque structure that you allocate and register to record
your interest in some event. To make a concrete example, imagine you want
to wait for STDIN to become readable, you would create an C<ev_io> watcher
for that:

   static void my_cb (struct ev_loop *loop, ev_io *w, int revents)
   {
     ev_io_stop (loop, w);
     ev_break (loop, EVBREAK_ALL);
   }

   struct ev_loop *loop = ev_default_loop (0);

   ev_io stdin_watcher;

   ev_init (&stdin_watcher, my_cb);
   ev_io_set (&stdin_watcher, STDIN_FILENO, EV_READ);
   ev_io_start (loop, &stdin_watcher);

   ev_run (loop, 0);

As you can see, you are responsible for allocating the memory for your
watcher structures (and it is I<usually> a bad idea to do this on the
stack).

Each watcher has an associated watcher structure (called C<struct ev_TYPE>
or simply C<ev_TYPE>, as typedefs are provided for all watcher structs).

Each watcher structure must be initialised by a call to C<ev_init (watcher
*, callback)>, which expects a callback to be provided. This callback is
invoked each time the event occurs (or, in the case of I/O watchers, each
time the event loop detects that the file descriptor given is readable
and/or writable).

Each watcher type further has its own C<< ev_TYPE_set (watcher *, ...) >>
macro to configure it, with arguments specific to the watcher type. There
is also a macro to combine initialisation and setting in one call: C<<
ev_TYPE_init (watcher *, callback, ...) >>.

To make the watcher actually watch out for events, you have to start it
with a watcher-specific start function (C<< ev_TYPE_start (loop, watcher
*) >>), and you can stop watching for events at any time by calling the
corresponding stop function (C<< ev_TYPE_stop (loop, watcher *) >>).

As long as your watcher is active (has been started but not stopped) you
must not touch the values stored in it. Most specifically you must never
reinitialise it or call its C<ev_TYPE_set> macro.

Each and every callback receives the event loop pointer as first, the
registered watcher structure as second, and a bitset of received events as
third argument.

The received events usually include a single bit per event type received
(you can receive multiple events at the same time). The possible bit masks
are:

=over 4

=item C<EV_READ>

=item C<EV_WRITE>

The file descriptor in the C<ev_io> watcher has become readable and/or
writable.

=item C<EV_TIMER>

The C<ev_timer> watcher has timed out.

=item C<EV_PERIODIC>

The C<ev_periodic> watcher has timed out.

=item C<EV_SIGNAL>

The signal specified in the C<ev_signal> watcher has been received by a thread.

=item C<EV_CHILD>

The pid specified in the C<ev_child> watcher has received a status change.

=item C<EV_STAT>

The path specified in the C<ev_stat> watcher changed its attributes somehow.

=item C<EV_IDLE>

The C<ev_idle> watcher has determined that you have nothing better to do.

=item C<EV_PREPARE>

=item C<EV_CHECK>

All C<ev_prepare> watchers are invoked just I<before> C<ev_run> starts to
gather new events, and all C<ev_check> watchers are queued (not invoked)
just after C<ev_run> has gathered them, but before it queues any callbacks
for any received events. That means C<ev_prepare> watchers are the last
watchers invoked before the event loop sleeps or polls for new events, and
C<ev_check> watchers will be invoked before any other watchers of the same
or lower priority within an event loop iteration.

Callbacks of both watcher types can start and stop as many watchers as
they want, and all of them will be taken into account (for example, a
C<ev_prepare> watcher might start an idle watcher to keep C<ev_run> from
blocking).

=item C<EV_EMBED>

The embedded event loop specified in the C<ev_embed> watcher needs attention.

=item C<EV_FORK>

The event loop has been resumed in the child process after fork (see
C<ev_fork>).

=item C<EV_CLEANUP>

The event loop is about to be destroyed (see C<ev_cleanup>).

=item C<EV_ASYNC>

The given async watcher has been asynchronously notified (see C<ev_async>).

=item C<EV_CUSTOM>

Not ever sent (or otherwise used) by libev itself, but can be freely used
by libev users to signal watchers (e.g. via C<ev_feed_event>).

=item C<EV_ERROR>

An unspecified error has occurred, the watcher has been stopped. This might
happen because the watcher could not be properly started because libev
ran out of memory, a file descriptor was found to be closed or any other
problem. Libev considers these application bugs.

You best act on it by reporting the problem and somehow coping with the
watcher being stopped. Note that well-written programs should not receive
an error ever, so when your watcher receives it, this usually indicates a
bug in your program.

Libev will usually signal a few "dummy" events together with an error, for
example it might indicate that a fd is readable or writable, and if your
callback is well-written it can just attempt the operation and cope with
the error from read() or write(). This will not work in multi-threaded
programs, though, as the fd could already be closed and reused for another
thing, so beware.

=back

=head2 GENERIC WATCHER FUNCTIONS

=over 4

=item C<ev_init> (ev_TYPE *watcher, callback)

This macro initialises the generic portion of a watcher. The contents
of the watcher object can be arbitrary (so C<malloc> will do). Only
the generic parts of the watcher are initialised, you I<need> to call
the type-specific C<ev_TYPE_set> macro afterwards to initialise the
type-specific parts. For each type there is also a C<ev_TYPE_init> macro
which rolls both calls into one.

You can reinitialise a watcher at any time as long as it has been stopped
(or never started) and there are no pending events outstanding.

The callback is always of type C<void (*)(struct ev_loop *loop, ev_TYPE *watcher,
int revents)>.

Example: Initialise an C<ev_io> watcher in two steps.

   ev_io w;
   ev_init (&w, my_cb);
   ev_io_set (&w, STDIN_FILENO, EV_READ);

=item C<ev_TYPE_set> (ev_TYPE *watcher, [args])

This macro initialises the type-specific parts of a watcher. You need to
call C<ev_init> at least once before you call this macro, but you can
call C<ev_TYPE_set> any number of times. You must not, however, call this
macro on a watcher that is active (it can be pending, however, which is a
difference to the C<ev_init> macro).

Although some watcher types do not have type-specific arguments
(e.g. C<ev_prepare>) you still need to call its C<set> macro.

See C<ev_init>, above, for an example.

=item C<ev_TYPE_init> (ev_TYPE *watcher, callback, [args])

This convenience macro rolls both C<ev_init> and C<ev_TYPE_set> macro
calls into a single call. This is the most convenient method to initialise
a watcher. The same limitations apply, of course.

Example: Initialise and set an C<ev_io> watcher in one step.

   ev_io_init (&w, my_cb, STDIN_FILENO, EV_READ);

=item C<ev_TYPE_start> (loop, ev_TYPE *watcher)

Starts (activates) the given watcher. Only active watchers will receive
events. If the watcher is already active nothing will happen.

Example: Start the C<ev_io> watcher that is being abused as example in this
whole section.

   ev_io_start (EV_DEFAULT_UC, &w);

=item C<ev_TYPE_stop> (loop, ev_TYPE *watcher)

Stops the given watcher if active, and clears the pending status (whether
the watcher was active or not).

It is possible that stopped watchers are pending - for example,
non-repeating timers are being stopped when they become pending - but
calling C<ev_TYPE_stop> ensures that the watcher is neither active nor
pending. If you want to free or reuse the memory used by the watcher it is
therefore a good idea to always call its C<ev_TYPE_stop> function.

=item bool ev_is_active (ev_TYPE *watcher)

Returns a true value iff the watcher is active (i.e. it has been started
and not yet been stopped). As long as a watcher is active you must not modify
it.

=item bool ev_is_pending (ev_TYPE *watcher)

Returns a true value iff the watcher is pending (i.e. it has outstanding
events but its callback has not yet been invoked). As long as a watcher
is pending (but not active) you must not call an init function on it (but
C<ev_TYPE_set> is safe), you must not change its priority, and you must
make sure the watcher is available to libev (e.g. you cannot C<free ()>
it).

=item callback ev_cb (ev_TYPE *watcher)

Returns the callback currently set on the watcher.

=item ev_set_cb (ev_TYPE *watcher, callback)

Change the callback. You can change the callback at virtually any time
(modulo threads).

=item ev_set_priority (ev_TYPE *watcher, int priority)

=item int ev_priority (ev_TYPE *watcher)

Set and query the priority of the watcher. The priority is a small
integer between C<EV_MAXPRI> (default: C<2>) and C<EV_MINPRI>
(default: C<-2>). Pending watchers with higher priority will be invoked
before watchers with lower priority, but priority will not keep watchers
from being executed (except for C<ev_idle> watchers).

If you need to suppress invocation when higher priority events are pending
you need to look at C<ev_idle> watchers, which provide this functionality.

You I<must not> change the priority of a watcher as long as it is active or
pending.

Setting a priority outside the range of C<EV_MINPRI> to C<EV_MAXPRI> is
fine, as long as you do not mind that the priority value you query might
or might not have been clamped to the valid range.

The default priority used by watchers when no priority has been set is
always C<0>, which is supposed to not be too high and not be too low :).

See L</WATCHER PRIORITY MODELS>, below, for a more thorough treatment of
priorities.

=item ev_invoke (loop, ev_TYPE *watcher, int revents)

Invoke the C<watcher> with the given C<loop> and C<revents>. Neither
C<loop> nor C<revents> need to be valid as long as the watcher callback
can deal with that fact, as both are simply passed through to the
callback.

=item int ev_clear_pending (loop, ev_TYPE *watcher)

If the watcher is pending, this function clears its pending status and
returns its C<revents> bitset (as if its callback was invoked). If the
watcher isn't pending it does nothing and returns C<0>.

Sometimes it can be useful to "poll" a watcher instead of waiting for its
callback to be invoked, which can be accomplished with this function.

=item ev_feed_event (loop, ev_TYPE *watcher, int revents)

Feeds the given event set into the event loop, as if the specified event
had happened for the specified watcher (which must be a pointer to an
initialised but not necessarily started event watcher). Obviously you must
not free the watcher as long as it has pending events.

Stopping the watcher, letting libev invoke it, or calling
C<ev_clear_pending> will clear the pending event, even if the watcher was
not started in the first place.

See also C<ev_feed_fd_event> and C<ev_feed_signal_event> for related
functions that do not need a watcher.

=back

See also the L</ASSOCIATING CUSTOM DATA WITH A WATCHER> and L</BUILDING YOUR
OWN COMPOSITE WATCHERS> idioms.

=head2 WATCHER STATES

There are various watcher states mentioned throughout this manual -
active, pending and so on. In this section these states and the rules to
transition between them will be described in more detail - and while these
rules might look complicated, they usually do "the right thing".

=over 4

=item initialised

Before a watcher can be registered with the event loop it has to be
initialised. This can be done with a call to C<ev_TYPE_init>, or calls to
C<ev_init> followed by the watcher-specific C<ev_TYPE_set> function.

In this state it is simply some block of memory that is suitable for
use in an event loop. It can be moved around, freed, reused etc. at
will - as long as you either keep the memory contents intact, or call
C<ev_TYPE_init> again.

=item started/running/active

Once a watcher has been started with a call to C<ev_TYPE_start> it becomes
property of the event loop, and is actively waiting for events. While in
this state it cannot be accessed (except in a few documented ways), moved,
freed or anything else - the only legal thing is to keep a pointer to it,
and call libev functions on it that are documented to work on active watchers.

=item pending

If a watcher is active and libev determines that an event it is interested
in has occurred (such as a timer expiring), it will become pending. It will
stay in this pending state until either it is stopped or its callback is
about to be invoked, so it is not normally pending inside the watcher
callback.

The watcher might or might not be active while it is pending (for example,
an expired non-repeating timer can be pending but no longer active). If it
is stopped, it can be freely accessed (e.g. by calling C<ev_TYPE_set>),
but it is still property of the event loop at this time, so cannot be
moved, freed or reused. And if it is active the rules described in the
previous item still apply.

It is also possible to feed an event on a watcher that is not active (e.g.
via C<ev_feed_event>), in which case it becomes pending without being
active.

=item stopped

A watcher can be stopped implicitly by libev (in which case it might still
be pending), or explicitly by calling its C<ev_TYPE_stop> function. The
latter will clear any pending state the watcher might be in, regardless
of whether it was active or not, so stopping a watcher explicitly before
freeing it is often a good idea.

While stopped (and not pending) the watcher is essentially in the
initialised state, that is, it can be reused, moved, modified in any way
you wish (but when you trash the memory block, you need to C<ev_TYPE_init>
it again).

=back

=head2 WATCHER PRIORITY MODELS

Many event loops support I<watcher priorities>, which are usually small
integers that influence the ordering of event callback invocation
between watchers in some way, all else being equal.

In libev, Watcher priorities can be set using C<ev_set_priority>. See its
description for the more technical details such as the actual priority
range.

There are two common ways how these priorities are being interpreted
by event loops:

In the more common lock-out model, higher priorities "lock out" invocation
of lower priority watchers, which means as long as higher priority
watchers receive events, lower priority watchers are not being invoked.

The less common only-for-ordering model uses priorities solely to order
callback invocation within a single event loop iteration: Higher priority
watchers are invoked before lower priority ones, but they all get invoked
before polling for new events.

Libev uses the second (only-for-ordering) model for all its watchers
except for idle watchers (which use the lock-out model).

The rationale behind this is that implementing the lock-out model for
watchers is not well supported by most kernel interfaces, and most event
libraries will just poll for the same events again and again as long as
their callbacks have not been executed, which is very inefficient in the
common case of one high-priority watcher locking out a mass of lower
priority ones.

Static (ordering) priorities are most useful when you have two or more
watchers handling the same resource: a typical usage example is having an
C<ev_io> watcher to receive data, and an associated C<ev_timer> to handle
timeouts. Under load, data might be received while the program handles
other jobs, but since timers normally get invoked first, the timeout
handler will be executed before checking for data. In that case, giving
the timer a lower priority than the I/O watcher ensures that I/O will be
handled first even under adverse conditions (which is usually, but not
always, what you want).

Since idle watchers use the "lock-out" model, meaning that idle watchers
will only be executed when no same or higher priority watchers have
received events, they can be used to implement the "lock-out" model when
required.

For example, to emulate how many other event libraries handle priorities,
you can associate an C<ev_idle> watcher to each such watcher, and in
the normal watcher callback, you just start the idle watcher. The real
processing is done in the idle watcher callback. This causes libev to
continuously poll and process kernel event data for the watcher, but when
the lock-out case is known to be rare (which in turn is rare :), this is
workable.

Usually, however, the lock-out model implemented that way will perform
miserably under the type of load it was designed to handle. In that case,
it might be preferable to stop the real watcher before starting the
idle watcher, so the kernel will not have to process the event in case
the actual processing will be delayed for considerable time.

Here is an example of an I/O watcher that should run at a strictly lower
priority than the default, and which should only process data when no
other events are pending:

   ev_idle idle; // actual processing watcher
   ev_io io;     // actual event watcher

   static void
   io_cb (EV_P_ ev_io *w, int revents)
   {
     // stop the I/O watcher, we received the event, but
     // are not yet ready to handle it.
     ev_io_stop (EV_A_ w);

     // start the idle watcher to handle the actual event.
     // it will not be executed as long as other watchers
     // with the default priority are receiving events.
     ev_idle_start (EV_A_ &idle);
   }

   static void
   idle_cb (EV_P_ ev_idle *w, int revents)
   {
     // actual processing
     read (STDIN_FILENO, ...);

     // have to start the I/O watcher again, as
     // we have handled the event
     ev_io_start (EV_A_ &io);
   }

   // initialisation
   ev_idle_init (&idle, idle_cb);
   ev_io_init (&io, io_cb, STDIN_FILENO, EV_READ);
   ev_io_start (EV_DEFAULT_ &io);

In the "real" world, it might also be beneficial to start a timer, so that
low-priority connections can not be locked out forever under load. This
enables your program to keep a lower latency for important connections
during short periods of high load, while not completely locking out less
important ones.


=head1 WATCHER TYPES

This section describes each watcher in detail, but will not repeat
information given in the last section. Any initialisation/set macros,
functions and members specific to the watcher type are explained.

Members are additionally marked with either I<[read-only]>, meaning that,
while the watcher is active, you can look at the member and expect some
sensible content, but you must not modify it (you can modify it while the
watcher is stopped to your heart's content), or I<[read-write]>, which
means you can expect it to have some sensible content while the watcher
is active, but you can also modify it. Modifying it may not do something
sensible or take immediate effect (or do anything at all), but libev will
not crash or malfunction in any way.


=head2 C<ev_io> - is this file descriptor readable or writable?

I/O watchers check whether a file descriptor is readable or writable
in each iteration of the event loop, or, more precisely, when reading
would not block the process and writing would at least be able to write
some data. This behaviour is called level-triggering because you keep
receiving events as long as the condition persists. Remember you can stop
the watcher if you don't want to act on the event and neither want to
receive future events.

In general you can register as many read and/or write event watchers per
fd as you want (as long as you don't confuse yourself). Setting all file
descriptors to non-blocking mode is also usually a good idea (but not
required if you know what you are doing).

Another thing you have to watch out for is that it is quite easy to
receive "spurious" readiness notifications, that is, your callback might
be called with C<EV_READ> but a subsequent C<read>(2) will actually block
because there is no data. It is very easy to get into this situation even
with a relatively standard program structure. Thus it is best to always
use non-blocking I/O: An extra C<read>(2) returning C<EAGAIN> is far
preferable to a program hanging until some data arrives.

If you cannot run the fd in non-blocking mode (for example you should
not play around with an Xlib connection), then you have to separately
re-test whether a file descriptor is really ready with a known-to-be good
interface such as poll (fortunately in the case of Xlib, it already does
this on its own, so it's quite safe to use). Some people additionally
use C<SIGALRM> and an interval timer, just to be sure you won't block
indefinitely.

But really, best use non-blocking mode.

=head3 The special problem of disappearing file descriptors

Some backends (e.g. kqueue, epoll) need to be told about closing a file
descriptor (either due to calling C<close> explicitly or any other means,
such as C<dup2>). The reason is that you register interest in some file
descriptor, but when it goes away, the operating system will silently drop
this interest. If another file descriptor with the same number then is
registered with libev, there is no efficient way to see that this is, in
fact, a different file descriptor.

To avoid having to explicitly tell libev about such cases, libev follows
the following policy:  Each time C<ev_io_set> is being called, libev
will assume that this is potentially a new file descriptor, otherwise
it is assumed that the file descriptor stays the same. That means that
you I<have> to call C<ev_io_set> (or C<ev_io_init>) when you change the
descriptor even if the file descriptor number itself did not change.

This is how one would do it normally anyway, the important point is that
the libev application should not optimise around libev but should leave
optimisations to libev.

=head3 The special problem of dup'ed file descriptors

Some backends (e.g. epoll) cannot register events for file descriptors,
but only events for the underlying file descriptions. That means when you
have C<dup ()>'ed file descriptors or weirder constellations, and register
events for them, only one file descriptor might actually receive events.

There is no workaround possible except not registering events
for potentially C<dup ()>'ed file descriptors, or to resort to
C<EVBACKEND_SELECT> or C<EVBACKEND_POLL>.

=head3 The special problem of files

Many people try to use C<select> (or libev) on file descriptors
representing files, and expect it to become ready when their program
doesn't block on disk accesses (which can take a long time on their own).

However, this cannot ever work in the "expected" way - you get a readiness
notification as soon as the kernel knows whether and how much data is
there, and in the case of open files, that's always the case, so you
always get a readiness notification instantly, and your read (or possibly
write) will still block on the disk I/O.

Another way to view it is that in the case of sockets, pipes, character
devices and so on, there is another party (the sender) that delivers data
on its own, but in the case of files, there is no such thing: the disk
will not send data on its own, simply because it doesn't know what you
wish to read - you would first have to request some data.

Since files are typically not-so-well supported by advanced notification
mechanisms, libev tries hard to emulate POSIX behaviour with respect
to files, even though you should not use it. The reason for this is
convenience: sometimes you want to watch STDIN or STDOUT, which is
usually a tty, often a pipe, but also sometimes files or special devices
(for example, C<epoll> on Linux works with F</dev/random> but not with
F</dev/urandom>), and even though the file might better be served with
asynchronous I/O instead of with non-blocking I/O, it is still useful when
it "just works" instead of freezing.

So avoid file descriptors pointing to files when you know it (e.g. use
libeio), but use them when it is convenient, e.g. for STDIN/STDOUT, or
when you rarely read from a file instead of from a socket, and want to
reuse the same code path.

=head3 The special problem of fork

Some backends (epoll, kqueue) do not support C<fork ()> at all or exhibit
useless behaviour. Libev fully supports fork, but needs to be told about
it in the child if you want to continue to use it in the child.

To support fork in your child processes, you have to call C<ev_loop_fork
()> after a fork in the child, enable C<EVFLAG_FORKCHECK>, or resort to
C<EVBACKEND_SELECT> or C<EVBACKEND_POLL>.

=head3 The special problem of SIGPIPE

While not really specific to libev, it is easy to forget about C<SIGPIPE>:
when writing to a pipe whose other end has been closed, your program gets
sent a SIGPIPE, which, by default, aborts your program. For most programs
this is sensible behaviour, for daemons, this is usually undesirable.

So when you encounter spurious, unexplained daemon exits, make sure you
ignore SIGPIPE (and maybe make sure you log the exit status of your daemon
somewhere, as that would have given you a big clue).

=head3 The special problem of accept()ing when you can't

Many implementations of the POSIX C<accept> function (for example,
found in post-2004 Linux) have the peculiar behaviour of not removing a
connection from the pending queue in all error cases.

For example, larger servers often run out of file descriptors (because
of resource limits), causing C<accept> to fail with C<ENFILE> but not
rejecting the connection, leading to libev signalling readiness on
the next iteration again (the connection still exists after all), and
typically causing the program to loop at 100% CPU usage.

Unfortunately, the set of errors that cause this issue differs between
operating systems, there is usually little the app can do to remedy the
situation, and no thread-safe method of removing the connection to
cope with overload is known (to me).

One of the easiest ways to handle this situation is to just ignore it
- when the program encounters an overload, it will just loop until the
situation is over. While this is a form of busy waiting, no OS offers an
event-based way to handle this situation, so it's the best one can do.

A better way to handle the situation is to log any errors other than
C<EAGAIN> and C<EWOULDBLOCK>, making sure not to flood the log with such
messages, and continue as usual, which at least gives the user an idea of
what could be wrong ("raise the ulimit!"). For extra points one could stop
the C<ev_io> watcher on the listening fd "for a while", which reduces CPU
usage.

If your program is single-threaded, then you could also keep a dummy file
descriptor for overload situations (e.g. by opening F</dev/null>), and
when you run into C<ENFILE> or C<EMFILE>, close it, run C<accept>,
close that fd, and create a new dummy fd. This will gracefully refuse
clients under typical overload conditions.

The last way to handle it is to simply log the error and C<exit>, as
is often done with C<malloc> failures, but this results in an easy
opportunity for a DoS attack.

=head3 Watcher-Specific Functions

=over 4

=item ev_io_init (ev_io *, callback, int fd, int events)

=item ev_io_set (ev_io *, int fd, int events)

Configures an C<ev_io> watcher. The C<fd> is the file descriptor to
receive events for and C<events> is either C<EV_READ>, C<EV_WRITE> or
C<EV_READ | EV_WRITE>, to express the desire to receive the given events.

=item int fd [read-only]

The file descriptor being watched.

=item int events [read-only]

The events being watched.

=back

=head3 Examples

Example: Call C<stdin_readable_cb> when STDIN_FILENO has become, well
readable, but only once. Since it is likely line-buffered, you could
attempt to read a whole line in the callback.

   static void
   stdin_readable_cb (struct ev_loop *loop, ev_io *w, int revents)
   {
      ev_io_stop (loop, w);
     .. read from stdin here (or from w->fd) and handle any I/O errors
   }

   ...
   struct ev_loop *loop = ev_default_init (0);
   ev_io stdin_readable;
   ev_io_init (&stdin_readable, stdin_readable_cb, STDIN_FILENO, EV_READ);
   ev_io_start (loop, &stdin_readable);
   ev_run (loop, 0);


=head2 C<ev_timer> - relative and optionally repeating timeouts

Timer watchers are simple relative timers that generate an event after a
given time, and optionally repeating in regular intervals after that.

The timers are based on real time, that is, if you register an event that
times out after an hour and you reset your system clock to January last
year, it will still time out after (roughly) one hour. "Roughly" because
detecting time jumps is hard, and some inaccuracies are unavoidable (the
monotonic clock option helps a lot here).

The callback is guaranteed to be invoked only I<after> its timeout has
passed (not I<at>, so on systems with very low-resolution clocks this
might introduce a small delay, see "the special problem of being too
early", below). If multiple timers become ready during the same loop
iteration then the ones with earlier time-out values are invoked before
ones of the same priority with later time-out values (but this is no
longer true when a callback calls C<ev_run> recursively).

=head3 Be smart about timeouts

Many real-world problems involve some kind of timeout, usually for error
recovery. A typical example is an HTTP request - if the other side hangs,
you want to raise some error after a while.

What follows are some ways to handle this problem, from obvious and
inefficient to smart and efficient.

In the following, a 60 second activity timeout is assumed - a timeout that
gets reset to 60 seconds each time there is activity (e.g. each time some
data or other life sign was received).

=over 4

=item 1. Use a timer and stop, reinitialise and start it on activity.

This is the most obvious, but not the most simple way: In the beginning,
start the watcher:

   ev_timer_init (timer, callback, 60., 0.);
   ev_timer_start (loop, timer);

Then, each time there is some activity, C<ev_timer_stop> it, initialise it
and start it again:

   ev_timer_stop (loop, timer);
   ev_timer_set (timer, 60., 0.);
   ev_timer_start (loop, timer);

This is relatively simple to implement, but means that each time there is
some activity, libev will first have to remove the timer from its internal
data structure and then add it again. Libev tries to be fast, but it's
still not a constant-time operation.

=item 2. Use a timer and re-start it with C<ev_timer_again> on activity.

This is the easiest way, and involves using C<ev_timer_again> instead of
C<ev_timer_start>.

To implement this, configure an C<ev_timer> with a C<repeat> value
of C<60> and then call C<ev_timer_again> at start and each time you
successfully read or write some data. If you go into an idle state where
you do not expect data to travel on the socket, you can C<ev_timer_stop>
the timer, and C<ev_timer_again> will automatically restart it if need be.

That means you can ignore both the C<ev_timer_start> function and the
C<after> argument to C<ev_timer_set>, and only ever use the C<repeat>
member and C<ev_timer_again>.

At start:

   ev_init (timer, callback);
   timer->repeat = 60.;
   ev_timer_again (loop, timer);

Each time there is some activity:

   ev_timer_again (loop, timer);

It is even possible to change the time-out on the fly, regardless of
whether the watcher is active or not:

   timer->repeat = 30.;
   ev_timer_again (loop, timer);

This is slightly more efficient than stopping/starting the timer each time
you want to modify its timeout value, as libev does not have to completely
remove and re-insert the timer from/into its internal data structure.

It is, however, even simpler than the "obvious" way to do it.

=item 3. Let the timer time out, but then re-arm it as required.

This method is more tricky, but usually most efficient: Most timeouts are
relatively long compared to the intervals between other activity - in
our example, within 60 seconds, there are usually many I/O events with
associated activity resets.

In this case, it would be more efficient to leave the C<ev_timer> alone,
but remember the time of last activity, and check for a real timeout only
within the callback:

   ev_tstamp timeout = 60.;
   ev_tstamp last_activity; // time of last activity
   ev_timer timer;

   static void
   callback (EV_P_ ev_timer *w, int revents)
   {
     // calculate when the timeout would happen
     ev_tstamp after = last_activity - ev_now (EV_A) + timeout;

     // if negative, it means the timeout already occurred
     if (after < 0.)
       {
         // timeout occurred, take action
       }
     else
       {
         // callback was invoked, but there was some recent 
         // activity. simply restart the timer to time out
         // after "after" seconds, which is the earliest time
         // the timeout can occur.
         ev_timer_set (w, after, 0.);
         ev_timer_start (EV_A_ w);
       }
   }

To summarise the callback: first calculate in how many seconds the
timeout will occur (by calculating the absolute time when it would occur,
C<last_activity + timeout>, and subtracting the current time, C<ev_now
(EV_A)> from that).

If this value is negative, then we are already past the timeout, i.e. we
timed out, and need to do whatever is needed in this case.

Otherwise, we know the earliest time at which the timeout would trigger,
and simply start the timer with this timeout value.

In other words, each time the callback is invoked it will check whether
the timeout occurred. If not, it will simply reschedule itself to check
again at the earliest time it could time out. Rinse. Repeat.

This scheme causes more callback invocations (about one every 60 seconds
minus half the average time between activity), but virtually no calls to
libev to change the timeout.

To start the machinery, simply initialise the watcher and set
C<last_activity> to the current time (meaning there was some activity just
now), then call the callback, which will "do the right thing" and start
the timer:

   last_activity = ev_now (EV_A);
   ev_init (&timer, callback);
   callback (EV_A_ &timer, 0);

When there is some activity, simply store the current time in
C<last_activity>, no libev calls at all:

   if (activity detected)
     last_activity = ev_now (EV_A);

When your timeout value changes, then the timeout can be changed by simply
providing a new value, stopping the timer and calling the callback, which
will again do the right thing (for example, time out immediately :).

   timeout = new_value;
   ev_timer_stop (EV_A_ &timer);
   callback (EV_A_ &timer, 0);

This technique is slightly more complex, but in most cases where the
time-out is unlikely to be triggered, much more efficient.

=item 4. Wee, just use a double-linked list for your timeouts.

If there is not one request, but many thousands (millions...), all
employing some kind of timeout with the same timeout value, then one can
do even better:

When starting the timeout, calculate the timeout value and put the timeout
at the I<end> of the list.

Then use an C<ev_timer> to fire when the timeout at the I<beginning> of
the list is expected to fire (for example, using the technique #3).

When there is some activity, remove the timer from the list, recalculate
the timeout, append it to the end of the list again, and make sure to
update the C<ev_timer> if it was taken from the beginning of the list.

This way, one can manage an unlimited number of timeouts in O(1) time for
starting, stopping and updating the timers, at the expense of a major
complication, and having to use a constant timeout. The constant timeout
ensures that the list stays sorted.

=back

So which method is the best?

Method #2 is a simple no-brain-required solution that is adequate in most
situations. Method #3 requires a bit more thinking, but handles many cases
better, and isn't very complicated either. In most cases, choosing either
one is fine, with #3 being better in typical situations.

Method #1 is almost always a bad idea, and buys you nothing. Method #4 is
rather complicated, but extremely efficient, something that really pays
off after the first million or so of active timers, i.e. it's usually
overkill :)

=head3 The special problem of being too early

If you ask a timer to call your callback after three seconds, then
you expect it to be invoked after three seconds - but of course, this
cannot be guaranteed to infinite precision. Less obviously, it cannot be
guaranteed to any precision by libev - imagine somebody suspending the
process with a STOP signal for a few hours for example.

So, libev tries to invoke your callback as soon as possible I<after> the
delay has occurred, but cannot guarantee this.

A less obvious failure mode is calling your callback too early: many event
loops compare timestamps with an "elapsed delay >= requested delay" test, but
this can cause your callback to be invoked much earlier than you would
expect.

To see why, imagine a system with a clock that only offers full second
resolution (think windows if you can't come up with a broken enough OS
yourself). If you schedule a one-second timer at the time 500.9, then the
event loop will schedule your timeout to elapse at a system time of 500
(500.9 truncated to the resolution) + 1, or 501.

If an event library looks at the timeout 0.1s later, it will see "501 >=
501" and invoke the callback 0.1s after it was started, even though a
one-second delay was requested - this is being "too early", despite best
intentions.

This is the reason why libev will never invoke the callback if the elapsed
delay equals the requested delay, but only when the elapsed delay is
larger than the requested delay. In the example above, libev would only invoke
the callback at system time 502, or 1.1s after the timer was started.

So, while libev cannot guarantee that your callback will be invoked
exactly when requested, it I<can> and I<does> guarantee that the requested
delay has actually elapsed, or in other words, it always errs on the "too
late" side of things.

=head3 The special problem of time updates

Establishing the current time is a costly operation (it usually takes
at least one system call): EV therefore updates its idea of the current
time only before and after C<ev_run> collects new events, which causes a
growing difference between C<ev_now ()> and C<ev_time ()> when handling
lots of events in one iteration.

The relative timeouts are calculated relative to the C<ev_now ()>
time. This is usually the right thing as this timestamp refers to the time
of the event triggering whatever timeout you are modifying/starting. If
you suspect event processing to be delayed and you I<need> to base the
timeout on the current time, use something like the following to adjust
for it:

   ev_timer_set (&timer, after + (ev_time () - ev_now ()), 0.);

If the event loop is suspended for a long time, you can also force an
update of the time returned by C<ev_now ()> by calling C<ev_now_update
()>, although that will push the event time of all outstanding events
further into the future.

=head3 The special problem of unsynchronised clocks

Modern systems have a variety of clocks - libev itself uses the normal
"wall clock" clock and, if available, the monotonic clock (to avoid time
jumps).

Neither of these clocks is synchronised with each other or any other clock
on the system, so C<ev_time ()> might return a considerably different time
than C<gettimeofday ()> or C<time ()>. On a GNU/Linux system, for example,
a call to C<gettimeofday> might return a second count that is one higher
than a directly following call to C<time>.

The moral of this is to only compare libev-related timestamps with
C<ev_time ()> and C<ev_now ()>, at least if you want better precision than
a second or so.

One more problem arises due to this lack of synchronisation: if libev uses
the system monotonic clock and you compare timestamps from C<ev_time>
or C<ev_now> from when you started your timer and when your callback is
invoked, you will find that sometimes the callback is a bit "early".

This is because C<ev_timer>s work in real time, not wall clock time, so
libev makes sure your callback is not invoked before the delay happened,
I<measured according to the real time>, not the system clock.

If your timeouts are based on a physical timescale (e.g. "time out this
connection after 100 seconds") then this shouldn't bother you as it is
exactly the right behaviour.

If you want to compare wall clock/system timestamps to your timers, then
you need to use C<ev_periodic>s, as these are based on the wall clock
time, where your comparisons will always generate correct results.

=head3 The special problems of suspended animation

When you leave the server world it is quite customary to hit machines that
can suspend/hibernate - what happens to the clocks during such a suspend?

Some quick tests made with a Linux 2.6.28 indicate that a suspend freezes
all processes, while the clocks (C<times>, C<CLOCK_MONOTONIC>) continue
to run until the system is suspended, but they will not advance while the
system is suspended. That means, on resume, it will be as if the program
was frozen for a few seconds, but the suspend time will not be counted
towards C<ev_timer> when a monotonic clock source is used. The real time
clock advanced as expected, but if it is used as sole clocksource, then a
long suspend would be detected as a time jump by libev, and timers would
be adjusted accordingly.

I would not be surprised to see different behaviour between different
operating systems, OS versions or even different hardware.

The other form of suspend (job control, or sending a SIGSTOP) will see a
time jump in the monotonic clocks and the realtime clock. If the program
is suspended for a very long time, and monotonic clock sources are in use,
then you can expect C<ev_timer>s to expire as the full suspension time
will be counted towards the timers. When no monotonic clock source is in
use, then libev will again assume a timejump and adjust accordingly.

It might be beneficial for this latter case to call C<ev_suspend>
and C<ev_resume> in code that handles C<SIGTSTP>, to at least get
deterministic behaviour in this case (you can do nothing against
C<SIGSTOP>).

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_timer_init (ev_timer *, callback, ev_tstamp after, ev_tstamp repeat)

=item ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)

Configure the timer to trigger after C<after> seconds. If C<repeat>
is C<0.>, then it will automatically be stopped once the timeout is
reached. If it is positive, then the timer will automatically be
configured to trigger again C<repeat> seconds later, again, and again,
until stopped manually.

The timer itself will do a best-effort at avoiding drift, that is, if
you configure a timer to trigger every 10 seconds, then it will normally
trigger at exactly 10 second intervals. If, however, your program cannot
keep up with the timer (because it takes longer than those 10 seconds to
do stuff) the timer will not fire more than once per event loop iteration.

=item ev_timer_again (loop, ev_timer *)

This will act as if the timer timed out, and restarts it again if it is
repeating. It basically works like calling C<ev_timer_stop>, updating the
timeout to the C<repeat> value and calling C<ev_timer_start>.

The exact semantics are as in the following rules, all of which will be
applied to the watcher:

=over 4

=item If the timer is pending, the pending status is always cleared.

=item If the timer is started but non-repeating, stop it (as if it timed
out, without invoking it).

=item If the timer is repeating, make the C<repeat> value the new timeout
and start the timer, if necessary.

=back

This sounds a bit complicated, see L</Be smart about timeouts>, above, for a
usage example.

=item ev_tstamp ev_timer_remaining (loop, ev_timer *)

Returns the remaining time until a timer fires. If the timer is active,
then this time is relative to the current event loop time, otherwise it's
the timeout value currently configured.

That is, after an C<ev_timer_set (w, 5, 7)>, C<ev_timer_remaining> returns
C<5>. When the timer is started and one second passes, C<ev_timer_remaining>
will return C<4>. When the timer expires and is restarted, it will return
roughly C<7> (likely slightly less as callback invocation takes some time,
too), and so on.

=item ev_tstamp repeat [read-write]

The current C<repeat> value. Will be used each time the watcher times out
or C<ev_timer_again> is called, and determines the next timeout (if any),
which is also when any modifications are taken into account.

=back

=head3 Examples

Example: Create a timer that fires after 60 seconds.

   static void
   one_minute_cb (struct ev_loop *loop, ev_timer *w, int revents)
   {
     .. one minute over, w is actually stopped right here
   }

   ev_timer mytimer;
   ev_timer_init (&mytimer, one_minute_cb, 60., 0.);
   ev_timer_start (loop, &mytimer);

Example: Create a timeout timer that times out after 10 seconds of
inactivity.

   static void
   timeout_cb (struct ev_loop *loop, ev_timer *w, int revents)
   {
     .. ten seconds without any activity
   }

   ev_timer mytimer;
   ev_timer_init (&mytimer, timeout_cb, 0., 10.); /* note, only repeat used */
   ev_timer_again (loop, &mytimer); /* start timer */
   ev_run (loop, 0);

   // and in some piece of code that gets executed on any "activity":
   // reset the timeout to start ticking again at 10 seconds
   ev_timer_again (loop, &mytimer);


=head2 C<ev_periodic> - to cron or not to cron?

Periodic watchers are also timers of a kind, but they are very versatile
(and unfortunately a bit complex).

Unlike C<ev_timer>, periodic watchers are not based on real time (or
relative time, the physical time that passes) but on wall clock time
(absolute time, the thing you can read on your calendar or clock). The
difference is that wall clock time can run faster or slower than real
time, and time jumps are not uncommon (e.g. when you adjust your
wrist-watch).

You can tell a periodic watcher to trigger after some specific point
in time: for example, if you tell a periodic watcher to trigger "in 10
seconds" (by specifying e.g. C<ev_now () + 10.>, that is, an absolute time
not a delay) and then reset your system clock to January of the previous
year, then it will take a year or more to trigger the event (unlike an
C<ev_timer>, which would still trigger roughly 10 seconds after starting
it, as it uses a relative timeout).

C<ev_periodic> watchers can also be used to implement vastly more complex
timers, such as triggering an event on each "midnight, local time", or
other complicated rules. This cannot be done with C<ev_timer> watchers, as
those cannot react to time jumps.

As with timers, the callback is guaranteed to be invoked only when the
point in time where it is supposed to trigger has passed. If multiple
timers become ready during the same loop iteration then the ones with
earlier time-out values are invoked before ones with later time-out values
(but this is no longer true when a callback calls C<ev_run> recursively).

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_periodic_init (ev_periodic *, callback, ev_tstamp offset, ev_tstamp interval, reschedule_cb)

=item ev_periodic_set (ev_periodic *, ev_tstamp offset, ev_tstamp interval, reschedule_cb)

Lots of arguments, let's sort it out... There are basically three modes of
operation, and we will explain them from simplest to most complex:

=over 4

=item * absolute timer (offset = absolute time, interval = 0, reschedule_cb = 0)

In this configuration the watcher triggers an event after the wall clock
time C<offset> has passed. It will not repeat and will not adjust when a
time jump occurs, that is, if it is to be run at January 1st 2011 then it
will be stopped and invoked when the system clock reaches or surpasses
this point in time.

=item * repeating interval timer (offset = offset within interval, interval > 0, reschedule_cb = 0)

In this mode the watcher will always be scheduled to time out at the next
C<offset + N * interval> time (for some integer N, which can also be
negative) and then repeat, regardless of any time jumps. The C<offset>
argument is merely an offset into the C<interval> periods.

This can be used to create timers that do not drift with respect to the
system clock, for example, here is an C<ev_periodic> that triggers each
hour, on the hour (with respect to UTC):

   ev_periodic_set (&periodic, 0., 3600., 0);

This doesn't mean there will always be 3600 seconds in between triggers,
but only that the callback will be called when the system time shows a
full hour (UTC), or more correctly, when the system time is evenly divisible
by 3600.

Another way to think about it (for the mathematically inclined) is that
C<ev_periodic> will try to run the callback in this mode at the next possible
time where C<time = offset (mod interval)>, regardless of any time jumps.

The C<interval> I<MUST> be positive, and for numerical stability, the
interval value should be higher than C<1/8192> (which is around 100
microseconds) and C<offset> should not be negative and should have
at most a similar magnitude as the current time (say, within a factor of
ten). Typical values for offset are, in fact, C<0> or something between
C<0> and C<interval>, which is also the recommended range.

Note also that there is an upper limit to how often a timer can fire (CPU
speed for example), so if C<interval> is very small then timing stability
will of course deteriorate. Libev itself tries to be exact to about one
millisecond (if the OS supports it and the machine is fast enough).

=item * manual reschedule mode (offset ignored, interval ignored, reschedule_cb = callback)

In this mode the values for C<interval> and C<offset> are both being
ignored. Instead, each time the periodic watcher gets scheduled, the
reschedule callback will be called with the watcher as first, and the
current time as second argument.

NOTE: I<This callback MUST NOT stop or destroy any periodic watcher, ever,
or make ANY other event loop modifications whatsoever, unless explicitly
allowed by documentation here>.

If you need to stop it, return C<now + 1e30> (or so, fudge fudge) and stop
it afterwards (e.g. by starting an C<ev_prepare> watcher, which is the
only event loop modification you are allowed to do).

The callback prototype is C<ev_tstamp (*reschedule_cb)(ev_periodic
*w, ev_tstamp now)>, e.g.:

   static ev_tstamp
   my_rescheduler (ev_periodic *w, ev_tstamp now)
   {
     return now + 60.;
   }

It must return the next time to trigger, based on the passed time value
(that is, the lowest time value larger than or equal to the second
argument). It will usually be called just before the callback will be
triggered, but might be called at other times, too.

NOTE: I<< This callback must always return a time that is higher than or
equal to the passed C<now> value >>.

This can be used to create very complex timers, such as a timer that
triggers on "next midnight, local time". To do this, you would calculate the
next midnight after C<now> and return the timestamp value for this. How
you do this is, again, up to you (but it is not trivial, which is the main
reason I omitted it as an example).

=back

=item ev_periodic_again (loop, ev_periodic *)

Simply stops and restarts the periodic watcher again. This is only useful
when you changed some parameters or the reschedule callback would return
a different time than the last time it was called (e.g. in a crond like
program when the crontabs have changed).

=item ev_tstamp ev_periodic_at (ev_periodic *)

When active, returns the absolute time that the watcher is supposed
to trigger next. This is not the same as the C<offset> argument to
C<ev_periodic_set>, but indeed works even in interval and manual
rescheduling modes.

=item ev_tstamp offset [read-write]

When repeating, this contains the offset value, otherwise this is the
absolute point in time (the C<offset> value passed to C<ev_periodic_set>,
although libev might modify this value for better numerical stability).

Can be modified any time, but changes only take effect when the periodic
timer fires or C<ev_periodic_again> is being called.

=item ev_tstamp interval [read-write]

The current interval value. Can be modified any time, but changes only
take effect when the periodic timer fires or C<ev_periodic_again> is being
called.

=item ev_tstamp (*reschedule_cb)(ev_periodic *w, ev_tstamp now) [read-write]

The current reschedule callback, or C<0>, if this functionality is
switched off. Can be changed any time, but changes only take effect when
the periodic timer fires or C<ev_periodic_again> is being called.

=back

=head3 Examples

Example: Call a callback every hour, or, more precisely, whenever the
system time is divisible by 3600. The callback invocation times have
potentially a lot of jitter, but good long-term stability.

   static void
   clock_cb (struct ev_loop *loop, ev_periodic *w, int revents)
   {
     ... it's now a full hour (UTC, or TAI or whatever your clock follows)
   }

   ev_periodic hourly_tick;
   ev_periodic_init (&hourly_tick, clock_cb, 0., 3600., 0);
   ev_periodic_start (loop, &hourly_tick);

Example: The same as above, but use a reschedule callback to do it:

   #include <math.h>

   static ev_tstamp
   my_scheduler_cb (ev_periodic *w, ev_tstamp now)
   {
     return now + (3600. - fmod (now, 3600.));
   }

   ev_periodic_init (&hourly_tick, clock_cb, 0., 0., my_scheduler_cb);

Example: Call a callback every hour, starting now:

   ev_periodic hourly_tick;
   ev_periodic_init (&hourly_tick, clock_cb,
                     fmod (ev_now (loop), 3600.), 3600., 0);
   ev_periodic_start (loop, &hourly_tick);


=head2 C<ev_signal> - signal me when a signal gets signalled!

Signal watchers will trigger an event when the process receives a specific
signal one or more times. Even though signals are very asynchronous, libev
will try its best to deliver signals synchronously, i.e. as part of the
normal event processing, like any other event.

If you want signals to be delivered truly asynchronously, just use
C<sigaction> as you would do without libev and forget about sharing
the signal. You can even use C<ev_async> from a signal handler to
synchronously wake up an event loop.

You can configure as many watchers as you like for the same signal, but
only within the same loop, i.e. you can watch for C<SIGINT> in your
default loop and for C<SIGIO> in another loop, but you cannot watch for
C<SIGINT> in both the default loop and another loop at the same time. At
the moment, C<SIGCHLD> is permanently tied to the default loop.

Only after the first watcher for a signal is started will libev actually
register something with the kernel. It thus coexists with your own signal
handlers as long as you don't register any with libev for the same signal.

If possible and supported, libev will install its handlers with
C<SA_RESTART> (or equivalent) behaviour enabled, so system calls should
not be unduly interrupted. If you have a problem with system calls getting
interrupted by signals you can block all signals in an C<ev_check> watcher
and unblock them in an C<ev_prepare> watcher.

=head3 The special problem of inheritance over fork/execve/pthread_create

Both the signal mask (C<sigprocmask>) and the signal disposition
(C<sigaction>) are unspecified after starting a signal watcher (and after
stopping it again), that is, libev might or might not block the signal,
and might or might not set or restore the installed signal handler (but
see C<EVFLAG_NOSIGMASK>).

While this does not matter for the signal disposition (libev never
sets signals to C<SIG_IGN>, so handlers will be reset to C<SIG_DFL> on
C<execve>), this matters for the signal mask: many programs do not expect
certain signals to be blocked.

This means that before calling C<exec> (from the child) you should reset
the signal mask to whatever "default" you expect (all clear is a good
choice usually).

The simplest way to ensure that the signal mask is reset in the child is
to install a fork handler with C<pthread_atfork> that resets it. That will
catch fork calls done by libraries (such as the libc) as well.

In current versions of libev, the signal will not be blocked indefinitely
unless you use the C<signalfd> API (C<EV_SIGNALFD>). While this reduces
the window of opportunity for problems, it will not go away, as libev
I<has> to modify the signal mask, at least temporarily.

So I can't stress this enough: I<If you do not reset your signal mask when
you expect it to be empty, you have a race condition in your code>. This
is not a libev-specific thing, this is true for most event libraries.

=head3 The special problem of threads signal handling

POSIX threads has problematic signal handling semantics, specifically,
a lot of functionality (sigfd, sigwait etc.) only really works if all
threads in a process block signals, which is hard to achieve.

When you want to use sigwait (or mix libev signal handling with your own
for the same signals), you can tackle this problem by globally blocking
all signals before creating any threads (or creating them with a fully set
sigprocmask) and also specifying the C<EVFLAG_NOSIGMASK> when creating
loops. Then designate one thread as "signal receiver thread" which handles
these signals. You can pass on any signals that libev might be interested
in by calling C<ev_feed_signal>.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_signal_init (ev_signal *, callback, int signum)

=item ev_signal_set (ev_signal *, int signum)

Configures the watcher to trigger on the given signal number (usually one
of the C<SIGxxx> constants).

=item int signum [read-only]

The signal the watcher watches out for.

=back

=head3 Examples

Example: Try to exit cleanly on SIGINT.

   static void
   sigint_cb (struct ev_loop *loop, ev_signal *w, int revents)
   {
     ev_break (loop, EVBREAK_ALL);
   }

   ev_signal signal_watcher;
   ev_signal_init (&signal_watcher, sigint_cb, SIGINT);
   ev_signal_start (loop, &signal_watcher);


=head2 C<ev_child> - watch out for process status changes

Child watchers trigger when your process receives a SIGCHLD in response to
some child status changes (most typically when a child of yours dies or
exits). It is permissible to install a child watcher I<after> the child
has been forked (which implies it might have already exited), as long
as the event loop isn't entered (or is continued from a watcher), i.e.,
forking and then immediately registering a watcher for the child is fine,
but forking and registering a watcher a few event loop iterations later or
in the next callback invocation is not.

Only the default event loop is capable of handling signals, and therefore
you can only register child watchers in the default event loop.

Due to some design glitches inside libev, child watchers will always be
handled at maximum priority (their priority is set to C<EV_MAXPRI> by
libev).

=head3 Process Interaction

Libev grabs C<SIGCHLD> as soon as the default event loop is
initialised. This is necessary to guarantee proper behaviour even if the
first child watcher is started after the child exits. The occurrence
of C<SIGCHLD> is recorded asynchronously, but child reaping is done
synchronously as part of the event loop processing. Libev always reaps all
children, even ones not watched.

=head3 Overriding the Built-In Processing

Libev offers no special support for overriding the built-in child
processing, but if your application collides with libev's default child
handler, you can override it easily by installing your own handler for
C<SIGCHLD> after initialising the default loop, and making sure the
default loop never gets destroyed. You are encouraged, however, to use an
event-based approach to child reaping and thus use libev's support for
that, so other libev users can use C<ev_child> watchers freely.

=head3 Stopping the Child Watcher

Currently, the child watcher never gets stopped, even when the
child terminates, so normally one needs to stop the watcher in the
callback. Future versions of libev might stop the watcher automatically
when a child exit is detected (calling C<ev_child_stop> twice is not a
problem).

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_child_init (ev_child *, callback, int pid, int trace)

=item ev_child_set (ev_child *, int pid, int trace)

Configures the watcher to wait for status changes of process C<pid> (or
I<any> process if C<pid> is specified as C<0>). The callback can look
at the C<rstatus> member of the C<ev_child> watcher structure to see
the status word (use the macros from C<sys/wait.h> and see your system's
C<waitpid> documentation). The C<rpid> member contains the pid of the
process causing the status change. C<trace> must be either C<0> (only
activate the watcher when the process terminates) or C<1> (additionally
activate the watcher when the process is stopped or continued).

=item int pid [read-only]

The process id this watcher watches out for, or C<0>, meaning any process id.

=item int rpid [read-write]

The process id of the process whose status changed.

=item int rstatus [read-write]

The process exit/trace status caused by C<rpid> (see your system's
C<waitpid> and C<sys/wait.h> documentation for details).

=back

=head3 Examples

Example: C<fork()> a new process and install a child handler to wait for
its completion.

   ev_child cw;

   static void
   child_cb (EV_P_ ev_child *w, int revents)
   {
     ev_child_stop (EV_A_ w);
     printf ("process %d exited with status %x\n", w->rpid, w->rstatus);
   }

   pid_t pid = fork ();

   if (pid < 0)
     // error
   else if (pid == 0)
     {
       // the forked child executes here
       exit (1);
     }
   else
     {
       ev_child_init (&cw, child_cb, pid, 0);
       ev_child_start (EV_DEFAULT_ &cw);
     }


=head2 C<ev_stat> - did the file attributes just change?

This watches a file system path for attribute changes. That is, it calls
C<stat> on that path in regular intervals (or when the OS says it changed)
and sees if it changed compared to the last time, invoking the callback
if it did. Starting the watcher C<stat>'s the file, so only changes that
happen after the watcher has been started will be reported.

The path does not need to exist: changing from "path exists" to "path does
not exist" is a status change like any other. The condition "path does not
exist" (or more correctly "path cannot be stat'ed") is signified by the
C<st_nlink> field being zero (which is otherwise always forced to be at
least one) and all the other fields of the stat buffer having unspecified
contents.

The path I<must not> end in a slash or contain special components such as
C<.> or C<..>. The path I<should> be absolute: If it is relative and
your working directory changes, then the behaviour is undefined.

Since there is no portable change notification interface available, the
portable implementation simply calls C<stat(2)> regularly on the path
to see if it changed somehow. You can specify a recommended polling
interval for this case. If you specify a polling interval of C<0> (highly
recommended!) then a I<suitable, unspecified default> value will be used
(which you can expect to be around five seconds, although this might
change dynamically). Libev will also impose a minimum interval which is
currently around C<0.1>, but that's usually overkill.

This watcher type is not meant for massive numbers of stat watchers,
as even with OS-supported change notifications, this can be
resource-intensive.

At the time of this writing, the only OS-specific interface implemented
is the Linux inotify interface (implementing kqueue support is left as an
exercise for the reader. Note, however, that the author sees no way of
implementing C<ev_stat> semantics with kqueue, except as a hint).

=head3 ABI Issues (Largefile Support)

Libev by default (unless the user overrides this) uses the default
compilation environment, which means that on systems with large file
support disabled by default, you get the 32 bit version of the stat
structure. When using the library from programs that change the ABI to
use 64 bit file offsets the programs will fail. In that case you have to
compile libev with the same flags to get binary compatibility. This is
obviously the case with any flags that change the ABI, but the problem is
most noticeably displayed with ev_stat and large file support.

The solution for this is to lobby your distribution maker to make large
file interfaces available by default (as e.g. FreeBSD does) and not
optional. Libev cannot simply switch on large file support because it has
to exchange stat structures with application programs compiled using the
default compilation environment.

=head3 Inotify and Kqueue

When C<inotify (7)> support has been compiled into libev and is present at
runtime, it will be used to speed up change detection where possible. The
inotify descriptor will be created lazily when the first C<ev_stat>
watcher is being started.

Inotify presence does not change the semantics of C<ev_stat> watchers
except that changes might be detected earlier, and in some cases, to avoid
making regular C<stat> calls. Even in the presence of inotify support
there are many cases where libev has to resort to regular C<stat> polling,
but as long as kernel 2.6.25 or newer is used (2.6.24 and older have too
many bugs), the path exists (i.e. stat succeeds), and the path resides on
a local filesystem (libev currently assumes only ext2/3, jfs, reiserfs and
xfs are fully working) libev usually gets away without polling.

There is no support for kqueue, as apparently it cannot be used to
implement this functionality, due to the requirement of having a file
descriptor open on the object at all times, and detecting renames, unlinks
etc. is difficult.

=head3 C<stat ()> is a synchronous operation

Libev doesn't normally do any kind of I/O itself, and so is not blocking
the process. The exceptions are C<ev_stat> watchers - those call C<stat
()>, which is a synchronous operation.

For local paths, this usually doesn't matter: unless the system is very
busy or the intervals between stat's are large, a stat call will be fast,
as the path data is usually in memory already (except when starting the
watcher).

For networked file systems, calling C<stat ()> can block an indefinite
time due to network issues, and even under good conditions, a stat call
often takes multiple milliseconds.

Therefore, it is best to avoid using C<ev_stat> watchers on networked
paths, although this is fully supported by libev.

=head3 The special problem of stat time resolution

The C<stat ()> system call only supports full-second resolution portably,
and even on systems where the resolution is higher, most file systems
still only support whole seconds.

That means that, if the time is the only thing that changes, you can
easily miss updates: on the first update, C<ev_stat> detects a change and
calls your callback, which does something. When there is another update
within the same second, C<ev_stat> will be unable to detect it unless the
stat data does change in other ways (e.g. file size).

The solution to this is to delay acting on a change for slightly more
than a second (or till slightly after the next full second boundary), using
a roughly one-second-delay C<ev_timer> (e.g. C<ev_timer_set (w, 0., 1.02);
ev_timer_again (loop, w)>).

The C<.02> offset is added to work around small timing inconsistencies
of some operating systems (where the second counter of the current time
might be delayed. One such system is the Linux kernel, where a call to
C<gettimeofday> might return a timestamp with a full second later than
a subsequent C<time> call - if the equivalent of C<time ()> is used to
update file times then there will be a small window where the kernel uses
the previous second to update file times but libev might already execute
the timer callback).

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_stat_init (ev_stat *, callback, const char *path, ev_tstamp interval)

=item ev_stat_set (ev_stat *, const char *path, ev_tstamp interval)

Configures the watcher to wait for status changes of the given
C<path>. The C<interval> is a hint on how quickly a change is expected to
be detected and should normally be specified as C<0> to let libev choose
a suitable value. The memory pointed to by C<path> must point to the same
path for as long as the watcher is active.

The callback will receive an C<EV_STAT> event when a change was detected,
relative to the attributes at the time the watcher was started (or the
last change was detected).

=item ev_stat_stat (loop, ev_stat *)

Updates the stat buffer immediately with new values. If you change the
watched path in your callback, you could call this function to avoid
detecting this change (while introducing a race condition if you are not
the only one changing the path). Can also be useful simply to find out the
new values.

=item ev_statdata attr [read-only]

The most-recently detected attributes of the file. Although the type is
C<ev_statdata>, this is usually the (or one of the) C<struct stat> types
suitable for your system, but you can only rely on the POSIX-standardised
members to be present. If the C<st_nlink> member is C<0>, then there was
some error while C<stat>ing the file.

=item ev_statdata prev [read-only]

The previous attributes of the file. The callback gets invoked whenever
C<prev> != C<attr>, or, more precisely, one or more of these members
differ: C<st_dev>, C<st_ino>, C<st_mode>, C<st_nlink>, C<st_uid>,
C<st_gid>, C<st_rdev>, C<st_size>, C<st_atime>, C<st_mtime>, C<st_ctime>.

=item ev_tstamp interval [read-only]

The specified interval.

=item const char *path [read-only]

The file system path that is being watched.

=back

=head3 Examples

Example: Watch C</etc/passwd> for attribute changes.

   static void
   passwd_cb (struct ev_loop *loop, ev_stat *w, int revents)
   {
     /* /etc/passwd changed in some way */
     if (w->attr.st_nlink)
       {
         printf ("passwd current size  %ld\n", (long)w->attr.st_size);
         printf ("passwd current atime %ld\n", (long)w->attr.st_atime);
         printf ("passwd current mtime %ld\n", (long)w->attr.st_mtime);
       }
     else
       /* you shalt not abuse printf for puts */
       puts ("wow, /etc/passwd is not there, expect problems. "
             "if this is windows, they already arrived\n");
   }

   ...
   ev_stat passwd;

   ev_stat_init (&passwd, passwd_cb, "/etc/passwd", 0.);
   ev_stat_start (loop, &passwd);

Example: Like above, but additionally use a one-second delay so we do not
miss updates (however, frequent updates will delay processing, too, so
one might do the work both on C<ev_stat> callback invocation I<and> on
C<ev_timer> callback invocation).

   static ev_stat passwd;
   static ev_timer timer;

   static void
   timer_cb (EV_P_ ev_timer *w, int revents)
   {
     ev_timer_stop (EV_A_ w);

     /* now it's one second after the most recent passwd change */
   }

   static void
   stat_cb (EV_P_ ev_stat *w, int revents)
   {
     /* reset the one-second timer */
     ev_timer_again (EV_A_ &timer);
   }

   ...
   ev_stat_init (&passwd, stat_cb, "/etc/passwd", 0.);
   ev_stat_start (loop, &passwd);
   ev_timer_init (&timer, timer_cb, 0., 1.02);


=head2 C<ev_idle> - when you've got nothing better to do...

Idle watchers trigger events when no other events of the same or higher
priority are pending (prepare, check and other idle watchers do not count
as receiving "events").

That is, as long as your process is busy handling sockets or timeouts
(or even signals, imagine) of the same or higher priority it will not be
triggered. But when your process is idle (or only lower-priority watchers
are pending), the idle watchers are being called once per event loop
iteration - until stopped, that is, or your process receives more events
and becomes busy again with higher priority stuff.

The most noteworthy effect is that as long as any idle watchers are
active, the process will not block when waiting for new events.

Apart from keeping your process non-blocking (which is a useful
effect on its own sometimes), idle watchers are a good place to do
"pseudo-background processing", or delay processing stuff to after the
event loop has handled all outstanding events.

=head3 Abusing an C<ev_idle> watcher for its side-effect

As long as there is at least one active idle watcher, libev will never
sleep unnecessarily. Or in other words, it will loop as fast as possible.
For this to work, the idle watcher doesn't need to be invoked at all - the
lowest priority will do.

This mode of operation can be useful together with an C<ev_check> watcher,
to do something on each event loop iteration - for example to balance load
between different connections.

See L</Abusing an ev_check watcher for its side-effect> for a longer
example.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_idle_init (ev_idle *, callback)

Initialises and configures the idle watcher - it has no parameters of any
kind. There is a C<ev_idle_set> macro, but using it is utterly pointless,
believe me.

=back

=head3 Examples

Example: Dynamically allocate an C<ev_idle> watcher, start it, and in the
callback, free it. Also, use no error checking, as usual.

   static void
   idle_cb (struct ev_loop *loop, ev_idle *w, int revents)
   {
     // stop the watcher
     ev_idle_stop (loop, w);

     // now we can free it
     free (w);

     // now do something you wanted to do when the program has
     // no longer anything immediate to do.
   }

   ev_idle *idle_watcher = malloc (sizeof (ev_idle));
   ev_idle_init (idle_watcher, idle_cb);
   ev_idle_start (loop, idle_watcher);


=head2 C<ev_prepare> and C<ev_check> - customise your event loop!

Prepare and check watchers are often (but not always) used in pairs:
prepare watchers get invoked before the process blocks and check watchers
afterwards.

You I<must not> call C<ev_run> (or similar functions that enter the
current event loop) or C<ev_loop_fork> from either C<ev_prepare> or
C<ev_check> watchers. Other loops than the current one are fine,
however. The rationale behind this is that you do not need to check
for recursion in those watchers, i.e. the sequence will always be
C<ev_prepare>, blocking, C<ev_check> so if you have one watcher of each
kind they will always be called in pairs bracketing the blocking call.

Their main purpose is to integrate other event mechanisms into libev and
their use is somewhat advanced. They could be used, for example, to track
variable changes, implement your own watchers, integrate net-snmp or a
coroutine library and lots more. They are also occasionally useful if
you cache some data and want to flush it before blocking (for example,
in X programs you might want to do an C<XFlush ()> in an C<ev_prepare>
watcher).

This is done by examining in each prepare call which file descriptors
need to be watched by the other library, registering C<ev_io> watchers
for them and starting an C<ev_timer> watcher for any timeouts (many
libraries provide exactly this functionality). Then, in the check watcher,
you check for any events that occurred (by checking the pending status
of all watchers and stopping them) and call back into the library. The
I/O and timer callbacks will never actually be called (but must be valid
nevertheless, because you never know, you know?).

As another example, the Perl Coro module uses these hooks to integrate
coroutines into libev programs, by yielding to other active coroutines
during each prepare and only letting the process block if no coroutines
are ready to run (it's actually more complicated: it only runs coroutines
with priority higher than or equal to the event loop and one coroutine
of lower priority, but only once, using idle watchers to keep the event
loop from blocking if lower-priority coroutines are active, thus mapping
low-priority coroutines to idle/background tasks).

When used for this purpose, it is recommended to give C<ev_check> watchers
highest (C<EV_MAXPRI>) priority, to ensure that they are being run before
any other watchers after the poll (this doesn't matter for C<ev_prepare>
watchers).

Also, C<ev_check> watchers (and C<ev_prepare> watchers, too) should not
activate ("feed") events into libev. While libev fully supports this, they
might get executed before other C<ev_check> watchers did their job. As
C<ev_check> watchers are often used to embed other (non-libev) event
loops those other event loops might be in an unusable state until their
C<ev_check> watcher ran (always remind yourself to coexist peacefully with
others).

=head3 Abusing an C<ev_check> watcher for its side-effect

C<ev_check> (and less often also C<ev_prepare>) watchers can also be
useful because they are called once per event loop iteration. For
example, if you want to handle a large number of connections fairly, you
normally only do a bit of work for each active connection, and if there
is more work to do, you wait for the next event loop iteration, so other
connections have a chance of making progress.

Using an C<ev_check> watcher is almost enough: it will be called on the
next event loop iteration. However, that isn't as soon as possible -
without external events, your C<ev_check> watcher will not be invoked.

This is where C<ev_idle> watchers come in handy - all you need is a
single global idle watcher that is active as long as you have one active
C<ev_check> watcher. The C<ev_idle> watcher makes sure the event loop
will not sleep, and the C<ev_check> watcher makes sure a callback gets
invoked. Neither watcher alone can do that.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_prepare_init (ev_prepare *, callback)

=item ev_check_init (ev_check *, callback)

Initialises and configures the prepare or check watcher - they have no
parameters of any kind. There are C<ev_prepare_set> and C<ev_check_set>
macros, but using them is utterly, utterly, utterly and completely
pointless.

=back

=head3 Examples

There are a number of principal ways to embed other event loops or modules
into libev. Here are some ideas on how to include libadns into libev
(there is a Perl module named C<EV::ADNS> that does this, which you could
use as a working example. Another Perl module named C<EV::Glib> embeds a
Glib main context into libev, and finally, C<Glib::EV> embeds EV into the
Glib event loop).

Method 1: Add IO watchers and a timeout watcher in a prepare handler,
and in a check watcher, destroy them and call into libadns. What follows
is pseudo-code only of course. This requires you to either use a low
priority for the check watcher or use C<ev_clear_pending> explicitly, as
the callbacks for the IO/timeout watchers might not have been called yet.

   static ev_io iow [nfd];
   static ev_timer tw;

   static void
   io_cb (struct ev_loop *loop, ev_io *w, int revents)
   {
   }

   // create io watchers for each fd and a timer before blocking
   static void
   adns_prepare_cb (struct ev_loop *loop, ev_prepare *w, int revents)
   {
     int timeout = 3600000;
     struct pollfd fds [nfd];
     // actual code will need to loop here and realloc etc.
     adns_beforepoll (ads, fds, &nfd, &timeout, timeval_from (ev_time ()));

     /* the callback is illegal, but won't be called as we stop during check */
     ev_timer_init (&tw, 0, timeout * 1e-3, 0.);
     ev_timer_start (loop, &tw);

     // create one ev_io per pollfd
     for (int i = 0; i < nfd; ++i)
       {
         ev_io_init (iow + i, io_cb, fds [i].fd,
           ((fds [i].events & POLLIN ? EV_READ : 0)
            | (fds [i].events & POLLOUT ? EV_WRITE : 0)));

         fds [i].revents = 0;
         ev_io_start (loop, iow + i);
       }
   }

   // stop all watchers after blocking
   static void
   adns_check_cb (struct ev_loop *loop, ev_check *w, int revents)
   {
     ev_timer_stop (loop, &tw);

     for (int i = 0; i < nfd; ++i)
       {
         // set the relevant poll flags
         // could also call adns_processreadable etc. here
         struct pollfd *fd = fds + i;
         int revents = ev_clear_pending (iow + i);
         if (revents & EV_READ ) fd->revents |= fd->events & POLLIN;
         if (revents & EV_WRITE) fd->revents |= fd->events & POLLOUT;

         // now stop the watcher
         ev_io_stop (loop, iow + i);
       }

     adns_afterpoll (adns, fds, nfd, timeval_from (ev_now (loop)));
   }

Method 2: This would be just like method 1, but you run C<adns_afterpoll>
in the prepare watcher and would dispose of the check watcher.

Method 3: If the module to be embedded supports explicit event
notification (libadns does), you can also make use of the actual watcher
callbacks, and only destroy/create the watchers in the prepare watcher.

   static void
   timer_cb (EV_P_ ev_timer *w, int revents)
   {
     adns_state ads = (adns_state)w->data;
     update_now (EV_A);

     adns_processtimeouts (ads, &tv_now);
   }

   static void
   io_cb (EV_P_ ev_io *w, int revents)
   {
     adns_state ads = (adns_state)w->data;
     update_now (EV_A);

     if (revents & EV_READ ) adns_processreadable  (ads, w->fd, &tv_now);
     if (revents & EV_WRITE) adns_processwriteable (ads, w->fd, &tv_now);
   }

   // do not ever call adns_afterpoll

Method 4: Do not use a prepare or check watcher because the module you
want to embed is not flexible enough to support it. Instead, you can
override their poll function. The drawback with this solution is that the
main loop is now no longer controllable by EV. The C<Glib::EV> module uses
this approach, effectively embedding EV as a client into the horrible
libglib event loop.

   static gint
   event_poll_func (GPollFD *fds, guint nfds, gint timeout)
   {
     int got_events = 0;

     for (n = 0; n < nfds; ++n)
       // create/start io watcher that sets the relevant bits in fds[n] and increment got_events

     if (timeout >= 0)
       // create/start timer

     // poll
     ev_run (EV_A_ 0);

     // stop timer again
     if (timeout >= 0)
       ev_timer_stop (EV_A_ &to);

     // stop io watchers again - their callbacks should have set the relevant bits in fds
     for (n = 0; n < nfds; ++n)
       ev_io_stop (EV_A_ iow [n]);

     return got_events;
   }


=head2 C<ev_embed> - when one backend isn't enough...

This is a rather advanced watcher type that lets you embed one event loop
into another (currently only C<ev_io> events are supported in the embedded
loop, other types of watchers might be handled in a delayed or incorrect
fashion and must not be used).

There are primarily two reasons you would want that: work around bugs and
prioritise I/O.

As an example for a bug workaround, the kqueue backend might only support
sockets on some platform, so it is unusable as generic backend, but you
still want to make use of it because you have many sockets and it scales
so nicely. In this case, you would create a kqueue-based loop and embed
it into your default loop (which might use e.g. poll). Overall operation
will be a bit slower because first libev has to call C<poll> and then
C<kevent>, but at least you can use both mechanisms for what they are
best: C<kqueue> for scalable sockets and C<poll> if you want it to work :)

As for prioritising I/O: under rare circumstances you have the case where
some fds have to be watched and handled very quickly (with low latency),
and even priorities and idle watchers might have too much overhead. In
this case you would put all the high priority stuff in one loop and all
the rest in a second one, and embed the second one in the first.

As long as the watcher is active, the callback will be invoked every
time there might be events pending in the embedded loop. The callback
must then call C<ev_embed_sweep (mainloop, watcher)> to make a single
sweep and invoke their callbacks (the callback doesn't need to invoke the
C<ev_embed_sweep> function directly, it could also start an idle watcher
to give the embedded loop strictly lower priority for example).

You can also set the callback to C<0>, in which case the embed watcher
will automatically execute the embedded loop sweep whenever necessary.

Fork detection will be handled transparently while the C<ev_embed> watcher
is active, i.e., the embedded loop will automatically be forked when the
embedding loop forks. In other cases, the user is responsible for calling
C<ev_loop_fork> on the embedded loop.

Unfortunately, not all backends are embeddable: only the ones returned by
C<ev_embeddable_backends> are, which, unfortunately, does not include any
portable one.

So when you want to use this feature you will always have to be prepared
that you cannot get an embeddable loop. The recommended way to get around
this is to have a separate variable for your embeddable loop, try to
create it, and if that fails, use the normal loop for everything.

=head3 C<ev_embed> and fork

While the C<ev_embed> watcher is running, forks in the embedding loop will
automatically be applied to the embedded loop as well, so no special
fork handling is required in that case. When the watcher is not running,
however, it is still the task of the libev user to call C<ev_loop_fork ()>
as applicable.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)

=item ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)

Configures the watcher to embed the given loop, which must be
embeddable. If the callback is C<0>, then C<ev_embed_sweep> will be
invoked automatically, otherwise it is the responsibility of the callback
to invoke it (it will continue to be called until the sweep has been done,
if you do not want that, you need to temporarily stop the embed watcher).

=item ev_embed_sweep (loop, ev_embed *)

Make a single, non-blocking sweep over the embedded loop. This works
similarly to C<ev_run (embedded_loop, EVRUN_NOWAIT)>, but in the most
appropriate way for embedded loops.

=item struct ev_loop *other [read-only]

The embedded event loop.

=back

=head3 Examples

Example: Try to get an embeddable event loop and embed it into the default
event loop. If that is not possible, use the default loop. The default
loop is stored in C<loop_hi>, while the embeddable loop is stored in
C<loop_lo> (which is C<loop_hi> in the case no embeddable loop can be
used).

   struct ev_loop *loop_hi = ev_default_init (0);
   struct ev_loop *loop_lo = 0;
   ev_embed embed;

   // see if there is a chance of getting one that works
   // (remember that a flags value of 0 means autodetection)
   loop_lo = ev_embeddable_backends () & ev_recommended_backends ()
     ? ev_loop_new (ev_embeddable_backends () & ev_recommended_backends ())
     : 0;

   // if we got one, then embed it, otherwise default to loop_hi
   if (loop_lo)
     {
       ev_embed_init (&embed, 0, loop_lo);
       ev_embed_start (loop_hi, &embed);
     }
   else
     loop_lo = loop_hi;

Example: Check if kqueue is available but not recommended and create
a kqueue backend for use with sockets (which usually work with any
kqueue implementation). Store the kqueue/socket-only event loop in
C<loop_socket>. (One might optionally use C<EVFLAG_NOENV>, too).

   struct ev_loop *loop = ev_default_init (0);
   struct ev_loop *loop_socket = 0;
   ev_embed embed;

   if (ev_supported_backends () & ~ev_recommended_backends () & EVBACKEND_KQUEUE)
     if ((loop_socket = ev_loop_new (EVBACKEND_KQUEUE)))
       {
         ev_embed_init (&embed, 0, loop_socket);
         ev_embed_start (loop, &embed);
       }

   if (!loop_socket)
     loop_socket = loop;

   // now use loop_socket for all sockets, and loop for everything else


=head2 C<ev_fork> - the audacity to resume the event loop after a fork

Fork watchers are called when a C<fork ()> was detected (usually because
whoever is a good citizen cared to tell libev about it by calling
C<ev_loop_fork>). The invocation is done before the event loop blocks next
and before C<ev_check> watchers are being called, and only in the child
after the fork. If the good citizen calling C<ev_default_fork> cheats
and calls it in the wrong process, the fork handlers will be invoked, too,
of course.

=head3 The special problem of life after fork - how is it possible?

Most uses of C<fork ()> consist of forking, then some simple calls to set
up/change the process environment, followed by a call to C<exec()>. This
sequence should be handled by libev without any problems.

This changes when the application actually wants to do event handling
in the child, or in both parent and child, in effect "continuing" after the
fork.

The default mode of operation (for libev, with application help to detect
forks) is to duplicate all the state in the child, as would be expected
when I<either> the parent I<or> the child process continues.

When both processes want to continue using libev, then this is usually the
wrong result. In that case, usually one process (typically the parent) is
supposed to continue with all watchers in place as before, while the other
process typically wants to start fresh, i.e. without any active watchers.

The cleanest and most efficient way to achieve that with libev is to
simply create a new event loop, which of course will be "empty", and
use that for new watchers. This has the advantage of not touching more
memory than necessary, and thus avoiding the copy-on-write, and the
disadvantage of having to use multiple event loops (which do not support
signal watchers).

When this is not possible, or you want to use the default loop for
other reasons, then in the process that wants to start "fresh", call
C<ev_loop_destroy (EV_DEFAULT)> followed by C<ev_default_loop (...)>.
Destroying the default loop will "orphan" (not stop) all registered
watchers, so you have to be careful not to execute code that modifies
those watchers. Note also that in that case, you have to re-register any
signal watchers.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_fork_init (ev_fork *, callback)

Initialises and configures the fork watcher - it has no parameters of any
kind. There is a C<ev_fork_set> macro, but using it is utterly pointless,
really.

=back


=head2 C<ev_cleanup> - even the best things end

Cleanup watchers are called just before the event loop is being destroyed
by a call to C<ev_loop_destroy>.

While there is no guarantee that the event loop gets destroyed, cleanup
watchers provide a convenient method to install cleanup hooks for your
program, worker threads and so on - you just have to make sure to destroy the
loop when you want them to be invoked.

Cleanup watchers are invoked in the same way as any other watcher. Unlike
all other watchers, they do not keep a reference to the event loop (which
makes a lot of sense if you think about it). Like all other watchers, you
can call libev functions in the callback, except C<ev_cleanup_start>.

=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_cleanup_init (ev_cleanup *, callback)

Initialises and configures the cleanup watcher - it has no parameters of
any kind. There is a C<ev_cleanup_set> macro, but using it is utterly
pointless, I assure you.

=back

Example: Register an atexit handler to destroy the default loop, so any
cleanup functions are called.

   static void
   program_exits (void)
   {
     ev_loop_destroy (EV_DEFAULT_UC);
   }

   ...
   atexit (program_exits);


=head2 C<ev_async> - how to wake up an event loop

In general, you cannot use an C<ev_loop> from multiple threads or other
asynchronous sources such as signal handlers (as opposed to multiple event
loops - those are of course safe to use in different threads).

Sometimes, however, you need to wake up an event loop you do not control,
for example because it belongs to another thread. This is what C<ev_async>
watchers do: as long as the C<ev_async> watcher is active, you can signal
it by calling C<ev_async_send>, which is thread- and signal safe.

This functionality is very similar to C<ev_signal> watchers, as signals,
too, are asynchronous in nature, and signals, too, will be compressed
(i.e. the number of callback invocations may be less than the number of
C<ev_async_send> calls). In fact, you could use signal watchers as a kind
of "global async watchers" by using a watcher on an otherwise unused
signal, and C<ev_feed_signal> to signal this watcher from another thread,
even without knowing which loop owns the signal.

=head3 Queueing

C<ev_async> does not support queueing of data in any way. The reason
is that the author does not know of a simple (or any) algorithm for a
multiple-writer-single-reader queue that works in all cases and doesn't
need elaborate support such as pthreads or unportable memory access
semantics.

That means that if you want to queue data, you have to provide your own
queue. But at least I can tell you how to implement locking around your
queue:

=over 4

=item queueing from a signal handler context

To implement race-free queueing, you simply add to the queue in the signal
handler but you block the signal handler in the watcher callback. Here is
an example that does that for some fictitious SIGUSR1 handler:

   static ev_async mysig;

   static void
   sigusr1_handler (void)
   {
     sometype data;

     // no locking etc.
     queue_put (data);
     ev_async_send (EV_DEFAULT_ &mysig);
   }

   static void
   mysig_cb (EV_P_ ev_async *w, int revents)
   {
     sometype data;
     sigset_t block, prev;

     sigemptyset (&block);
     sigaddset (&block, SIGUSR1);
     sigprocmask (SIG_BLOCK, &block, &prev);

     while (queue_get (&data))
       process (data);

     if (sigismember (&prev, SIGUSR1))
       sigprocmask (SIG_UNBLOCK, &block, 0);
   }

(Note: pthreads in theory requires you to use C<pthread_sigmask>
instead of C<sigprocmask> when you use threads, but libev doesn't do it
either...).

=item queueing from a thread context

The strategy for threads is different, as you cannot (easily) block
threads but you can easily preempt them, so to queue safely you need to
employ a traditional mutex lock, such as in this pthread example:

   static ev_async mysig;
   static pthread_mutex_t mymutex = PTHREAD_MUTEX_INITIALIZER;

   static void
   otherthread (void)
   {
     // only need to lock the actual queueing operation
     pthread_mutex_lock (&mymutex);
     queue_put (data);
     pthread_mutex_unlock (&mymutex);

     ev_async_send (EV_DEFAULT_ &mysig);
   }

   static void
   mysig_cb (EV_P_ ev_async *w, int revents)
   {
     pthread_mutex_lock (&mymutex);

     while (queue_get (&data))
       process (data);

     pthread_mutex_unlock (&mymutex);
   }

=back


=head3 Watcher-Specific Functions and Data Members

=over 4

=item ev_async_init (ev_async *, callback)

Initialises and configures the async watcher - it has no parameters of any
kind. There is a C<ev_async_set> macro, but using it is utterly pointless,
trust me.

=item ev_async_send (loop, ev_async *)

Sends/signals/activates the given C<ev_async> watcher, that is, feeds
an C<EV_ASYNC> event on the watcher into the event loop, and instantly
returns.

Unlike C<ev_feed_event>, this call is safe to do from other threads,
signal or similar contexts (see the discussion of C<EV_ATOMIC_T> in the
embedding section below on what exactly this means).

Note that, as with other watchers in libev, multiple events might get
compressed into a single callback invocation (another way to look at
this is that C<ev_async> watchers are level-triggered: they are set on
C<ev_async_send>, reset when the event loop detects that).

This call incurs the overhead of at most one extra system call per event
loop iteration, if the event loop is blocked, and no syscall at all if
the event loop (or your program) is processing events. That means that
repeated calls are basically free (there is no need to avoid calls for
performance reasons) and that the overhead becomes smaller (typically
zero) under load.

=item bool = ev_async_pending (ev_async *)

Returns a non-zero value when C<ev_async_send> has been called on the
watcher but the event has not yet been processed (or even noted) by the
event loop.

C<ev_async_send> sets a flag in the watcher and wakes up the loop. When
the loop iterates next and checks for the watcher to have become active,
it will reset the flag again. C<ev_async_pending> can be used to very
quickly check whether invoking the loop might be a good idea.

Note that this does I<not> check whether the watcher itself is pending,
only whether it has been requested to make this watcher pending: there
is a time window between the event loop checking and resetting the async
notification, and the callback being invoked.

=back


=head1 OTHER FUNCTIONS

There are some other functions of possible interest. Described. Here. Now.

=over 4

=item ev_once (loop, int fd, int events, ev_tstamp timeout, callback)

This function combines a simple timer and an I/O watcher, calls your
callback on whichever event happens first and automatically stops both
watchers. This is useful if you want to wait for a single event on an fd
or timeout without having to allocate/configure/start/stop/free one or
more watchers yourself.

If C<fd> is less than 0, then no I/O watcher will be started and the
C<events> argument is being ignored. Otherwise, an C<ev_io> watcher for
the given C<fd> and C<events> set will be created and started.

If C<timeout> is less than 0, then no timeout watcher will be
started. Otherwise an C<ev_timer> watcher with after = C<timeout> (and
repeat = 0) will be started. C<0> is a valid timeout.

The callback has the type C<void (*cb)(int revents, void *arg)> and is
passed an C<revents> set like normal event callbacks (a combination of
C<EV_ERROR>, C<EV_READ>, C<EV_WRITE> or C<EV_TIMER>) and the C<arg>
value passed to C<ev_once>. Note that it is possible to receive I<both>
a timeout and an io event at the same time - you probably should give io
events precedence.

Example: wait up to ten seconds for data to appear on STDIN_FILENO.

   static void stdin_ready (int revents, void *arg)
   {
     if (revents & EV_READ)
       /* stdin might have data for us, joy! */;
     else if (revents & EV_TIMER)
       /* doh, nothing entered */;
   }

   ev_once (EV_DEFAULT_ STDIN_FILENO, EV_READ, 10., stdin_ready, 0);

=item ev_feed_fd_event (loop, int fd, int revents)

Feed an event on the given fd, as if a file descriptor backend detected
the given events.

=item ev_feed_signal_event (loop, int signum)

Feed an event as if the given signal occurred. See also C<ev_feed_signal>,
which is async-safe.

=back


=head1 COMMON OR USEFUL IDIOMS (OR BOTH)

This section explains some common idioms that are not immediately
obvious. Note that examples are sprinkled over the whole manual, and this
section only contains stuff that wouldn't fit anywhere else.

=head2 ASSOCIATING CUSTOM DATA WITH A WATCHER

Each watcher has, by default, a C<void *data> member that you can read
or modify at any time: libev will completely ignore it. This can be used
to associate arbitrary data with your watcher. If you need more data and
don't want to allocate memory separately and store a pointer to it in that
data member, you can also "subclass" the watcher type and provide your own
data:

   struct my_io
   {
     ev_io io;
     int otherfd;
     void *somedata;
     struct whatever *mostinteresting;
   };

   ...
   struct my_io w;
   ev_io_init (&w.io, my_cb, fd, EV_READ);

And since your callback will be called with a pointer to the watcher, you
can cast it back to your own type:

   static void my_cb (struct ev_loop *loop, ev_io *w_, int revents)
   {
     struct my_io *w = (struct my_io *)w_;
     ...
   }

More interesting and less C-conformant ways of casting your callback
function type instead have been omitted.

=head2 BUILDING YOUR OWN COMPOSITE WATCHERS

Another common scenario is to use some data structure with multiple
embedded watchers, in effect creating your own watcher that combines
multiple libev event sources into one "super-watcher":

   struct my_biggy
   {
     int some_data;
     ev_timer t1;
     ev_timer t2;
   };

In this case getting the pointer to C<my_biggy> is a bit more
complicated: Either you store the address of your C<my_biggy> struct in
the C<data> member of the watcher (for woozies or C++ coders), or you need
to use some pointer arithmetic using C<offsetof> inside your watchers (for
real programmers):

   #include <stddef.h>

   static void
   t1_cb (EV_P_ ev_timer *w, int revents)
   {
     struct my_biggy *big = (struct my_biggy *)
       (((char *)w) - offsetof (struct my_biggy, t1));
   }

   static void
   t2_cb (EV_P_ ev_timer *w, int revents)
   {
     struct my_biggy *big = (struct my_biggy *)
       (((char *)w) - offsetof (struct my_biggy, t2));
   }

=head2 AVOIDING FINISHING BEFORE RETURNING

Often you have structures like this in event-based programs:

  callback ()
  {
    free (request);
  }

  request = start_new_request (..., callback);

The intent is to start some "lengthy" operation. The C<request> could be
used to cancel the operation, or do other things with it.

It's not uncommon to have code paths in C<start_new_request> that
immediately invoke the callback, for example, to report errors. Or you add
some caching layer that finds that it can skip the lengthy aspects of the
operation and simply invoke the callback with the result.

The problem here is that this will happen I<before> C<start_new_request>
has returned, so C<request> is not set.

Even if you pass the request by some safer means to the callback, you
might want to do something to the request after starting it, such as
canceling it, which probably isn't working so well when the callback has
already been invoked.

A common way around all these issues is to make sure that
C<start_new_request> I<always> returns before the callback is invoked. If
C<start_new_request> immediately knows the result, it can artificially
delay invoking the callback by using a C<prepare> or C<idle> watcher for
example, or more sneakily, by reusing an existing (stopped) watcher and
pushing it into the pending queue:

   ev_set_cb (watcher, callback);
   ev_feed_event (EV_A_ watcher, 0);

This way, C<start_new_request> can safely return before the callback is
invoked, while not delaying callback invocation too much.

=head2 MODAL/NESTED EVENT LOOP INVOCATIONS AND EXIT CONDITIONS

Often (especially in GUI toolkits) there are places where you have
I<modal> interaction, which is most easily implemented by recursively
invoking C<ev_run>.

This brings the problem of exiting - a callback might want to finish the
main C<ev_run> call, but not the nested one (e.g. user clicked "Quit", but
a modal "Are you sure?" dialog is still waiting), or just the nested one
and not the main one (e.g. user clicked "Ok" in a modal dialog), or some
other combination: In these cases, a simple C<ev_break> will not work.

The solution is to maintain a "break this loop" variable for each C<ev_run>
invocation, and use a loop around C<ev_run> until the condition is
triggered, using C<EVRUN_ONCE>:

   // main loop
   int exit_main_loop = 0;

   while (!exit_main_loop)
     ev_run (EV_DEFAULT_ EVRUN_ONCE);

   // in a modal watcher
   int exit_nested_loop = 0;

   while (!exit_nested_loop)
     ev_run (EV_A_ EVRUN_ONCE);

To exit from any of these loops, just set the corresponding exit variable:

   // exit modal loop
   exit_nested_loop = 1;

   // exit main program, after modal loop is finished
   exit_main_loop = 1;

   // exit both
   exit_main_loop = exit_nested_loop = 1;

=head2 THREAD LOCKING EXAMPLE

Here is a fictitious example of how to run an event loop in a different
thread from where callbacks are being invoked and watchers are
created/added/removed.

For a real-world example, see the C<EV::Loop::Async> perl module,
which uses exactly this technique (which is suited for many high-level
languages).

The example uses a pthread mutex to protect the loop data, a condition
variable to wait for callback invocations, an async watcher to notify the
event loop thread and an unspecified mechanism to wake up the main thread.

First, you need to associate some data with the event loop:

   typedef struct {
     mutex_t lock; /* global loop lock */
     ev_async async_w;
     thread_t tid;
     cond_t invoke_cv;
   } userdata;

   void prepare_loop (EV_P)
   {
      // for simplicity, we use a static userdata struct.
      static userdata u;

      ev_async_init (&u.async_w, async_cb);
      ev_async_start (EV_A_ &u.async_w);

      pthread_mutex_init (&u.lock, 0);
      pthread_cond_init (&u.invoke_cv, 0);

      // now associate this with the loop
      ev_set_userdata (EV_A_ &u);
      ev_set_invoke_pending_cb (EV_A_ l_invoke);
      ev_set_loop_release_cb (EV_A_ l_release, l_acquire);

      // then create the thread running ev_run
      pthread_create (&u.tid, 0, l_run, EV_A);
   }

The callback for the C<ev_async> watcher does nothing: the watcher is used
solely to wake up the event loop so it takes notice of any new watchers
that might have been added:

   static void
   async_cb (EV_P_ ev_async *w, int revents)
   {
      // just used for the side effects
   }

The C<l_release> and C<l_acquire> callbacks simply unlock/lock the mutex
protecting the loop data, respectively.

   static void
   l_release (EV_P)
   {
     userdata *u = ev_userdata (EV_A);
     pthread_mutex_unlock (&u->lock);
   }

   static void
   l_acquire (EV_P)
   {
     userdata *u = ev_userdata (EV_A);
     pthread_mutex_lock (&u->lock);
   }

The event loop thread first acquires the mutex, and then jumps straight
into C<ev_run>:

   void *
   l_run (void *thr_arg)
   {
     struct ev_loop *loop = (struct ev_loop *)thr_arg;

     l_acquire (EV_A);
     pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, 0);
     ev_run (EV_A_ 0);
     l_release (EV_A);

     return 0;
   }

Instead of invoking all pending watchers, the C<l_invoke> callback will
signal the main thread via some unspecified mechanism (signals? pipe
writes? C<Async::Interrupt>?) and then waits until all pending watchers
have been called (in a while loop because a) spurious wakeups are possible
and b) skipping inter-thread-communication when there are no pending
watchers is very beneficial):

   static void
   l_invoke (EV_P)
   {
     userdata *u = ev_userdata (EV_A);

     while (ev_pending_count (EV_A))
       {
         wake_up_other_thread_in_some_magic_or_not_so_magic_way ();
         pthread_cond_wait (&u->invoke_cv, &u->lock);
       }
   }

Now, whenever the main thread gets told to invoke pending watchers, it
will grab the lock, call C<ev_invoke_pending> and then signal the loop
thread to continue:

   static void
   real_invoke_pending (EV_P)
   {
     userdata *u = ev_userdata (EV_A);

     pthread_mutex_lock (&u->lock);
     ev_invoke_pending (EV_A);
     pthread_cond_signal (&u->invoke_cv);
     pthread_mutex_unlock (&u->lock);
   }

Whenever you want to start/stop a watcher or do other modifications to an
event loop, you will now have to lock:

   ev_timer timeout_watcher;
   userdata *u = ev_userdata (EV_A);

   ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.);

   pthread_mutex_lock (&u->lock);
   ev_timer_start (EV_A_ &timeout_watcher);
   ev_async_send (EV_A_ &u->async_w);
   pthread_mutex_unlock (&u->lock);

Note that sending the C<ev_async> watcher is required because otherwise
an event loop currently blocking in the kernel will have no knowledge
about the newly added timer. By waking up the loop it will pick up any new
watchers in the next event loop iteration.

=head2 THREADS, COROUTINES, CONTINUATIONS, QUEUES... INSTEAD OF CALLBACKS

While the overhead of a callback that e.g. schedules a thread is small, it
is still an overhead. If you embed libev, and your main usage is with some
kind of threads or coroutines, you might want to customise libev so that
it doesn't need callbacks anymore.

Imagine you have coroutines that you can switch to using a function
C<switch_to (coro)>, that libev runs in a coroutine called C<libev_coro>
and that due to some magic, the currently active coroutine is stored in a
global called C<current_coro>. Then you can build your own "wait for libev
event" primitive by changing C<EV_CB_DECLARE> and C<EV_CB_INVOKE> (note
the differing C<;> conventions):

   #define EV_CB_DECLARE(type)   struct my_coro *cb;
   #define EV_CB_INVOKE(watcher) switch_to ((watcher)->cb)

That means instead of having a C callback function, you store the
coroutine to switch to in each watcher, and instead of having libev call
your callback, you instead have it switch to that coroutine.

A coroutine might now wait for an event with a function called
C<wait_for_event>. (the watcher needs to be started, as always, but it doesn't
matter when, or whether the watcher is active or not when this function is
called):

   void
   wait_for_event (ev_watcher *w)
   {
     ev_set_cb (w, current_coro);
     switch_to (libev_coro);
   }

That basically suspends the coroutine inside C<wait_for_event> and
continues the libev coroutine, which, when appropriate, switches back to
this or any other coroutine.

You can do similar tricks if you have, say, threads with an event queue -
instead of storing a coroutine, you store the queue object and instead of
switching to a coroutine, you push the watcher onto the queue and notify
any waiters.

To embed libev, see L</EMBEDDING>, but in short, it's easiest to create two
files, F<my_ev.h> and F<my_ev.c> that include the respective libev files:

   // my_ev.h
   #define EV_CB_DECLARE(type)   struct my_coro *cb;
   #define EV_CB_INVOKE(watcher) switch_to ((watcher)->cb)
   #include "../libev/ev.h"

   // my_ev.c
   #define EV_H "my_ev.h"
   #include "../libev/ev.c"

And then use F<my_ev.h> when you would normally use F<ev.h>, and compile
F<my_ev.c> into your project. When properly specifying include paths, you
can even use F<ev.h> as header file name directly.


=head1 LIBEVENT EMULATION

Libev offers a compatibility emulation layer for libevent. It cannot
emulate the internals of libevent, so here are some usage hints:

=over 4

=item * Only the libevent-1.4.1-beta API is being emulated.

This was the newest libevent version available when libev was implemented,
and is still mostly unchanged in 2010.

=item * Use it by including <event.h>, as usual.

=item * The following members are fully supported: ev_base, ev_callback,
ev_arg, ev_fd, ev_res, ev_events.

=item * Avoid using ev_flags and the EVLIST_*-macros: while ev_flags is
maintained by libev, it does not work exactly the same way as in libevent
(consider it a private API).

=item * Priorities are not currently supported. Initialising priorities
will fail and all watchers will have the same priority, even though there
is an ev_pri field.

=item * In libevent, the last base created gets the signals, in libev, the
base that registered the signal gets the signals.

=item * Other members are not supported.

=item * The libev emulation is I<not> ABI compatible with libevent, so you
need to use the libev header file and library.

=back

=head1 C++ SUPPORT

=head2 C API

The normal C API should work fine when used from C++: both ev.h and the
libev sources can be compiled as C++. Therefore, code that uses the C API
will work fine.

Proper exception specifications might have to be added to callbacks passed
to libev: exceptions may be thrown only from watcher callbacks, all
other callbacks (allocator, syserr, loop acquire/release and periodic
reschedule callbacks) must not throw exceptions, and might need a C<throw
()> specification. If you have code that needs to be compiled as both C
and C++ you can use the C<EV_THROW> macro for this:

   static void
   fatal_error (const char *msg) EV_THROW
   {
     perror (msg);
     abort ();
   }

   ...
   ev_set_syserr_cb (fatal_error);

The only API functions that can currently throw exceptions are C<ev_run>,
C<ev_invoke>, C<ev_invoke_pending> and C<ev_loop_destroy> (the latter
because it runs cleanup watchers).

Throwing exceptions in watcher callbacks is only supported if libev itself
is compiled with a C++ compiler or your C and C++ environments allow
throwing exceptions through C libraries (most do).

=head2 C++ API

Libev comes with some simplistic wrapper classes for C++ that mainly allow
you to use some convenience methods to start/stop watchers and also change
the callback model to a model using method callbacks on objects.

To use it,

   #include <ev++.h>

This automatically includes F<ev.h> and puts all of its definitions (many
of them macros) into the global namespace. All C++ specific things are
put into the C<ev> namespace. It should support all the same embedding
options as F<ev.h>, most notably C<EV_MULTIPLICITY>.

Care has been taken to keep the overhead low. The only data member the C++
classes add (compared to plain C-style watchers) is the event loop pointer
that the watcher is associated with (or no additional members at all if
you disable C<EV_MULTIPLICITY> when embedding libev).

Currently, functions, static and non-static member functions and classes
with C<operator ()> can be used as callbacks. Other types should be easy
to add as long as they only need one additional pointer for context. If
you need support for other types of functors please contact the author
(preferably after implementing it).

For all this to work, your C++ compiler either has to use the same calling
conventions as your C compiler (for static member functions), or you have
to embed libev and compile libev itself as C++.

Here is a list of things available in the C<ev> namespace:

=over 4

=item C<ev::READ>, C<ev::WRITE> etc.

These are just enum values with the same values as the C<EV_READ> etc.
macros from F<ev.h>.

=item C<ev::tstamp>, C<ev::now>

Aliases to the same types/functions as with the C<ev_> prefix.

=item C<ev::io>, C<ev::timer>, C<ev::periodic>, C<ev::idle>, C<ev::sig> etc.

For each C<ev_TYPE> watcher in F<ev.h> there is a corresponding class of
the same name in the C<ev> namespace, with the exception of C<ev_signal>
which is called C<ev::sig> to avoid clashes with the C<signal> macro
defined by many implementations.

All of those classes have these methods:

=over 4

=item ev::TYPE::TYPE ()

=item ev::TYPE::TYPE (loop)

=item ev::TYPE::~TYPE

The constructor (optionally) takes an event loop to associate the watcher
with. If it is omitted, it will use C<EV_DEFAULT>.

The constructor calls C<ev_init> for you, which means you have to call the
C<set> method before starting it.

It will not set a callback, however: You have to call the templated C<set>
method to set a callback before you can start the watcher.

(The reason why you have to use a method is a limitation in C++ which does
not allow explicit template arguments for constructors).

The destructor automatically stops the watcher if it is active.

=item w->set<class, &class::method> (object *)

This method sets the callback method to call. The method has to have a
signature of C<void (*)(ev::TYPE &, int)>: it receives the watcher as
first argument and the C<revents> as second. The object must be given as
parameter and is stored in the C<data> member of the watcher.

This method synthesizes efficient thunking code to call your method from
the C callback that libev requires. If your compiler can inline your
callback (i.e. it is visible to it at the place of the C<set> call and
your compiler is good :), then the method will be fully inlined into the
thunking function, making it as fast as a direct C callback.

Example: simple class declaration and watcher initialisation

   struct myclass
   {
     void io_cb (ev::io &w, int revents) { }
   };

   myclass obj;
   ev::io iow;
   iow.set <myclass, &myclass::io_cb> (&obj);

=item w->set (object *)

This is a variation of a method callback - leaving out the method to call
will default the method to C<operator ()>, which makes it possible to use
functor objects without having to manually specify the C<operator ()> all
the time. Incidentally, you can then also leave out the template argument
list.

The C<operator ()> method prototype must be C<void operator ()(watcher &w,
int revents)>.

See the method-C<set> above for more details.

Example: use a functor object as callback.

   struct myfunctor
   {
     void operator() (ev::io &w, int revents)
     {
       ...
     }
   };

   myfunctor f;

   ev::io w;
   w.set (&f);

=item w->set<function> (void *data = 0)

Also sets a callback, but uses a static method or plain function as
callback. The optional C<data> argument will be stored in the watcher's
C<data> member and is free for you to use.

The prototype of the C<function> must be C<void (*)(ev::TYPE &w, int)>.

See the method-C<set> above for more details.

Example: Use a plain function as callback.

   static void io_cb (ev::io &w, int revents) { }
   iow.set <io_cb> ();

=item w->set (loop)

Associates a different C<struct ev_loop> with this watcher. You can only
do this when the watcher is inactive (and not pending either).

=item w->set ([arguments])

Basically the same as C<ev_TYPE_set> (except for C<ev::embed> watchers),
with the same arguments. Either this method or a suitable start method
must be called at least once. Unlike the C counterpart, an active watcher
gets automatically stopped and restarted when reconfiguring it with this
method.

For C<ev::embed> watchers this method is called C<set_embed>, to avoid
clashing with the C<set (loop)> method.

=item w->start ()

Starts the watcher. Note that there is no C<loop> argument, as the
constructor already stores the event loop.

=item w->start ([arguments])

Instead of calling C<set> and C<start> methods separately, it is often
convenient to wrap them in one call. Uses the same type of arguments as
the configure C<set> method of the watcher.

=item w->stop ()

Stops the watcher if it is active. Again, no C<loop> argument.

=item w->again () (C<ev::timer>, C<ev::periodic> only)

For C<ev::timer> and C<ev::periodic>, this invokes the corresponding
C<ev_TYPE_again> function.

=item w->sweep () (C<ev::embed> only)

Invokes C<ev_embed_sweep>.

=item w->update () (C<ev::stat> only)

Invokes C<ev_stat_stat>.

=back

=back

Example: Define a class with two I/O and idle watchers, start the I/O
watchers in the constructor.

   class myclass
   {
     ev::io   io  ; void io_cb   (ev::io   &w, int revents);
     ev::io   io2 ; void io2_cb  (ev::io   &w, int revents);
     ev::idle idle; void idle_cb (ev::idle &w, int revents);

     myclass (int fd)
     {
       io  .set <myclass, &myclass::io_cb  > (this);
       io2 .set <myclass, &myclass::io2_cb > (this);
       idle.set <myclass, &myclass::idle_cb> (this);

       io.set (fd, ev::WRITE); // configure the watcher
       io.start ();            // start it whenever convenient

       io2.start (fd, ev::READ); // set + start in one call
     }
   };


=head1 OTHER LANGUAGE BINDINGS

Libev does not offer other language bindings itself, but bindings for a
number of languages exist in the form of third-party packages. If you know
any interesting language binding in addition to the ones listed here, drop
me a note.

=over 4

=item Perl

The EV module implements the full libev API and is actually used to test
libev. EV is developed together with libev. Apart from the EV core module,
there are additional modules that implement libev-compatible interfaces
to C<libadns> (C<EV::ADNS>, but C<AnyEvent::DNS> is preferred nowadays),
C<Net::SNMP> (C<Net::SNMP::EV>) and the C<libglib> event core (C<Glib::EV>
and C<EV::Glib>).

It can be found and installed via CPAN, its homepage is at
L<http://software.schmorp.de/pkg/EV>.

=item Python

Python bindings can be found at L<http://code.google.com/p/pyev/>. It
seems to be quite complete and well-documented.

=item Ruby

Tony Arcieri has written a ruby extension that offers access to a subset
of the libev API and adds file handle abstractions, asynchronous DNS and
more on top of it. It can be found via gem servers. Its homepage is at
L<http://rev.rubyforge.org/>.

Roger Pack reports that using the link order C<-lws2_32 -lmsvcrt-ruby-190>
makes rev work even on mingw.

=item Haskell

A haskell binding to libev is available at
L<http://hackage.haskell.org/cgi-bin/hackage-scripts/package/hlibev>.

=item D

Leandro Lucarella has written a D language binding (F<ev.d>) for libev, to
be found at L<http://www.llucax.com.ar/proj/ev.d/index.html>.

=item Ocaml

Erkki Seppala has written Ocaml bindings for libev, to be found at
L<http://modeemi.cs.tut.fi/~flux/software/ocaml-ev/>.

=item Lua

Brian Maher has written a partial interface to libev for lua (at the
time of this writing, only C<ev_io> and C<ev_timer>), to be found at
L<http://github.com/brimworks/lua-ev>.

=item Javascript

Node.js (L<http://nodejs.org>) uses libev as the underlying event library.

=item Others

There are others, and I stopped counting.

=back


=head1 MACRO MAGIC

Libev can be compiled with a variety of options, the most fundamental
of which is C<EV_MULTIPLICITY>. This option determines whether (most)
functions and callbacks have an initial C<struct ev_loop *> argument.

To make it easier to write programs that cope with either variant, the
following macros are defined:

=over 4

=item C<EV_A>, C<EV_A_>

This provides the loop I<argument> for functions, if one is required ("ev
loop argument"). The C<EV_A> form is used when this is the sole argument,
C<EV_A_> is used when other arguments are following. Example:

   ev_unref (EV_A);
   ev_timer_start (EV_A_ watcher);
   ev_run (EV_A_ 0);

It assumes the variable C<loop> of type C<struct ev_loop *> is in scope,
which is often provided by the following macro.

=item C<EV_P>, C<EV_P_>

This provides the loop I<parameter> for functions, if one is required ("ev
loop parameter"). The C<EV_P> form is used when this is the sole parameter,
C<EV_P_> is used when other parameters are following. Example:

   // this is how ev_unref is being declared
   static void ev_unref (EV_P);

   // this is how you can declare your typical callback
   static void cb (EV_P_ ev_timer *w, int revents)

It declares a parameter C<loop> of type C<struct ev_loop *>, quite
suitable for use with C<EV_A>.

=item C<EV_DEFAULT>, C<EV_DEFAULT_>

Similar to the other two macros, this gives you the value of the default
loop, if multiple loops are supported ("ev loop default"). The default loop
will be initialised if it isn't already initialised.

For non-multiplicity builds, these macros do nothing, so you always have
to initialise the loop somewhere.

=item C<EV_DEFAULT_UC>, C<EV_DEFAULT_UC_>

Usage identical to C<EV_DEFAULT> and C<EV_DEFAULT_>, but requires that the
default loop has been initialised (C<UC> == unchecked). Their behaviour
is undefined when the default loop has not been initialised by a previous
execution of C<EV_DEFAULT>, C<EV_DEFAULT_> or C<ev_default_init (...)>.

It is often prudent to use C<EV_DEFAULT> when initialising the first
watcher in a function but use C<EV_DEFAULT_UC> afterwards.

=back

Example: Declare and initialise a check watcher, utilising the above
macros so it will work regardless of whether multiple loops are supported
or not.

   static void
   check_cb (EV_P_ ev_check *w, int revents)
   {
     ev_check_stop (EV_A_ w);
   }

   ev_check check;
   ev_check_init (&check, check_cb);
   ev_check_start (EV_DEFAULT_ &check);
   ev_run (EV_DEFAULT_ 0);

=head1 EMBEDDING

Libev can be (and often is) directly embedded into host
applications. Examples of applications that embed it include the Deliantra
Game Server, the EV perl module, the GNU Virtual Private Ethernet (gvpe)
and rxvt-unicode.

The goal is to enable you to just copy the necessary files into your
source directory without having to change even a single line in them, so
you can easily upgrade by simply copying (or having a checked-out copy of
libev somewhere in your source tree).

=head2 FILESETS

Depending on which features you need, you have to include one or more sets
of files in your application.

=head3 CORE EVENT LOOP

To include only the libev core (all the C<ev_*> functions), with manual
configuration (no autoconf):

   #define EV_STANDALONE 1
   #include "ev.c"

This will automatically include F<ev.h>, too, and should be done in a
single C source file only to provide the function implementations. To use
it, do the same for F<ev.h> in all files wishing to use this API (best
done by writing a wrapper around F<ev.h> that you can include instead and
where you can put other configuration options):

   #define EV_STANDALONE 1
   #include "ev.h"

Both header files and implementation files can be compiled with a C++
compiler (at least, that's a stated goal, and breakage will be treated
as a bug).

You need the following files in your source tree, or in a directory
in your include path (e.g. in libev/ when using -Ilibev):

   ev.h
   ev.c
   ev_vars.h
   ev_wrap.h

   ev_win32.c      required on win32 platforms only

   ev_select.c     only when select backend is enabled
   ev_poll.c       only when poll backend is enabled
   ev_epoll.c      only when the epoll backend is enabled
   ev_kqueue.c     only when the kqueue backend is enabled
   ev_port.c       only when the solaris port backend is enabled

F<ev.c> includes the backend files directly when enabled, so you only need
to compile this single file.

=head3 LIBEVENT COMPATIBILITY API

To include the libevent compatibility API, also include:

   #include "event.c"

in the file including F<ev.c>, and:

   #include "event.h"

in the files that want to use the libevent API. This also includes F<ev.h>.

You need the following additional files for this:

   event.h
   event.c

=head3 AUTOCONF SUPPORT

Instead of using C<EV_STANDALONE=1> and providing your configuration in
whatever way you want, you can also C<m4_include([libev.m4])> in your
F<configure.ac> and leave C<EV_STANDALONE> undefined. F<ev.c> will then
include F<config.h> and configure itself accordingly.

For this of course you need the m4 file:

   libev.m4

=head2 PREPROCESSOR SYMBOLS/MACROS

Libev can be configured via a variety of preprocessor symbols you have to
define before including (or compiling) any of its files. The default in
the absence of autoconf is documented for every option.

Symbols marked with "(h)" do not change the ABI, and can have different
values when compiling libev vs. including F<ev.h>, so it is permissible
to redefine them before including F<ev.h> without breaking compatibility
to a compiled library. All other symbols change the ABI, which means all
users of libev and the libev code itself must be compiled with compatible
settings.

=over 4

=item EV_COMPAT3 (h)

Backwards compatibility is a major concern for libev. This is why this
release of libev comes with wrappers for the functions and symbols that
have been renamed between libev version 3 and 4.

You can disable these wrappers (to test compatibility with future
versions) by defining C<EV_COMPAT3> to C<0> when compiling your
sources. This has the additional advantage that you can drop the C<struct>
from C<struct ev_loop> declarations, as libev will provide an C<ev_loop>
typedef in that case.

In some future version, the default for C<EV_COMPAT3> will become C<0>,
and in some even more future version the compatibility code will be
removed completely.

=item EV_STANDALONE (h)

Must always be C<1> if you do not use autoconf configuration, which
keeps libev from including F<config.h>, and it also defines dummy
implementations for some libevent functions (such as logging, which is not
supported). It will also not define any of the structs usually found in
F<event.h> that are not directly supported by the libev core alone.

In standalone mode, libev will still try to automatically deduce the
configuration, but has to be more conservative.

=item EV_USE_FLOOR

If defined to be C<1>, libev will use the C<floor ()> function for its
periodic reschedule calculations, otherwise libev will fall back on a
portable (slower) implementation. If you enable this, you usually have to
link against libm or something equivalent. Enabling this when the C<floor>
function is not available will fail, so the safe default is to not enable
this.

=item EV_USE_MONOTONIC

If defined to be C<1>, libev will try to detect the availability of the
monotonic clock option at both compile time and runtime. Otherwise no
use of the monotonic clock option will be attempted. If you enable this,
you usually have to link against librt or something similar. Enabling it
when the functionality isn't available is safe, though, although you have
to make sure you link against any libraries where the C<clock_gettime>
function is hiding in (often F<-lrt>). See also C<EV_USE_CLOCK_SYSCALL>.

=item EV_USE_REALTIME

If defined to be C<1>, libev will try to detect the availability of the
real-time clock option at compile time (and assume its availability
at runtime if successful). Otherwise no use of the real-time clock
option will be attempted. This effectively replaces C<gettimeofday>
by C<clock_gettime (CLOCK_REALTIME, ...)> and will not normally affect
correctness. See the note about libraries in the description of
C<EV_USE_MONOTONIC>, though. Defaults to the opposite value of
C<EV_USE_CLOCK_SYSCALL>.

=item EV_USE_CLOCK_SYSCALL

If defined to be C<1>, libev will try to use a direct syscall instead
of calling the system-provided C<clock_gettime> function. This option
exists because on GNU/Linux, C<clock_gettime> is in C<librt>, but C<librt>
unconditionally pulls in C<libpthread>, slowing down single-threaded
programs needlessly. Using a direct syscall is slightly slower (in
theory), because no optimised vdso implementation can be used, but avoids
the pthread dependency. Defaults to C<1> on GNU/Linux with glibc 2.x or
higher, as it simplifies linking (no need for C<-lrt>).

=item EV_USE_NANOSLEEP

If defined to be C<1>, libev will assume that C<nanosleep ()> is available
and will use it for delays. Otherwise it will use C<select ()>.

=item EV_USE_EVENTFD

If defined to be C<1>, then libev will assume that C<eventfd ()> is
available and will probe for kernel support at runtime. This will improve
C<ev_signal> and C<ev_async> performance and reduce resource consumption.
If undefined, it will be enabled if the headers indicate GNU/Linux + Glibc
2.7 or newer, otherwise disabled.

=item EV_USE_SELECT

If undefined or defined to be C<1>, libev will compile in support for the
C<select>(2) backend. No attempt at auto-detection will be done: if no
other method takes over, select will be it. Otherwise the select backend
will not be compiled in.

=item EV_SELECT_USE_FD_SET

If defined to C<1>, then the select backend will use the system C<fd_set>
structure. This is useful if libev doesn't compile due to a missing
C<NFDBITS> or C<fd_mask> definition or it mis-guesses the bitset layout
on exotic systems. This usually limits the range of file descriptors to
some low limit such as 1024 or might have other limitations (winsocket
only allows 64 sockets). The C<FD_SETSIZE> macro, set before compilation,
configures the maximum size of the C<fd_set>.

=item EV_SELECT_IS_WINSOCKET

When defined to C<1>, the select backend will assume that
select/socket/connect etc. don't understand file descriptors but
want osf handles on win32 (this is the case when the select to
be used is the winsock select). This means that it will call
C<_get_osfhandle> on the fd to convert it to an OS handle. Otherwise,
it is assumed that all these functions actually work on fds, even
on win32. Should not be defined on non-win32 platforms.

=item EV_FD_TO_WIN32_HANDLE(fd)

If C<EV_SELECT_IS_WINSOCKET> is enabled, then libev needs a way to map
file descriptors to socket handles. When not defining this symbol (the
default), then libev will call C<_get_osfhandle>, which is usually
correct. In some cases, programs use their own file descriptor management,
in which case they can provide this function to map fds to socket handles.

=item EV_WIN32_HANDLE_TO_FD(handle)

If C<EV_SELECT_IS_WINSOCKET> is enabled, then libev maps handles to file
descriptors using the standard C<_open_osfhandle> function. For programs
implementing their own fd to handle mapping, overriding this function makes
it easier to do so. This can be done by defining this macro to an
appropriate value.

=item EV_WIN32_CLOSE_FD(fd)

If programs implement their own fd to handle mapping on win32, then this
macro can be used to override the C<close> function, useful to unregister
file descriptors again. Note that the replacement function has to close
the underlying OS handle.

=item EV_USE_WSASOCKET

If defined to be C<1>, libev will use C<WSASocket> to create its internal
communication socket, which works better in some environments. Otherwise,
the normal C<socket> function will be used, which works better in other
environments.

=item EV_USE_POLL

If defined to be C<1>, libev will compile in support for the C<poll>(2)
backend. If undefined, it will be enabled on non-win32 platforms. It
takes precedence over select.

=item EV_USE_EPOLL

If defined to be C<1>, libev will compile in support for the Linux
C<epoll>(7) backend. Its availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for GNU/Linux systems. If undefined, it will be enabled if the
headers indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.

=item EV_USE_KQUEUE

If defined to be C<1>, libev will compile in support for the BSD style
C<kqueue>(2) backend. Its actual availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for BSD and BSD-like systems, although on most BSDs kqueue only
supports some types of fds correctly (the only platform we found that
supports ptys for example was NetBSD), so kqueue might be compiled in, but
not be used unless explicitly requested. The best way to use it is to find
out whether kqueue supports your type of fd properly and use an embedded
kqueue loop.

=item EV_USE_PORT

If defined to be C<1>, libev will compile in support for the Solaris
10 port style backend. Its availability will be detected at runtime,
otherwise another method will be used as fallback. This is the preferred
backend for Solaris 10 systems.

=item EV_USE_DEVPOLL

Reserved for future expansion, works like the USE symbols above.

=item EV_USE_INOTIFY

If defined to be C<1>, libev will compile in support for the Linux inotify
interface to speed up C<ev_stat> watchers. Its actual availability will
be detected at runtime. If undefined, it will be enabled if the headers
indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.

=item EV_NO_SMP

If defined to be C<1>, libev will assume that memory is always coherent
between threads, that is, threads can be used, but threads never run on
different cpus (or different cpu cores). This reduces dependencies
and makes libev faster.

=item EV_NO_THREADS

If defined to be C<1>, libev will assume that it will never be called from
different threads (that includes signal handlers), which is a stronger
assumption than C<EV_NO_SMP>, above. This reduces dependencies and makes
libev faster.

=item EV_ATOMIC_T

Libev requires an integer type (suitable for storing C<0> or C<1>) whose
access is atomic with respect to other threads or signal contexts. No
such type is easily found in the C language, so you can provide your own
type that you know is safe for your purposes. It is used both for signal
handler "locking" as well as for signal and thread safety in C<ev_async>
watchers.

In the absence of this define, libev will use C<sig_atomic_t volatile>
(from F<signal.h>), which is usually good enough on most platforms.

=item EV_H (h)

The name of the F<ev.h> header file used to include it. The default if
undefined is C<"ev.h"> in F<event.h>, F<ev.c> and F<ev++.h>. This can be
used to virtually rename the F<ev.h> header file in case of conflicts.

=item EV_CONFIG_H (h)

If C<EV_STANDALONE> isn't C<1>, this variable can be used to override
F<ev.c>'s idea of where to find the F<config.h> file, similarly to
C<EV_H>, above.

=item EV_EVENT_H (h)

Similarly to C<EV_H>, this macro can be used to override F<event.c>'s idea
of how the F<event.h> header can be found, the default is C<"event.h">.

=item EV_PROTOTYPES (h)

If defined to be C<0>, then F<ev.h> will not define any function
prototypes, but still define all the structs and other symbols. This is
occasionally useful if you want to provide your own wrapper functions
around libev functions.

=item EV_MULTIPLICITY

If undefined or defined to C<1>, then all event-loop-specific functions
will have the C<struct ev_loop *> as first argument, and you can create
additional independent event loops. Otherwise there will be no support
for multiple event loops and there is no first event loop pointer
argument. Instead, all functions act on the single default loop.

Note that C<EV_DEFAULT> and C<EV_DEFAULT_> will no longer provide a
default loop when multiplicity is switched off - you always have to
initialise the loop manually in this case.

=item EV_MINPRI

=item EV_MAXPRI

The range of allowed priorities. C<EV_MINPRI> must be smaller or equal to
C<EV_MAXPRI>, but otherwise there are no non-obvious limitations. You can
provide for more priorities by overriding those symbols (usually defined
to be C<-2> and C<2>, respectively).

When doing priority-based operations, libev usually has to linearly search
all the priorities, so having many of them (hundreds) uses a lot of space
and time, so using the defaults of five priorities (-2 .. +2) is usually
fine.

If your embedding application does not need any priorities, defining these
both to C<0> will save some memory and CPU.

=item EV_PERIODIC_ENABLE, EV_IDLE_ENABLE, EV_EMBED_ENABLE, EV_STAT_ENABLE,
EV_PREPARE_ENABLE, EV_CHECK_ENABLE, EV_FORK_ENABLE, EV_SIGNAL_ENABLE,
EV_ASYNC_ENABLE, EV_CHILD_ENABLE.

If undefined or defined to be C<1> (and the platform supports it), then
the respective watcher type is supported. If defined to be C<0>, then it
is not. Disabling watcher types mainly saves code size.

=item EV_FEATURES

If you need to shave off some kilobytes of code at the expense of some
speed (but with the full API), you can define this symbol to request
certain subsets of functionality. The default is to enable all features
that can be enabled on the platform.

A typical way to use this symbol is to define it to C<0> (or to a bitset
with some broad features you want) and then selectively re-enable
additional parts you want, for example if you want everything minimal,
but multiple event loop support, async and child watchers and the poll
backend, use this:

   #define EV_FEATURES 0
   #define EV_MULTIPLICITY 1
   #define EV_USE_POLL 1
   #define EV_CHILD_ENABLE 1
   #define EV_ASYNC_ENABLE 1

The actual value is a bitset, it can be a combination of the following
values (by default, all of these are enabled):

=over 4

=item C<1> - faster/larger code

Use larger code to speed up some operations.

Currently this is used to override some inlining decisions (enlarging the
code size by roughly 30% on amd64).

When optimising for size, use of compiler flags such as C<-Os> with
gcc is recommended, as well as C<-DNDEBUG>, as libev contains a number of
assertions.

The default is off when C<__OPTIMIZE_SIZE__> is defined by your compiler
(e.g. gcc with C<-Os>).

=item C<2> - faster/larger data structures

Replaces the small 2-heap for timer management by a faster 4-heap, larger
hash table sizes and so on. This will usually further increase code size
and can additionally have an effect on the size of data structures at
runtime.

The default is off when C<__OPTIMIZE_SIZE__> is defined by your compiler
(e.g. gcc with C<-Os>).

=item C<4> - full API configuration

This enables priorities (sets C<EV_MAXPRI>=2 and C<EV_MINPRI>=-2), and
enables multiplicity (C<EV_MULTIPLICITY>=1).

=item C<8> - full API

This enables a lot of the "lesser used" API functions. See C<ev.h> for
details on which parts of the API are still available without this
feature, and do not complain if this subset changes over time.

=item C<16> - enable all optional watcher types

Enables all optional watcher types.  If you want to selectively enable
only some watcher types other than I/O and timers (e.g. prepare,
embed, async, child...) you can enable them manually by defining
C<EV_watchertype_ENABLE> to C<1> instead.

=item C<32> - enable all backends

This enables all backends - without this feature, you need to enable at
least one backend manually (C<EV_USE_SELECT> is a good choice).

=item C<64> - enable OS-specific "helper" APIs

Enable inotify, eventfd, signalfd and similar OS-specific helper APIs by
default.

=back

Compiling with C<gcc -Os -DEV_STANDALONE -DEV_USE_EPOLL=1 -DEV_FEATURES=0>
reduces the compiled size of libev from 24.7Kb code/2.8Kb data to 6.5Kb
code/0.3Kb data on my GNU/Linux amd64 system, while still giving you I/O
watchers, timers and monotonic clock support.

With an intelligent-enough linker (gcc+binutils are intelligent enough
when you use C<-Wl,--gc-sections -ffunction-sections>) functions unused by
your program might be left out as well - a binary starting a timer and an
I/O watcher then might come out at only 5Kb.

=item EV_API_STATIC

If this symbol is defined (by default it is not), then all identifiers
will have static linkage. This means that libev will not export any
identifiers, and you cannot link against libev anymore. This can be useful
when you embed libev, only want to use libev functions in a single file,
and do not want its identifiers to be visible.

To use this, define C<EV_API_STATIC> and include F<ev.c> in the file that
wants to use libev.

This option only works when libev is compiled with a C compiler, as C++
doesn't support the required declaration syntax.

=item EV_AVOID_STDIO

If this is set to C<1> at compile time, then libev will avoid using stdio
functions (printf, scanf, perror etc.). This will increase the code size
somewhat, but if your program doesn't otherwise depend on stdio and your
libc allows it, this avoids linking in the stdio library which is quite
big.

Note that error messages might become less precise when this option is
enabled.

=item EV_NSIG

The highest supported signal number, +1 (or, the number of
signals): Normally, libev tries to deduce the maximum number of signals
automatically, but sometimes this fails, in which case it can be
specified. Also, using a lower number than detected (C<32> should be
good for about any system in existence) can save some memory, as libev
statically allocates some 12-24 bytes per signal number.

=item EV_PID_HASHSIZE

C<ev_child> watchers use a small hash table to distribute workload by
pid. The default size is C<16> (or C<1> with C<EV_FEATURES> disabled),
usually more than enough. If you need to manage thousands of children you
might want to increase this value (I<must> be a power of two).

=item EV_INOTIFY_HASHSIZE

C<ev_stat> watchers use a small hash table to distribute workload by
inotify watch id. The default size is C<16> (or C<1> with C<EV_FEATURES>
disabled), usually more than enough. If you need to manage thousands of
C<ev_stat> watchers you might want to increase this value (I<must> be a
power of two).

=item EV_USE_4HEAP

Heaps are not very cache-efficient. To improve the cache-efficiency of the
timer and periodics heaps, libev uses a 4-heap when this symbol is defined
to C<1>. The 4-heap uses more complicated (longer) code but has noticeably
faster performance with many (thousands) of watchers.

The default is C<1>, unless C<EV_FEATURES> overrides it, in which case it
will be C<0>.

=item EV_HEAP_CACHE_AT

Heaps are not very cache-efficient. To improve the cache-efficiency of the
timer and periodics heaps, libev can cache the timestamp (I<at>) within
the heap structure (selected by defining C<EV_HEAP_CACHE_AT> to C<1>),
which uses 8-12 bytes more per watcher and a few hundred bytes more code,
but avoids random read accesses on heap changes. This improves performance
noticeably with many (hundreds) of watchers.

The default is C<1>, unless C<EV_FEATURES> overrides it, in which case it
will be C<0>.

=item EV_VERIFY

Controls how much internal verification (see C<ev_verify ()>) will
be done: If set to C<0>, no internal verification code will be compiled
in. If set to C<1>, then verification code will be compiled in, but not
called. If set to C<2>, then the internal verification code will be
called once per loop, which can slow down libev. If set to C<3>, then the
verification code will be called very frequently, which will slow down
libev considerably.

The default is C<1>, unless C<EV_FEATURES> overrides it, in which case it
will be C<0>.

=item EV_COMMON

By default, all watchers have a C<void *data> member. By redefining
this macro to something else you can include more and other types of
members. You have to define it each time you include one of the files,
though, and it must be identical each time.

For example, the perl EV module uses something like this:

   #define EV_COMMON                       \
     SV *self; /* contains this struct */  \
     SV *cb_sv, *fh /* note no trailing ";" */

=item EV_CB_DECLARE (type)

=item EV_CB_INVOKE (watcher, revents)

=item ev_set_cb (ev, cb)

Can be used to change the callback member declaration in each watcher,
and the way callbacks are invoked and set. Must expand to a struct member
definition and a statement, respectively. See the F<ev.h> header file for
their default definitions. One possible use for overriding these is to
avoid the C<struct ev_loop *> as first argument in all cases, or to use
method calls instead of plain function calls in C++.

=back

=head2 EXPORTED API SYMBOLS

If you need to re-export the API (e.g. via a DLL) and you need a list of
exported symbols, you can use the provided F<Symbols.*> files which list
all public symbols, one per line:

   Symbols.ev      for libev proper
   Symbols.event   for the libevent emulation

This can also be used to rename all public symbols to avoid clashes with
multiple versions of libev linked together (which is obviously bad in
itself, but sometimes it is inconvenient to avoid this).

A sed command like this will create wrapper C<#define>'s that you need to
include before including F<ev.h>:

   <Symbols.ev sed -e "s/.*/#define & myprefix_&/" >wrap.h

This would create a file F<wrap.h> which essentially looks like this:

   #define ev_backend     myprefix_ev_backend
   #define ev_check_start myprefix_ev_check_start
   #define ev_check_stop  myprefix_ev_check_stop
   ...

=head2 EXAMPLES

For a real-world example of a program that includes libev
verbatim, you can have a look at the EV perl module
(L<http://software.schmorp.de/pkg/EV.html>). It has the libev files in
the F<libev/> subdirectory and includes them in the F<EV/EVAPI.h> (public
interface) and F<EV.xs> (implementation) files. Only the F<EV.xs> file
will be compiled. It is pretty complex because it provides its own header
file.

The usage in rxvt-unicode is simpler. It has a F<ev_cpp.h> header file
that everybody includes and which overrides some configure choices:

   #define EV_FEATURES 8
   #define EV_USE_SELECT 1
   #define EV_PREPARE_ENABLE 1
   #define EV_IDLE_ENABLE 1
   #define EV_SIGNAL_ENABLE 1
   #define EV_CHILD_ENABLE 1
   #define EV_USE_STDEXCEPT 0
   #define EV_CONFIG_H <config.h>

   #include "ev++.h"

And a F<ev_cpp.C> implementation file that contains libev proper and is compiled:

   #include "ev_cpp.h"
   #include "ev.c"

=head1 INTERACTION WITH OTHER PROGRAMS, LIBRARIES OR THE ENVIRONMENT

=head2 THREADS AND COROUTINES

=head3 THREADS

All libev functions are reentrant and thread-safe unless explicitly
documented otherwise, but libev implements no locking itself. This means
that you can use as many loops as you want in parallel, as long as there
are no concurrent calls into any libev function with the same loop
parameter (C<ev_default_*> calls have an implicit default loop parameter,
of course): libev guarantees that different event loops share no data
structures that need any locking.

Or to put it differently: calls with different loop parameters can be done
concurrently from multiple threads, calls with the same loop parameter
must be done serially (but can be done from different threads, as long as
only one thread ever is inside a call at any point in time, e.g. by using
a mutex per loop).

Specifically to support threads (and signal handlers), libev implements
so-called C<ev_async> watchers, which allow some limited form of
concurrency on the same event loop, namely waking it up "from the
outside".

If you want to know which design (one loop with locking, multiple loops
without locking, or something else entirely) is best for your problem,
then I cannot help you, but here is some generic advice:

=over 4

=item * most applications have a main thread: use the default libev loop
in that thread, or create a separate thread running only the default loop.

This helps integrating other libraries or software modules that use libev
themselves and don't care/know about threading.

=item * one loop per thread is usually a good model.

Doing this is almost never wrong, sometimes a better-performance model
exists, but it is always a good start.

=item * other models exist, such as the leader/follower pattern, where one
loop is handed through multiple threads in a kind of round-robin fashion.

Choosing a model is hard - look around, learn, know that usually you can do
better than you currently do :-)

=item * often you need to talk to some other thread which blocks in the
event loop.

C<ev_async> watchers can be used to wake them up from other threads safely
(or from signal contexts...).

An example use would be to communicate signals or other events that only
work in the default loop by registering the signal watcher with the
default loop and triggering an C<ev_async> watcher from the default loop
watcher callback into the event loop interested in the signal.

=back

See also L</THREAD LOCKING EXAMPLE>.

=head3 COROUTINES

Libev is very accommodating to coroutines ("cooperative threads"):
libev fully supports nesting calls to its functions from different
coroutines (e.g. you can call C<ev_run> on the same loop from two
different coroutines, and switch freely between both coroutines running
the loop, as long as you don't confuse yourself). The only exception is
that you must not do this from C<ev_periodic> reschedule callbacks.

Care has been taken to ensure that libev does not keep local state inside
C<ev_run>, and other calls do not usually allow for coroutine switches as
they do not call any callbacks.

=head2 COMPILER WARNINGS

Depending on your compiler and compiler settings, you might get no or a
lot of warnings when compiling libev code. Some people are apparently
scared by this.

However, these are unavoidable for many reasons. For one, each compiler
has different warnings, and each user has different tastes regarding
warning options. "Warn-free" code therefore cannot be a goal except when
targeting a specific compiler and compiler-version.

Another reason is that some compiler warnings require elaborate
workarounds, or other changes to the code that make it less clear and less
maintainable.

And of course, some compiler warnings are just plain stupid, or simply
wrong (because they don't actually warn about the condition their message
seems to warn about). For example, certain older gcc versions had some
warnings that resulted in an extreme number of false positives. These have
been fixed, but some people still insist on making code warn-free with
such buggy versions.

While libev is written to generate as few warnings as possible,
"warn-free" code is not a goal, and it is recommended not to build libev
with any compiler warnings enabled unless you are prepared to cope with
them (e.g. by ignoring them). Remember that warnings are just that:
warnings, not errors, or proof of bugs.


=head2 VALGRIND

Valgrind has a special section here because it is a popular tool that is
highly useful. Unfortunately, valgrind reports are very hard to interpret.

If you think you found a bug (memory leak, uninitialised data access etc.)
in libev, then check twice: If valgrind reports something like:

   ==2274==    definitely lost: 0 bytes in 0 blocks.
   ==2274==      possibly lost: 0 bytes in 0 blocks.
   ==2274==    still reachable: 256 bytes in 1 blocks.

Then there is no memory leak, just as memory accounted to global variables
is not a memleak - the memory is still being referenced, and didn't leak.

Similarly, under some circumstances, valgrind might report kernel bugs
as if it were a bug in libev (e.g. in realloc or in the poll backend,
although an acceptable workaround has been found here), or it might be
confused.

Keep in mind that valgrind is a very good tool, but only a tool. Don't
make it into some kind of religion.

If you are unsure about something, feel free to contact the mailing list
with the full valgrind report and an explanation on why you think this
is a bug in libev (best check the archives, too :). However, don't be
annoyed when you get a brisk "this is no bug" answer and take the chance
of learning how to interpret valgrind properly.

If you need, for some reason, empty reports from valgrind for your project
I suggest using suppression lists.


=head1 PORTABILITY NOTES

=head2 GNU/LINUX 32 BIT LIMITATIONS

GNU/Linux is the only common platform that supports 64 bit file/large file
interfaces but I<disables> them by default.

That means that libev compiled in the default environment doesn't support
files larger than 2GiB or so, which mainly affects C<ev_stat> watchers.

Unfortunately, many programs try to work around this GNU/Linux issue
by enabling the large file API, which makes them incompatible with the
standard libev compiled for their system.

Likewise, libev cannot enable the large file API itself as this would
suddenly make it incompatible with the default compile time environment,
i.e. all programs not using special compile switches.

=head2 OS/X AND DARWIN BUGS

The whole thing is a bug if you ask me - basically any system interface
you touch is broken, whether it is locales, poll, kqueue or even the
OpenGL drivers.

=head3 C<kqueue> is buggy

The kqueue syscall is broken in all known versions - most versions support
only sockets, many support pipes.

Libev tries to work around this by not using C<kqueue> by default on this
rotten platform, but of course you can still ask for it when creating a
loop - embedding a socket-only kqueue loop into a select-based one is
probably going to work well.

=head3 C<poll> is buggy

Instead of fixing C<kqueue>, Apple replaced their (working) C<poll>
implementation by something calling C<kqueue> internally around the 10.5.6
release, so now C<kqueue> I<and> C<poll> are broken.

Libev tries to work around this by not using C<poll> by default on
this rotten platform, but of course you can still ask for it when creating
a loop.

=head3 C<select> is buggy

All that's left is C<select>, and of course Apple found a way to fuck this
one up as well: On OS/X, C<select> actively limits the number of file
descriptors you can pass in to 1024 - your program suddenly crashes when
you use more.

There is an undocumented "workaround" for this - defining
C<_DARWIN_UNLIMITED_SELECT>, which libev tries to use, so select I<should>
work on OS/X.

=head2 SOLARIS PROBLEMS AND WORKAROUNDS

=head3 C<errno> reentrancy

The default compile environment on Solaris is unfortunately so
thread-unsafe that you can't even use components/libraries compiled
without C<-D_REENTRANT> in a threaded program, which, of course, isn't
defined by default. A valid, if stupid, implementation choice.

If you want to use libev in threaded environments you have to make sure
it's compiled with C<_REENTRANT> defined.

=head3 Event port backend

The scalable event interface for Solaris is called "event
ports". Unfortunately, this mechanism is very buggy in all major
releases. If you run into high CPU usage, your program freezes or you get
a large number of spurious wakeups, make sure you have all the relevant
and latest kernel patches applied. No, I don't know which ones, but there
are multiple ones to apply, and afterwards, event ports actually work
great.

If you can't get it to work, you can try running the program by setting
the environment variable C<LIBEV_FLAGS=3> to only allow C<poll> and
C<select> backends.

=head2 AIX POLL BUG

AIX unfortunately has a broken C<poll.h> header. Libev works around
this by trying to avoid the poll backend altogether (i.e. it's not even
compiled in), which normally isn't a big problem as C<select> works fine
with large bitsets on AIX, and AIX is dead anyway.

=head2 WIN32 PLATFORM LIMITATIONS AND WORKAROUNDS

=head3 General issues

Win32 doesn't support any of the standards (e.g. POSIX) that libev
requires, and its I/O model is fundamentally incompatible with the POSIX
model. Libev still offers limited functionality on this platform in
the form of the C<EVBACKEND_SELECT> backend, and only supports socket
descriptors. This only applies when using Win32 natively, not when using
e.g. cygwin. Actually, it only applies to Microsoft's own compilers,
as every compiler comes with a slightly differently broken/incompatible
environment.

Lifting these limitations would basically require the full
re-implementation of the I/O system. If you are into this kind of thing,
then note that glib does exactly that for you in a very portable way (note
also that glib is the slowest event library known to man).

There is no supported compilation method available on windows except
embedding it into other applications.

Sensible signal handling is officially unsupported by Microsoft - libev
tries its best, but under most conditions, signals will simply not work.

Not a libev limitation but worth mentioning: windows apparently doesn't
accept large writes: instead of resulting in a partial write, windows will
either accept everything or return C<ENOBUFS> if the buffer is too large,
so make sure you only write small amounts into your sockets (less than a
megabyte seems safe, but this apparently depends on the amount of memory
available).

Due to the many, low, and arbitrary limits on the win32 platform and
the abysmal performance of winsockets, using a large number of sockets
is not recommended (and not reasonable). If your program needs to use
more than a hundred or so sockets, then likely it needs to use a totally
different implementation for windows, as libev offers the POSIX readiness
notification model, which cannot be implemented efficiently on windows
(due to Microsoft monopoly games).

A typical way to use libev under windows is to embed it (see the embedding
section for details) and use the following F<evwrap.h> header file instead
of F<ev.h>:

   #define EV_STANDALONE              /* keeps ev from requiring config.h */
   #define EV_SELECT_IS_WINSOCKET 1   /* configure libev for windows select */

   #include "ev.h"

And compile the following F<evwrap.c> file into your project (make sure
you do I<not> compile the F<ev.c> or any other embedded source files!):

   #include "evwrap.h"
   #include "ev.c"

=head3 The winsocket C<select> function

The winsocket C<select> function doesn't follow POSIX in that it
requires socket I<handles> and not socket I<file descriptors> (it is
also extremely buggy). This makes select very inefficient, and also
requires a mapping from file descriptors to socket handles (the Microsoft
C runtime provides the function C<_open_osfhandle> for this). See the
discussion of the C<EV_SELECT_USE_FD_SET>, C<EV_SELECT_IS_WINSOCKET> and
C<EV_FD_TO_WIN32_HANDLE> preprocessor symbols for more info.

The configuration for a "naked" win32 using the Microsoft runtime
libraries and raw winsocket select is:

   #define EV_USE_SELECT 1
   #define EV_SELECT_IS_WINSOCKET 1   /* forces EV_SELECT_USE_FD_SET, too */

Note that winsocket's handling of fd sets is O(n), so you can easily get a
complexity in the O(n²) range when using win32.

=head3 Limited number of file descriptors

Windows has numerous arbitrary (and low) limits on things.

Early versions of winsocket's select only supported waiting for a maximum
of C<64> handles (probably owing to the fact that all windows kernels
can only wait for C<64> things at the same time internally; Microsoft
recommends spawning a chain of threads and waiting for 63 handles and the
previous thread in each. Sounds great!).

Newer versions support more handles, but you need to define C<FD_SETSIZE>
to some high number (e.g. C<2048>) before compiling the winsocket select
call (which might be in libev or elsewhere, for example, perl and many
other interpreters do their own select emulation on windows).

Another limit is the number of file descriptors in the Microsoft runtime
libraries, which by default is C<64> (there must be a hidden I<64>
fetish or something like this inside Microsoft). You can increase this
by calling C<_setmaxstdio>, which can increase this limit to C<2048>
(another arbitrary limit), but is broken in many versions of the Microsoft
runtime libraries. This might get you to about C<512> or C<2048> sockets
(depending on windows version and/or the phase of the moon). To get more,
you need to wrap all I/O functions and provide your own fd management, but
the cost of calling select (O(n²)) will likely make this unworkable.

=head2 PORTABILITY REQUIREMENTS

In addition to a working ISO-C implementation and of course the
backend-specific APIs, libev relies on a few additional extensions:

=over 4

=item C<void (*)(ev_watcher_type *, int revents)> must have compatible
calling conventions regardless of C<ev_watcher_type *>.

Libev assumes not only that all watcher pointers have the same internal
structure (guaranteed by POSIX but not by ISO C for example), but it also
assumes that the same (machine) code can be used to call any watcher
callback: The watcher callbacks have different type signatures, but libev
calls them using an C<ev_watcher *> internally.

=item null pointers and integer zero are represented by 0 bytes

Libev uses C<memset> to initialise structs and arrays to C<0> bytes, and
relies on this setting pointers and integers to null.

=item pointer accesses must be thread-atomic

Accessing a pointer value must be atomic, it must both be readable and
writable in one piece - this is the case on all current architectures.

=item C<sig_atomic_t volatile> must be thread-atomic as well

The type C<sig_atomic_t volatile> (or whatever is defined as
C<EV_ATOMIC_T>) must be atomic with respect to accesses from different
threads. This is not part of the specification for C<sig_atomic_t>, but is
believed to be sufficiently portable.

=item C<sigprocmask> must work in a threaded environment

Libev uses C<sigprocmask> to temporarily block signals. This is not
allowed in a threaded program (C<pthread_sigmask> has to be used). Typical
pthread implementations will either allow C<sigprocmask> in the "main
thread" or will block signals process-wide, both behaviours would
be compatible with libev. Interaction between C<sigprocmask> and
C<pthread_sigmask> could complicate things, however.

The most portable way to handle signals is to block signals in all threads
except the initial one, and run the signal handling loop in the initial
thread as well.

=item C<long> must be large enough for common memory allocation sizes

To improve portability and simplify its API, libev uses C<long> internally
instead of C<size_t> when allocating its data structures. On non-POSIX
systems (Microsoft...) this might be unexpectedly low, but is still at
least 31 bits everywhere, which is enough for hundreds of millions of
watchers.

=item C<double> must hold a time value in seconds with enough accuracy

The type C<double> is used to represent timestamps. It is required to
have at least 51 bits of mantissa (and 9 bits of exponent), which is
good enough until at least the year 4000 with millisecond accuracy
(the design goal for libev). This requirement is overfulfilled by
implementations using IEEE 754, which is basically all existing ones.

With IEEE 754 doubles, you get microsecond accuracy until at least the
year 2255 (and millisecond accuracy till the year 287396 - by then, libev
is either obsolete or somebody patched it to use C<long double> or
something like that, just kidding).

=back

If you know of other additional requirements drop me a note.


=head1 ALGORITHMIC COMPLEXITIES

In this section the complexities of (many of) the algorithms used inside
libev will be documented. For complexity discussions about backends see
the documentation for C<ev_default_init>.

All of the following are about amortised time: If an array needs to be
extended, libev needs to realloc and move the whole array, but this happens
asymptotically more rarely with a higher number of elements, so O(1) might
mean that libev does a lengthy realloc operation in rare cases, but on
average it is much faster and asymptotically approaches constant time.

=over 4

=item Starting and stopping timer/periodic watchers: O(log skipped_other_timers)

This means that, when you have a watcher that triggers in one hour and
there are 100 watchers that would trigger before that, then inserting will
have to skip roughly seven (C<ld 100>) of these watchers.

=item Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)

That means that changing a timer costs less than removing/adding them,
as only the relative motion in the event queue has to be paid for.

=item Starting io/check/prepare/idle/signal/child/fork/async watchers: O(1)

These just add the watcher into an array or at the head of a list.

=item Stopping check/prepare/idle/fork/async watchers: O(1)

=item Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % EV_PID_HASHSIZE))

These watchers are stored in lists, so they need to be walked to find the
correct watcher to remove. The lists are usually short (you don't usually
have many watchers waiting for the same fd or signal: one is typical, two
is rare).

=item Finding the next timer in each loop iteration: O(1)

By virtue of using a binary or 4-heap, the next timer is always found at a
fixed position in the storage array.

=item Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)

A change means an I/O watcher gets started or stopped, which requires
libev to recalculate its status (and possibly tell the kernel, depending
on backend and whether C<ev_io_set> was used).

=item Activating one watcher (putting it into the pending state): O(1)

=item Priority handling: O(number_of_priorities)

Priorities are implemented by allocating some space for each
priority. When doing priority-based operations, libev usually has to
linearly search all the priorities, but starting/stopping and activating
watchers becomes O(1) with respect to priority handling.

=item Sending an ev_async: O(1)

=item Processing ev_async_send: O(number_of_async_watchers)

=item Processing signals: O(max_signal_number)

Sending involves a system call I<iff> there were no other C<ev_async_send>
calls in the current loop iteration and the loop is currently
blocked. Checking for async and signal events involves iterating over all
running async watchers or all signal numbers.

=back


=head1 PORTING FROM LIBEV 3.X TO 4.X

The major version 4 introduced some incompatible changes to the API.

At the moment, the C<ev.h> header file provides compatibility definitions
for all changes, so most programs should still compile. The compatibility
layer might be removed in later versions of libev, so it is better to
update to the new API sooner rather than later.

=over 4

=item C<EV_COMPAT3> backwards compatibility mechanism

The backward compatibility mechanism can be controlled by
C<EV_COMPAT3>. See L</"PREPROCESSOR SYMBOLS/MACROS"> in the L</EMBEDDING>
section.

=item C<ev_default_destroy> and C<ev_default_fork> have been removed

These calls can be replaced easily by their C<ev_loop_xxx> counterparts:

   ev_loop_destroy (EV_DEFAULT_UC);
   ev_loop_fork (EV_DEFAULT);

=item function/symbol renames

A number of functions and symbols have been renamed:

  ev_loop         => ev_run
  EVLOOP_NONBLOCK => EVRUN_NOWAIT
  EVLOOP_ONESHOT  => EVRUN_ONCE

  ev_unloop       => ev_break
  EVUNLOOP_CANCEL => EVBREAK_CANCEL
  EVUNLOOP_ONE    => EVBREAK_ONE
  EVUNLOOP_ALL    => EVBREAK_ALL

  EV_TIMEOUT      => EV_TIMER

  ev_loop_count   => ev_iteration
  ev_loop_depth   => ev_depth
  ev_loop_verify  => ev_verify

Most functions working on C<struct ev_loop> objects don't have an
C<ev_loop_> prefix, so it was removed; C<ev_loop>, C<ev_unloop> and
associated constants have been renamed to not collide with the C<struct
ev_loop> anymore and C<EV_TIMER> now follows the same naming scheme
as all other watcher types. Note that C<ev_loop_fork> is still called
C<ev_loop_fork> because it would otherwise clash with the C<ev_fork>
typedef.

=item C<EV_MINIMAL> mechanism replaced by C<EV_FEATURES>

The preprocessor symbol C<EV_MINIMAL> has been replaced by a different
mechanism, C<EV_FEATURES>. Programs using C<EV_MINIMAL> usually compile
and work, but the library code will of course be larger.

=back


=head1 GLOSSARY

=over 4

=item active

A watcher is active as long as it has been started and not yet stopped.
See L</WATCHER STATES> for details.

=item application

In this document, an application is whatever is using libev.

=item backend

The part of the code dealing with the operating system interfaces.

=item callback

The address of a function that is called when some event has been
detected. Callbacks are passed the event loop, the watcher that
received the event, and the actual event bitset.

=item callback/watcher invocation

The act of calling the callback associated with a watcher.

=item event

A change of state of some external event, such as data now being available
for reading on a file descriptor, time having passed or simply not having
any other events happening anymore.

In libev, events are represented as single bits (such as C<EV_READ> or
C<EV_TIMER>).

=item event library

A software package implementing an event model and loop.

=item event loop

An entity that handles and processes external events and converts them
into callback invocations.

=item event model

The model used to describe how an event loop handles and processes
watchers and events.

=item pending

A watcher is pending as soon as the corresponding event has been
detected. See L</WATCHER STATES> for details.

=item real time

The physical time that is observed. It is apparently strictly monotonic :)

=item wall-clock time

The time and date as shown on clocks. Unlike real time, it can actually
be wrong and jump forwards and backwards, e.g. when you adjust your
clock.

=item watcher

A data structure that describes interest in certain events. Watchers need
to be started (attached to an event loop) before they can receive events.

=back

=head1 AUTHOR

Marc Lehmann <libev@schmorp.de>, with repeated corrections by Mikael
Magnusson and Emanuele Giaquinta, and minor corrections by many others.


================================================
FILE: libev/ev_epoll.c
================================================
/*
 * libev epoll fd activity backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/*
 * general notes about epoll:
 *
 * a) epoll silently removes fds from the fd set. as nothing tells us
 *    that an fd has been removed otherwise, we have to continually
 *    "rearm" fds that we suspect *might* have changed (same
 *    problem with kqueue, but much less costly there).
 * b) the fact that ADD != MOD creates a lot of extra syscalls due to a)
 *    and seems not to have any advantage.
 * c) the inability to handle fork or file descriptors (think dup)
 *    limits the applicability over poll, so this is not a generic
 *    poll replacement.
 * d) epoll doesn't work the same as select with many file descriptors
 *    (such as files). while not critical, no other advanced interface
 *    seems to share this (rather non-unixy) limitation.
 * e) epoll claims to be embeddable, but in practice you never get
 *    a ready event for the epoll fd (broken: <=2.6.26, working: >=2.6.32).
 * f) epoll_ctl returning EPERM means the fd is always ready.
 *
 * lots of "weird code" and complication handling in this file is due
 * to these design problems with epoll, as we try very hard to avoid
 * epoll_ctl syscalls for common usage patterns and handle the breakage
 * ensuing from receiving events for closed and otherwise long gone
 * file descriptors.
 */

#include <sys/epoll.h>

#define EV_EMASK_EPERM 0x80

/*
 * backend_modify hook of the epoll backend (installed by epoll_init).
 *
 * fd  - file descriptor whose interest set changed
 * oev - event mask that was in effect before; only tested for being nonzero
 * nev - event mask (EV_READ/EV_WRITE bits) wanted from now on, 0 = none
 *
 * Each registration stores a per-fd generation counter (egen) in the upper
 * 32 bits of the epoll user data; epoll_poll compares it against
 * anfds [fd].egen to recognise stale notifications. Whenever no epoll_ctl
 * call succeeded, the counter increment is undone again at dec_egen.
 */
static void
epoll_modify (EV_P_ int fd, int oev, int nev)
{
  struct epoll_event ev;
  unsigned char oldmask;

  /*
   * we handle EPOLL_CTL_DEL by ignoring it here
   * on the assumption that the fd is gone anyways
   * if that is wrong, we have to handle the spurious
   * event in epoll_poll.
   * if the fd is added again, we try to ADD it, and, if that
   * fails, we assume it still has the same eventmask.
   */
  if (!nev)
    return;

  /* from here on nev is known to be nonzero */

  /* remember what we believe the kernel mask was, then record the new one */
  oldmask = anfds [fd].emask;
  anfds [fd].emask = nev;

  /* store the generation counter in the upper 32 bits, the fd in the lower 32 bits */
  ev.data.u64 = (uint64_t)(uint32_t)fd
              | ((uint64_t)(uint32_t)++anfds [fd].egen << 32);
  ev.events   = (nev & EV_READ  ? EPOLLIN  : 0)
              | (nev & EV_WRITE ? EPOLLOUT : 0);

  /* MOD only if the fd was watched before and the recorded mask differs, */
  /* otherwise (optimistically) ADD and sort out the error cases below */
  if (expect_true (!epoll_ctl (backend_fd, oev && oldmask != nev ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ev)))
    return;

  if (expect_true (errno == ENOENT))
    {
      /* if ENOENT then the fd went away, so register it afresh */
      if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev))
        return;
    }
  else if (expect_true (errno == EEXIST))
    {
      /* EEXIST means we ignored a previous DEL, but the fd is still active */
      /* if the kernel mask is the same as the new mask, we assume it hasn't changed */
      if (oldmask == nev)
        goto dec_egen;

      if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev))
        return;
    }
  else if (expect_true (errno == EPERM))
    {
      /* EPERM means the fd is always ready, but epoll is too snobbish */
      /* to handle it, unlike select or poll. */
      anfds [fd].emask = EV_EMASK_EPERM;

      /* add fd to epoll_eperms, if not already inside */
      if (!(oldmask & EV_EMASK_EPERM))
        {
          array_needsize (int, epoll_eperms, epoll_epermmax, epoll_epermcnt + 1, EMPTY2);
          epoll_eperms [epoll_epermcnt++] = fd;
        }

      return;
    }

  /* any other error, or a failed retry above: give up on this fd */
  fd_kill (EV_A_ fd);

dec_egen:
  /* we didn't successfully call epoll_ctl, so decrement the generation counter again */
  --anfds [fd].egen;
}

/*
 * backend_poll hook of the epoll backend (installed by epoll_init).
 *
 * Waits up to timeout seconds for kernel events, feeds every valid one
 * into fd_event, enlarges the receive buffer when it was filled completely
 * and finally synthesizes events for fds that epoll refused with EPERM
 * (those are tracked in epoll_eperms by epoll_modify).
 */
static void
epoll_poll (EV_P_ ev_tstamp timeout)
{
  int i;
  int eventcnt;

  /* fds on the EPERM list are treated as always ready, so never block while any exist */
  if (expect_false (epoll_epermcnt))
    timeout = 0.;

  /* epoll wait times cannot be larger than (LONG_MAX - 999UL) / HZ msecs, which is below */
  /* the default libev max wait time, however. */
  EV_RELEASE_CB;
  /* timeout is in seconds, epoll_wait expects milliseconds */
  eventcnt = epoll_wait (backend_fd, epoll_events, epoll_eventmax, timeout * 1e3);
  EV_ACQUIRE_CB;

  if (expect_false (eventcnt < 0))
    {
      /* being interrupted by a signal is not an error worth reporting */
      if (errno != EINTR)
        ev_syserr ("(libev) epoll_wait");

      return;
    }

  for (i = 0; i < eventcnt; ++i)
    {
      struct epoll_event *ev = epoll_events + i;

      int fd = (uint32_t)ev->data.u64; /* mask out the lower 32 bits */
      int want = anfds [fd].events;
      /* error and hangup conditions are reported as both readable and writable */
      int got  = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
               | (ev->events & (EPOLLIN  | EPOLLERR | EPOLLHUP) ? EV_READ  : 0);

      /*
       * check for spurious notification.
       * this only finds spurious notifications on egen updates
       * other spurious notifications will be found by epoll_ctl, below
       * we assume that fd is always in range, as we never shrink the anfds array
       */
      if (expect_false ((uint32_t)anfds [fd].egen != (uint32_t)(ev->data.u64 >> 32)))
        {
          /* recreate kernel state */
          /* (only the flag is set here, the rebuild itself happens outside this function) */
          postfork |= 2;
          continue;
        }

      if (expect_false (got & ~want))
        {
          anfds [fd].emask = want;

          /*
           * we received an event but are not interested in it, try mod or del
           * this often happens because we optimistically do not unregister fds
           * when we are no longer interested in them, but also when we get spurious
           * notifications for fds from another process. this is partially handled
           * above with the gencounter check (== our fd is not the event fd), and
           * partially here, when epoll_ctl returns an error (== a child has the fd
           * but we closed it).
           */
          /* the received event structure is reused as the argument, its data field is left untouched */
          ev->events = (want & EV_READ  ? EPOLLIN  : 0)
                     | (want & EV_WRITE ? EPOLLOUT : 0);

          /* pre-2.6.9 kernels require a non-null pointer with EPOLL_CTL_DEL, */
          /* which is fortunately easy to do for us. */
          if (epoll_ctl (backend_fd, want ? EPOLL_CTL_MOD : EPOLL_CTL_DEL, fd, ev))
            {
              postfork |= 2; /* an error occurred, recreate kernel state */
              continue;
            }
        }

      fd_event (EV_A_ fd, got);
    }

  /* if the receive array was full, increase its size */
  /* (the old contents have been processed already, so free + malloc is enough) */
  if (expect_false (eventcnt == epoll_eventmax))
    {
      ev_free (epoll_events);
      epoll_eventmax = array_nextsize (sizeof (struct epoll_event), epoll_eventmax, epoll_eventmax + 1);
      epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);
    }

  /* now synthesize events for all fds where epoll fails, while select works... */
  /* walking backwards allows removing entries by swapping in the last one */
  for (i = epoll_epermcnt; i--; )
    {
      int fd = epoll_eperms [i];
      unsigned char events = anfds [fd].events & (EV_READ | EV_WRITE);

      if (anfds [fd].emask & EV_EMASK_EPERM && events)
        fd_event (EV_A_ fd, events);
      else
        {
          /* no longer flagged or no longer wanted: drop it from the list */
          epoll_eperms [i] = epoll_eperms [--epoll_epermcnt];
          anfds [fd].emask = 0;
        }
    }
}

/*
 * Set up the epoll backend for this loop.
 *
 * Returns EVBACKEND_EPOLL on success and 0 if no epoll descriptor could
 * be created. The flags argument is not looked at.
 */
inline_size
int
epoll_init (EV_P_ int flags)
{
#if defined EPOLL_CLOEXEC && !defined __ANDROID__
  /* first choice: request close-on-exec right at creation time */
  backend_fd = epoll_create1 (EPOLL_CLOEXEC);

  /* only retry with the classic call when epoll_create1 itself is unsupported */
  if (backend_fd < 0 && (errno == EINVAL || errno == ENOSYS))
    backend_fd = epoll_create (256);
#else
  backend_fd = epoll_create (256);
#endif

  if (backend_fd >= 0)
    {
      fcntl (backend_fd, F_SETFD, FD_CLOEXEC);

      /* hook this backend into the loop */
      backend_modify  = epoll_modify;
      backend_poll    = epoll_poll;
      /* epoll_wait can return early at times, a 1ms floor avoids the worst of it */
      backend_mintime = 1e-3;

      /* room for 64 events per poll to begin with, epoll_poll grows this on demand */
      epoll_eventmax = 64;
      epoll_events = (struct epoll_event *)ev_malloc (sizeof (struct epoll_event) * epoll_eventmax);

      return EVBACKEND_EPOLL;
    }

  return 0;
}

/* free the memory owned by the epoll backend: the EPERM fd list and the event buffer */
inline_size
void
epoll_destroy (EV_P)
{
  /* array_free pastes suffixes onto its stem, so this acts on epoll_eperms/cnt/max */
  array_free (epoll_eperm, EMPTY);
  ev_free (epoll_events);
}

/*
 * Replace the epoll descriptor by a freshly created one and have all
 * fds registered again via fd_rearm_all.
 */
inline_size
void
epoll_fork (EV_P)
{
  close (backend_fd);

  /* keep trying until a descriptor is obtained, reporting each failure */
  for (;;)
    {
      backend_fd = epoll_create (256);

      if (backend_fd >= 0)
        break;

      ev_syserr ("(libev) epoll_create");
    }

  fcntl (backend_fd, F_SETFD, FD_CLOEXEC);

  fd_rearm_all (EV_A);
}


================================================
FILE: libev/ev_kqueue.c
================================================
/*
 * libev kqueue backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/event.h>
#include <string.h>
#include <errno.h>

/*
 * Append one change record (fd, filter, flags, fflags) to the pending
 * kqueue_changes list; kqueue_poll hands the list to kevent.
 */
inline_speed
void
kqueue_change (EV_P_ int fd, int filter, int flags, int fflags)
{
  struct kevent *slot;

  /* make room for one more entry before claiming it */
  array_needsize (struct kevent, kqueue_changes, kqueue_changemax, kqueue_changecnt + 1, EMPTY2);

  slot = &kqueue_changes [kqueue_changecnt++];
  EV_SET (slot, fd, filter, flags, fflags, 0, 0);
}

/* OS X at least needs this */
#ifndef EV_ENABLE
# define EV_ENABLE 0
#endif
#ifndef NOTE_EOF
# define NOTE_EOF 0
#endif

/*
 * backend_modify hook of the kqueue backend (installed by kqueue_init).
 *
 * Queues the change records needed to move fd from event mask oev to
 * event mask nev. Nothing is sent to the kernel here, the records are
 * only appended via kqueue_change.
 */
static void
kqueue_modify (EV_P_ int fd, int oev, int nev)
{
  int mask_changed = oev != nev;

  /* filters of the old mask are only deleted when the mask really changed */
  if (mask_changed && (oev & EV_READ))
    kqueue_change (EV_A_ fd, EVFILT_READ , EV_DELETE, 0);

  if (mask_changed && (oev & EV_WRITE))
    kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_DELETE, 0);

  /* the wanted filters are (re-)added unconditionally, i.e. also for */
  /* oev == nev, which is needed to detect close/reopen reliably */
  if (nev & EV_READ)
    kqueue_change (EV_A_ fd, EVFILT_READ , EV_ADD | EV_ENABLE, NOTE_EOF);

  if (nev & EV_WRITE)
    kqueue_change (EV_A_ fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, NOTE_EOF);
}

/*
 * backend_poll hook of the kqueue backend (installed by kqueue_init).
 *
 * Submits all queued changes and waits up to timeout seconds for events
 * in a single kevent call, then dispatches the results: error records
 * lead to a resubmit or to fd_kill, everything else goes to fd_event.
 */
static void
kqueue_poll (EV_P_ ev_tstamp timeout)
{
  struct timespec ts;
  int nready, idx;

  /* the receive buffer must offer one slot per submitted change, */
  /* so that an error for each of them can be reported back */
  if (kqueue_eventmax < kqueue_changecnt)
    {
      ev_free (kqueue_events);
      kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_changecnt);
      kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);
    }

  EV_RELEASE_CB;
  EV_TS_SET (ts, timeout);
  nready = kevent (backend_fd, kqueue_changes, kqueue_changecnt, kqueue_events, kqueue_eventmax, &ts);
  EV_ACQUIRE_CB;

  /* the change list counts as consumed, whatever kevent returned */
  kqueue_changecnt = 0;

  if (expect_false (nready < 0))
    {
      if (errno != EINTR)
        ev_syserr ("(libev) kevent");

      return;
    }

  for (idx = 0; idx < nready; ++idx)
    {
      struct kevent *kev = kqueue_events + idx;
      int fd = kev->ident;
      int revents;

      if (expect_false (kev->flags & EV_ERROR))
        {
          int err = kev->data;

          /* errors for fds nobody is watching anymore are of no interest */
          if (!anfds [fd].events)
            continue;

          /* ENOENT: resubmit; EBADF: resubmit if the fd checks out as valid; */
          /* anything else (including an invalid fd) errors out on the fd */
          if (err == ENOENT || (err == EBADF && fd_valid (fd)))
            kqueue_modify (EV_A_ fd, 0, anfds [fd].events);
          else
            fd_kill (EV_A_ fd);

          continue;
        }

      /* translate the kqueue filter into the libev event bit */
      if (kev->filter == EVFILT_READ)
        revents = EV_READ;
      else if (kev->filter == EVFILT_WRITE)
        revents = EV_WRITE;
      else
        revents = 0;

      fd_event (EV_A_ fd, revents);
    }

  /* a completely filled buffer hints at more events waiting, so enlarge it for next time */
  if (expect_false (nready == kqueue_eventmax))
    {
      ev_free (kqueue_events);
      kqueue_eventmax = array_nextsize (sizeof (struct kevent), kqueue_eventmax, kqueue_eventmax + 1);
      kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);
    }
}

/*
 * Set up the kqueue backend for this loop.
 *
 * Returns EVBACKEND_KQUEUE on success and 0 if the kernel queue could not
 * be created. The flags argument is not looked at.
 */
inline_size
int
kqueue_init (EV_P_ int flags)
{
  /* note which process creates the queue, kqueue_fork compares against it */
  kqueue_fd_pid = getpid ();

  backend_fd = kqueue ();

  if (backend_fd < 0)
    return 0;

  fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* may be unnecessary, but should not hurt either */

  /* the list of pending changes starts out empty and unallocated */
  kqueue_changecnt = 0;
  kqueue_changemax = 0;
  kqueue_changes   = 0;

  /* room for 64 events per poll to begin with, kqueue_poll grows this on demand */
  kqueue_eventmax = 64;
  kqueue_events = (struct kevent *)ev_malloc (sizeof (struct kevent) * kqueue_eventmax);

  /* hook this backend into the loop */
  backend_modify  = kqueue_modify;
  backend_poll    = kqueue_poll;
  backend_mintime = 1e-9; /* apparently, they did the right thing in freebsd */

  return EVBACKEND_KQUEUE;
}

/* free the memory owned by the kqueue backend: the pending change list and the event buffer */
inline_size
void
kqueue_destroy (EV_P)
{
  ev_free (kqueue_changes);
  ev_free (kqueue_events);
}

/*
 * Create a new kernel queue for the current process and have all fds
 * registered again.
 */
inline_size
void
kqueue_fork (EV_P)
{
  /* some BSD kernels do not merely destroy the kqueue itself on fork,
   * they also close the fd - which is undocumented and cannot be
   * supported properly.
   * so the pid of the process that made the kqueue call is remembered,
   * and the fd is closed only if that pid is still ours.
   * on sane kernels this leaks fds, but BSD interfaces are
   * notoriously buggy and rarely get fixed.
   */
  pid_t self = getpid ();

  if (kqueue_fd_pid == self)
    close (backend_fd);

  kqueue_fd_pid = self;

  /* keep trying until a queue is obtained, reporting each failure */
  for (;;)
    {
      backend_fd = kqueue ();

      if (backend_fd >= 0)
        break;

      ev_syserr ("(libev) kqueue");
    }

  fcntl (backend_fd, F_SETFD, FD_CLOEXEC);

  /* re-register interest in fds */
  fd_rearm_all (EV_A);
}

/* sys/event.h defines EV_ERROR */
#undef EV_ERROR


================================================
FILE: libev/ev_poll.c
================================================
/*
 * libev poll fd activity backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#include <poll.h>

/*
 * Mark count consecutive index slots, starting at base, as unused (-1).
 * poll_modify passes this as the initialiser when it grows pollidxs.
 */
inline_size
void
pollidx_init (int *base, int count)
{
  /* memset (.., -1, ...) would be an alternative, it is practically
   * guaranteed to work on all systems implementing poll */
  for (; count; --count)
    *base++ = -1;
}

/*
 * backend_modify hook of the poll backend (installed by poll_init).
 *
 * Keeps the dense pollfd array (polls) and the fd -> array index map
 * (pollidxs) in sync with the new event mask nev of fd. Does nothing if
 * the mask did not change.
 */
static void
poll_modify (EV_P_ int fd, int oev, int nev)
{
  int idx;

  if (oev != nev)
    {
      /* the index map must cover fd, new slots start out as -1 */
      array_needsize (int, pollidxs, pollidxmax, fd + 1, pollidx_init);

      idx = pollidxs [fd];

      if (idx < 0)
        {
          /* fd has no pollfd yet, claim the next free one at the end */
          idx = pollcnt++;
          pollidxs [fd] = idx;
          array_needsize (struct pollfd, polls, pollmax, pollcnt, EMPTY2);
          polls [idx].fd = fd;
        }

      assert (polls [idx].fd == fd);

      if (!nev)
        {
          /* nothing wanted anymore: give up the pollfd ... */
          pollidxs [fd] = -1;
          --pollcnt;

          /* ... and keep the array dense by moving the last entry into the gap */
          if (expect_true (idx < pollcnt))
            {
              polls [idx] = polls [pollcnt];
              pollidxs [polls [idx].fd] = idx;
            }
        }
      else
        {
          short wanted = 0;

          if (nev & EV_READ)
            wanted |= POLLIN;

          if (nev & EV_WRITE)
            wanted |= POLLOUT;

          polls [idx].events = wanted;
        }
    }
}

/*
 * backend_poll hook of the poll backend (installed by poll_init).
 *
 * Calls poll on the pollfd array for up to timeout seconds and reports
 * every entry with pending revents through fd_event, or kills the fd
 * when poll flagged it as invalid.
 */
static void
poll_poll (EV_P_ ev_tstamp timeout)
{
  struct pollfd *p;
  int res;

  EV_RELEASE_CB;
  /* timeout is in seconds, poll expects milliseconds */
  res = poll (polls, pollcnt, timeout * 1e3);
  EV_ACQUIRE_CB;

  if (expect_false (res < 0))
    {
      if (errno == EBADF)
        {
          fd_ebadf (EV_A);
          return;
        }

      /* out of memory gets special treatment only when no syserr callback is set */
      if (errno == ENOMEM && !syserr_cb)
        {
          fd_enomem (EV_A);
          return;
        }

      if (errno != EINTR)
        ev_syserr ("(libev) poll");

      return;
    }

  /* res is the number of entries with revents set, stop once all were seen */
  p = polls;

  while (res)
    {
      assert (("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt));

      if (expect_false (p->revents)) /* whether this is really the unlikely case is debatable */
        {
          --res;

          if (expect_false (p->revents & POLLNVAL))
            fd_kill (EV_A_ p->fd);
          else
            {
              /* error and hangup conditions are reported as both readable and writable */
              int got = 0;

              if (p->revents & (POLLOUT | POLLERR | POLLHUP))
                got |= EV_WRITE;

              if (p->revents & (POLLIN | POLLERR | POLLHUP))
                got |= EV_READ;

              fd_event (EV_A_ p->fd, got);
            }
        }

      ++p;
    }
}

/*
 * Set up the poll backend for this loop; always returns EVBACKEND_POLL.
 * The flags argument is not looked at.
 */
inline_size
int
poll_init (EV_P_ int flags)
{
  /* both arrays start out empty and unallocated, poll_modify grows them on demand */
  polls      = 0;
  pollmax    = 0;
  pollcnt    = 0;
  pollidxs   = 0;
  pollidxmax = 0;

  /* hook this backend into the loop */
  backend_modify  = poll_modify;
  backend_poll    = poll_poll;
  backend_mintime = 1e-3;

  return EVBACKEND_POLL;
}

/* free the memory owned by the poll backend: the pollfd array and the fd -> index map */
inline_size
void
poll_destroy (EV_P)
{
  ev_free (polls);
  ev_free (pollidxs);
}


================================================
FILE: libev/ev_port.c
================================================
/*
 * libev solaris event port backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/* useful reading:
 *
 * http://bugs.opensolaris.org/view_bug.do?bug_id=6268715 (random results)
 * http://bugs.opensolaris.org/view_bug.do?bug_id=6455223 (just totally broken)
 * http://bugs.opensolaris.org/view_bug.do?bug_id=6873782 (manpage ETIME)
 * http://bugs.opensolaris.org/view_bug.do?bug_id=6874410 (implementation ETIME)
 * http://www.mail-archive.com/networking-discuss@opensolaris.org/msg11898.html ETIME vs. nget
 * http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/gen/event_port.c (libc)
 * http://cvs.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/fs/portfs/port.c#1325 (kernel)
 */

#include <sys/types.h>
#include <sys/time.h>
#include <poll.h>
#include <port.h>
#include <string.h>
#include <errno.h>

/*
 * Associate fd with the event port for the events given in ev
 * (EV_READ/EV_WRITE bits). On failure the fd is killed if the error is
 * EBADFD, every other error is reported through ev_syserr.
 */
inline_speed
void
port_associate_and_check (EV_P_ int fd, int ev)
{
  int wanted = 0;

  /* translate the libev event bits into poll-style ones */
  if (ev & EV_READ)
    wanted |= POLLIN;

  if (ev & EV_WRITE)
    wanted |= POLLOUT;

  if (port_associate (backend_fd, PORT_SOURCE_FD, fd, wanted, 0) >= 0)
    return;

  if (errno == EBADFD)
    fd_kill (EV_A_ fd);
  else
    ev_syserr ("(libev) port_associate");
}

/*
 * backend_modify hook of the event port backend (installed by port_init).
 *
 * A nonzero nev always leads to a (re-)association, even if the mask is
 * unchanged, because closes are silently discarded by event ports.
 * A zero nev dissociates the fd, provided it was watched before (oev).
 */
static void
port_modify (EV_P_ int fd, int oev, int nev)
{
  if (nev)
    {
      port_associate_and_check (EV_A_ fd, nev);
      return;
    }

  if (oev)
    port_dissociate (backend_fd, PORT_SOURCE_FD, fd);
}

/*
 * backend_poll hook of the event port backend (installed by port_init).
 *
 * Fetches events with port_getn for up to timeout seconds, reports the
 * fd-based ones through fd_event and grows the receive buffer when it was
 * filled completely.
 */
static void
port_poll (EV_P_ ev_tstamp timeout)
{
  struct timespec ts;
  uint_t nget = 1;
  int res, i;

  /* port_getn may return with nget unchanged without telling whether that */
  /* is the original value or an updated one, so the first slot is preset */
  /* to a source the loop below skips */
  port_events [0].portev_source = 0;

  EV_RELEASE_CB;
  EV_TS_SET (ts, timeout);
  res = port_getn (backend_fd, port_events, port_eventmax, &nget, &ts);
  EV_ACQUIRE_CB;

  /* nget may or may not have been set on error, the loop below therefore */
  /* relies on port_events [0].portev_source not having been updated */
  if (res == -1)
    {
      if (errno != ETIME && errno != EINTR)
        ev_syserr ("(libev) port_getn (see http://bugs.opensolaris.org/view_bug.do?bug_id=6268715, try LIBEV_FLAGS=3 env variable)");
    }

  for (i = 0; i < nget; ++i)
    {
      port_event_t *pe = port_events + i;
      int fd, got;

      if (pe->portev_source != PORT_SOURCE_FD)
        continue;

      fd = pe->portev_object;

      /* error and hangup conditions are reported as both readable and writable */
      got = 0;

      if (pe->portev_events & (POLLOUT | POLLERR | POLLHUP))
        got |= EV_WRITE;

      if (pe->portev_events & (POLLIN | POLLERR | POLLHUP))
        got |= EV_READ;

      fd_event (EV_A_ fd, got);

      /* mark the fd as changed - presumably so that it gets associated */
      /* again by port_modify; confirm against fd_change/fd_reify in ev.c */
      fd_change (EV_A_ fd, EV__IOFDSET);
    }

  /* a completely filled buffer hints at more events waiting, so enlarge it for next time */
  if (expect_false (nget == port_eventmax))
    {
      ev_free (port_events);
      port_eventmax = array_nextsize (sizeof (port_event_t), port_eventmax, port_eventmax + 1);
      port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax);
    }
}

/*
 * Set up the solaris event port backend for this loop.
 *
 * Returns EVBACKEND_PORT on success and 0 if no event port could be
 * created. The flags argument is not looked at.
 */
inline_size
int
port_init (EV_P_ int flags)
{
  /* create the kernel event queue */
  backend_fd = port_create ();

  if (backend_fd < 0)
    return 0;

  /* port_poll uses a zero portev_source as its "slot not filled in" marker */
  assert (("libev: PORT_SOURCE_FD must not be zero", PORT_SOURCE_FD));

  fcntl (backend_fd, F_SETFD, FD_CLOEXEC); /* may be unnecessary, but should not hurt either */

  /* room for 64 events per poll to begin with, port_poll grows this on demand */
  port_eventmax = 64;
  port_events = (port_event_t *)ev_malloc (sizeof (port_event_t) * port_eventmax);

  /* hook this backend into the loop */
  backend_modify  = port_modify;
  backend_poll    = port_poll;

  /* if my reading of the opensolaris kernel sources is correct, then
   * opensolaris does something very stupid: it checks whether the time has
   * already elapsed and doesn't round up if that is the case, otherwise it
   * DOES round up. Since we can't know which case applies, we have to guess
   * by using a "large enough" timeout. Normally, 1e-9 would be correct.
   */
  backend_mintime = 1e-3; /* needed to compensate for port_getn returning early */

  return EVBACKEND_PORT;
}

/* free the event receive buffer that port_init allocated (and port_poll may have regrown) */
inline_size
void
port_destroy (EV_P)
{
  ev_free (port_events);
}

/*
 * Replace the event port by a freshly created one and have all fds
 * registered again.
 */
inline_size
void
port_fork (EV_P)
{
  close (backend_fd);

  /* keep trying until a port is obtained, reporting each failure */
  for (;;)
    {
      backend_fd = port_create ();

      if (backend_fd >= 0)
        break;

      ev_syserr ("(libev) port");
    }

  fcntl (backend_fd, F_SETFD, FD_CLOEXEC);

  /* re-register interest in fds */
  fd_rearm_all (EV_A);
}


================================================
FILE: libev/ev_select.c
================================================
/*
 * libev select fd activity backend
 *
 * Copyright (c) 2007,2008,2009,2010,2011 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifndef _WIN32
/* for unix systems */
# include <inttypes.h>
# ifndef __hpux
/* for REAL unix systems */
#  include <sys/select.h>
# endif
#endif

/* Choose how the descriptor sets are represented:
 *   1 - plain fd_set objects, manipulated via FD_SET/FD_CLR/FD_ISSET
 *   0 - arrays of fd_mask words that this backend grows on demand
 * Unless overridden, the fd_mask arrays are used whenever NFDBITS is defined.
 */
#ifndef EV_SELECT_USE_FD_SET
# ifdef NFDBITS
#  define EV_SELECT_USE_FD_SET 0
# else
#  define EV_SELECT_USE_FD_SET 1
# endif
#endif

/* the winsocket variant always goes through fd_set; NFDBITS is forced to 0
 * so that expressions such as vec_max * NFDBITS evaluate to 0 there.
 */
#if EV_SELECT_IS_WINSOCKET
# undef EV_SELECT_USE_FD_SET
# define EV_SELECT_USE_FD_SET 1
# undef NFDBITS
# define NFDBITS 0
#endif

/* size in bytes of one fd_mask word, only needed for the fd_mask arrays */
#if !EV_SELECT_USE_FD_SET
# define NFDBYTES (NFDBITS / 8)
#endif

#include <string.h>

/*
 * Update the input sets (vec_ri for reading, vec_wi for writing) so that
 * they reflect the new event mask of a file descriptor.
 *
 *   fd  - the file descriptor whose interest set changed
 *   oev - the previous event mask (EV_READ/EV_WRITE bits)
 *   nev - the new event mask
 *
 * Nothing is done when the mask did not change.
 */
static void
select_modify (EV_P_ int fd, int oev, int nev)
{
  if (oev == nev)
    return;

  {
#if EV_SELECT_USE_FD_SET

    /* with winsockets the set holds the SOCKET handle, not the fd */
    #if EV_SELECT_IS_WINSOCKET
    SOCKET handle = anfds [fd].handle;
    #else
    int handle = fd;
    #endif

    assert (("libev: fd >= FD_SETSIZE passed to fd_set-based select backend", fd < FD_SETSIZE));

    /* FD_SET is broken on windows (it adds the fd to a set twice or more,
     * which eventually leads to overflows). Need to call it only on changes.
     */
    #if EV_SELECT_IS_WINSOCKET
    if ((oev ^ nev) & EV_READ)
    #endif
      if (nev & EV_READ)
        FD_SET (handle, (fd_set *)vec_ri);
      else
        FD_CLR (handle, (fd_set *)vec_ri);

    #if EV_SELECT_IS_WINSOCKET
    if ((oev ^ nev) & EV_WRITE)
    #endif
      if (nev & EV_WRITE)
        FD_SET (handle, (fd_set *)vec_wi);
      else
        FD_CLR (handle, (fd_set *)vec_wi);

#else

    /* locate the fd_mask word and the bit within it that represent fd */
    int     word = fd / NFDBITS;
    fd_mask mask = 1UL << (fd % NFDBITS);

    /* grow all vectors so that they contain at least word + 1 words */
    if (expect_false (vec_max <= word))
      {
        int new_max = word + 1;

        vec_ri = ev_realloc (vec_ri, new_max * NFDBYTES);
        vec_ro = ev_realloc (vec_ro, new_max * NFDBYTES); /* could free/malloc */
        vec_wi = ev_realloc (vec_wi, new_max * NFDBYTES);
        vec_wo = ev_realloc (vec_wo, new_max * NFDBYTES); /* could free/malloc */
        #ifdef _WIN32
        vec_eo = ev_realloc (vec_eo, new_max * NFDBYTES); /* could free/malloc */
        #endif

        /* only the input vectors need clearing, the output vectors are
         * overwritten from them before every select call */
        for (; vec_max < new_max; ++vec_max)
          ((fd_mask *)vec_ri) [vec_max] =
          ((fd_mask *)vec_wi) [vec_max] = 0;
      }

    /* set the bit unconditionally, then clear it again if not wanted */
    ((fd_mask *)vec_ri) [word] |= mask;
    if (!(nev & EV_READ))
      ((fd_mask *)vec_ri) [word] &= ~mask;

    ((fd_mask *)vec_wi) [word] |= mask;
    if (!(nev & EV_WRITE))
      ((fd_mask *)vec_wi) [word] &= ~mask;
#endif
  }
}

/*
 * Wait for events with select () for at most timeout seconds and feed every
 * ready descriptor to fd_event.
 *
 * The input sets (vec_ri, vec_wi) are copied into the output sets (vec_ro,
 * vec_wo) first, so select () only ever modifies the copies. Errors are
 * translated and handed to fd_ebadf, fd_enomem or ev_syserr; EINTR is
 * silently ignored.
 */
static void
select_poll (EV_P_ ev_tstamp timeout)
{
  struct timeval tv;
  int res;
  int fd_setsize;

  EV_RELEASE_CB;
  EV_TV_SET (tv, timeout);

  /* number of bytes to copy from the input sets to the output sets */
#if EV_SELECT_USE_FD_SET
  fd_setsize = sizeof (fd_set);
#else
  fd_setsize = vec_max * NFDBYTES;
#endif

  memcpy (vec_ro, vec_ri, fd_setsize);
  memcpy (vec_wo, vec_wi, fd_setsize);

#ifdef _WIN32
  /* pass in the write set as except set.
   * the idea behind this is to work around a windows bug that causes
   * errors to be reported as an exception and not by setting
   * the writable bit. this is so uncontrollably lame.
   */
  memcpy (vec_eo, vec_wi, fd_setsize);
  res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, (fd_set *)vec_eo, &tv);
#elif EV_SELECT_USE_FD_SET
  /* fd_setsize is reused here as the nfds argument, capped at FD_SETSIZE */
  fd_setsize = anfdmax < FD_SETSIZE ? anfdmax : FD_SETSIZE;
  res = select (fd_setsize, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv);
#else
  res = select (vec_max * NFDBITS, (fd_set *)vec_ro, (fd_set *)vec_wo, 0, &tv);
#endif
  EV_ACQUIRE_CB;

  if (expect_false (res < 0))
    {
      #if EV_SELECT_IS_WINSOCKET
      errno = WSAGetLastError ();
      #endif
      #ifdef WSABASEERR
      /* on windows, select returns incompatible error codes, fix this */
      /* note: the else below intentionally binds to the inner if */
      if (errno >= WSABASEERR && errno < WSABASEERR + 1000)
        if (errno == WSAENOTSOCK)
          errno = EBADF;
        else
          errno -= WSABASEERR;
      #endif

      #ifdef _WIN32
      /* select on windows erroneously returns EINVAL when no fd sets have been
       * provided (this is documented). what microsoft doesn't tell you is that
       * this bug exists even when the fd sets _are_ provided, so we have to
       * check for this bug here and emulate by sleeping manually.
       * we also get EINVAL when the timeout is invalid, but we ignore this case here
       * and assume that EINVAL always means: you have to wait manually.
       */
      if (errno == EINVAL)
        {
          if (timeout)
            {
              /* sleep at least one millisecond for any non-zero timeout */
              unsigned long ms = timeout * 1e3;
              Sleep (ms ? ms : 1);
            }

          return;
        }
      #endif

      if (errno == EBADF)
        fd_ebadf (EV_A);
      else if (errno == ENOMEM && !syserr_cb)
        fd_enomem (EV_A);
      else if (errno != EINTR)
        ev_syserr ("(libev) select");

      return;
    }

#if EV_SELECT_USE_FD_SET

  {
    int fd;

    /* check every fd that has events registered against the output sets */
    for (fd = 0; fd < anfdmax; ++fd)
      if (anfds [fd].events)
        {
          int events = 0;
          #if EV_SELECT_IS_WINSOCKET
          SOCKET handle = anfds [fd].handle;
          #else
          int handle = fd;
          #endif

          if (FD_ISSET (handle, (fd_set *)vec_ro)) events |= EV_READ;
          if (FD_ISSET (handle, (fd_set *)vec_wo)) events |= EV_WRITE;
          #ifdef _WIN32
          /* the except set was seeded from the write set, report it as writable */
          if (FD_ISSET (handle, (fd_set *)vec_eo)) events |= EV_WRITE;
          #endif

          if (expect_true (events))
            fd_event (EV_A_ fd, events);
        }
  }

#else

  {
    int word, bit;
    /* walk the output words from the highest to the lowest */
    for (word = vec_max; word--; )
      {
        fd_mask word_r = ((fd_mask *)vec_ro) [word];
        fd_mask word_w = ((fd_mask *)vec_wo) [word];
        #ifdef _WIN32
        word_w |= ((fd_mask *)vec_eo) [word];
        #endif

        /* skip words without any ready bits */
        if (word_r || word_w)
          for (bit = NFDBITS; bit--; )
            {
              fd_mask mask = 1UL << bit;
              int events = 0;

              events |= word_r & mask ? EV_READ  : 0;
              events |= word_w & mask ? EV_WRITE : 0;

              if (expect_true (events))
                fd_event (EV_A_ word * NFDBITS + bit, events);
            }
      }
  }

#endif
}

/*
 * Install the select backend into the loop: register the modify/poll
 * callbacks and prepare the descriptor sets. Returns EVBACKEND_SELECT.
 * The flags argument is not used.
 */
inline_size
int
select_init (EV_P_ int flags)
{
  backend_poll    = select_poll;
  backend_modify  = select_modify;
  backend_mintime = 1e-6;

#if !EV_SELECT_USE_FD_SET
  /* the fd_mask vectors start out empty and are grown by select_modify */
  vec_ri = vec_ro = vec_wi = vec_wo = 0;
  #ifdef _WIN32
  vec_eo = 0;
  #endif
  vec_max = 0;
#else
  /* fixed-size fd_set objects, allocated once */
  vec_ri = ev_malloc (sizeof (fd_set));
  vec_ro = ev_malloc (sizeof (fd_set));
  vec_wi = ev_malloc (sizeof (fd_set));
  vec_wo = ev_malloc (sizeof (fd_set));
  #ifdef _WIN32
  vec_eo = ev_malloc (sizeof (fd_set));
  #endif

  /* only the input sets need to start out cleared */
  FD_ZERO ((fd_set *)vec_ri);
  FD_ZERO ((fd_set *)vec_wi);
#endif

  return EVBACKEND_SELECT;
}

/* Release every descriptor set owned by the select backend. */
inline_size
void
select_destroy (EV_P)
{
  /* input sets */
  ev_free (vec_ri);
  ev_free (vec_wi);

  /* output sets */
  ev_free (vec_ro);
  ev_free (vec_wo);
  #ifdef _WIN32
  ev_free (vec_eo);
  #endif
}


================================================
FILE: libev/ev_vars.h
================================================
/*
 * loop member variable declarations
 *
 * Copyright (c) 2007,2008,2009,2010,2011,2012,2013 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

/* VARx (type, name) declares a simple "type name" member by forwarding to
 * VAR; members whose declarator is not of that form (arrays, function
 * pointers) use VAR directly. VAR itself is not defined in this file -
 * presumably it is supplied by whoever includes it.
 */
#define VARx(type,name) VAR(name, type name)

VARx(ev_tstamp, now_floor) /* last time we refreshed rt_time */
VARx(ev_tstamp, mn_now)    /* monotonic clock "now" */
VARx(ev_tstamp, rtmn_diff) /* difference realtime - monotonic time */

/* for reverse feeding of events */
VARx(W *, rfeeds)
VARx(int, rfeedmax)
VARx(int, rfeedcnt)

/* pending-event bookkeeping, one slot per priority (NUMPRI entries) */
VAR (pendings, ANPENDING *pendings [NUMPRI])
VAR (pendingmax, int pendingmax [NUMPRI])
VAR (pendingcnt, int pendingcnt [NUMPRI])
VARx(int, pendingpri) /* highest priority currently pending */
VARx(ev_prepare, pending_w) /* dummy pending watcher */

VARx(ev_tstamp, io_blocktime)
VARx(ev_tstamp, timeout_blocktime)

VARx(int, backend)
VARx(int, activecnt) /* total number of active events ("refcount") */
VARx(EV_ATOMIC_T, loop_done)  /* signal by ev_break */

VARx(int, backend_fd)
VARx(ev_tstamp, backend_mintime) /* assumed typical timer resolution */
/* backend entry points: change the event mask of an fd, and wait for events */
VAR (backend_modify, void (*backend_modify)(EV_P_ int fd, int oev, int nev))
VAR (backend_poll  , void (*backend_poll)(EV_P_ ev_tstamp timeout))

/* per-fd state, indexed by file descriptor; anfdmax bounds the indices */
VARx(ANFD *, anfds)
VARx(int, anfdmax)

VAR (evpipe, int evpipe [2])
VARx(ev_io, pipe_w)
VARx(EV_ATOMIC_T, pipe_write_wanted)
VARx(EV_ATOMIC_T, pipe_write_skipped)

#if !defined(_WIN32) || EV_GENWRAP
VARx(pid_t, curpid)
#endif

VARx(char, postfork)  /* true if we need to recreate kernel state after fork */

/* select backend: ri/wi are the read/write input sets, ro/wo the copies
 * handed to select (); vec_eo is the extra except set used on windows and
 * vec_max the number of fd_mask words allocated per set.
 */
#if EV_USE_SELECT || EV_GENWRAP
VARx(void *, vec_ri)
VARx(void *, vec_ro)
VARx(void *, vec_wi)
VARx(void *, vec_wo)
#if defined(_WIN32) || EV_GENWRAP
VARx(void *, vec_eo)
#endif
VARx(int, vec_max)
#endif

/* poll backend state */
#if EV_USE_POLL || EV_GENWRAP
VARx(struct pollfd *, polls)
VARx(int, pollmax)
VARx(int, pollcnt)
VARx(int *, pollidxs) /* maps fds into structure indices */
VARx(int, pollidxmax)
#endif

/* epoll backend state */
#if EV_USE_EPOLL || EV_GENWRAP
VARx(struct epoll_event *, epoll_events)
VARx(int, epoll_eventmax)
VARx(int *, epoll_eperms)
VARx(int, epoll_epermcnt)
VARx(int, epoll_epermmax)
#endif

/* kqueue backend state */
#if EV_USE_KQUEUE || EV_GENWRAP
VARx(pid_t, kqueue_fd_pid)
VARx(struct kevent *, kqueue_changes)
VARx(int, kqueue_changemax)
VARx(int, kqueue_changecnt)
VARx(struct kevent *, kqueue_events)
VARx(int, kqueue_eventmax)
#endif

/* event port backend state */
#if EV_USE_PORT || EV_GENWRAP
VARx(struct port_event *, port_events)
VARx(int, port_eventmax)
#endif

/* iocp backend state */
#if EV_USE_IOCP || EV_GENWRAP
VARx(HANDLE, iocp)
#endif

/* The members below mostly come in triples: a dynamically sized array
 * <name>s together with <name>max and <name>cnt counters (presumably the
 * allocated size and the number of used entries - confirm against ev.c).
 */
VARx(int *, fdchanges)
VARx(int, fdchangemax)
VARx(int, fdchangecnt)

VARx(ANHE *, timers)
VARx(int, timermax)
VARx(int, timercnt)

#if EV_PERIODIC_ENABLE || EV_GENWRAP
VARx(ANHE *, periodics)
VARx(int, periodicmax)
VARx(int, periodiccnt)
#endif

/* idle watchers are kept per priority (NUMPRI entries) */
#if EV_IDLE_ENABLE || EV_GENWRAP
VAR (idles, ev_idle **idles [NUMPRI])
VAR (idlemax, int idlemax [NUMPRI])
VAR (idlecnt, int idlecnt [NUMPRI])
#endif
VARx(int, idleall) /* total number */

VARx(struct ev_prepare **, prepares)
VARx(int, preparemax)
VARx(int, preparecnt)

VARx(struct ev_check **, checks)
VARx(int, checkmax)
VARx(int, checkcnt)

#if EV_FORK_ENABLE || EV_GENWRAP
VARx(struct ev_fork **, forks)
VARx(int, forkmax)
VARx(int, forkcnt)
#endif

#if EV_CLEANUP_ENABLE || EV_GENWRAP
VARx(struct ev_cleanup **, cleanups)
VARx(int, cleanupmax)
VARx(int, cleanupcnt)
#endif

#if EV_ASYNC_ENABLE || EV_GENWRAP
VARx(EV_ATOMIC_T, async_pending)
VARx(struct ev_async **, asyncs)
VARx(int, asyncmax)
VARx(int, asynccnt)
#endif

/* inotify support state */
#if EV_USE_INOTIFY || EV_GENWRAP
VARx(int, fs_fd)
VARx(ev_io, fs_w)
VARx(char, fs_2625) /* whether we are running in linux 2.6.25 or newer */
VAR (fs_hash, ANFS fs_hash [EV_INOTIFY_HASHSIZE])
#endif

/* signal handling state; the sigfd_* members only exist with signalfd */
VARx(EV_ATOMIC_T, sig_pending)
#if EV_USE_SIGNALFD || EV_GENWRAP
VARx(int, sigfd)
VARx(ev_io, sigfd_w)
VARx(sigset_t, sigfd_set)
#endif

VARx(unsigned int, origflags) /* original loop flags */

#if EV_FEATURE_API || EV_GENWRAP
VARx(unsigned int, loop_count) /* total number of loop iterations/blocks */
VARx(unsigned int, loop_depth) /* #ev_run enters - #ev_run leaves */

VARx(void *, userdata)
/* C++ doesn't support the ev_loop_callback typedef here. stinks. */
VAR (release_cb, void (*release_cb)(EV_P) EV_THROW)
VAR (acquire_cb, void (*acquire_cb)(EV_P) EV_THROW)
VAR (invoke_cb , ev_loop_callback invoke_cb)
#endif

/* VARx is local to this file, do not leak it to the includer */
#undef VARx


================================================
FILE: libev/ev_win32.c
================================================
/*
 * libev win32 compatibility cruft (_not_ a backend)
 *
 * Copyright (c) 2007,2008,2009 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifdef _WIN32

/* note: the comment below could not be substantiated, but what would I care */
/* MSDN says this is required to handle SIGFPE */
/* my wild guess would be that using something floating-pointy is required */
/* for the crt to do something about it */
volatile double SIGFPE_REQ = 0.0f;

/* Create a new IPv4 stream socket, through WSASocket when
 * EV_USE_WSASOCKET is enabled and through socket otherwise.
 * The result of the creating call is returned unchanged.
 */
static SOCKET
ev_tcp_socket (void)
{
  SOCKET handle;

#if !EV_USE_WSASOCKET
  handle = socket (AF_INET, SOCK_STREAM, 0);
#else
  handle = WSASocket (AF_INET, SOCK_STREAM, 0, 0, 0, 0);
#endif

  return handle;
}

/* oh, the humanity! */
/*
 * pipe () replacement for windows: creates a pair of connected tcp sockets
 * over the loopback interface and stores them in filedes [0] and
 * filedes [1] (converted to fds when EV_SELECT_IS_WINSOCKET is set).
 *
 * Returns 0 on success and -1 on failure; on failure every socket created
 * so far is closed again.
 */
static int
ev_pipe (int filedes [2])
{
  struct sockaddr_in addr = { 0 };
  int addr_size = sizeof (addr);
  struct sockaddr_in adr2;
  int adr2_size = sizeof (adr2);
  SOCKET listener;
  SOCKET sock [2] = { INVALID_SOCKET, INVALID_SOCKET };

  if ((listener = ev_tcp_socket ()) == INVALID_SOCKET)
    return -1;

  /* bind to an ephemeral port on the loopback interface */
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  addr.sin_port = 0;

  if (bind (listener, (struct sockaddr *)&addr, addr_size))
    goto fail;

  /* find out which port was actually assigned */
  if (getsockname (listener, (struct sockaddr *)&addr, &addr_size))
    goto fail;

  if (listen (listener, 1))
    goto fail;

  if ((sock [0] = ev_tcp_socket ()) == INVALID_SOCKET)
    goto fail;

  if (connect (sock [0], (struct sockaddr *)&addr, addr_size))
    goto fail;

  /* SOCKET is an unsigned type, so a failed accept must be detected by
   * comparing against INVALID_SOCKET - a "< 0" test can never be true.
   */
  if ((sock [1] = accept (listener, 0, 0)) == INVALID_SOCKET)
    goto fail;

  /* windows vista returns fantasy port numbers for sockets:
   * example for two interconnected tcp sockets:
   *
   * (Socket::unpack_sockaddr_in getsockname $sock0)[0] == 53364
   * (Socket::unpack_sockaddr_in getpeername $sock0)[0] == 53363
   * (Socket::unpack_sockaddr_in getsockname $sock1)[0] == 53363
   * (Socket::unpack_sockaddr_in getpeername $sock1)[0] == 53365
   *
   * wow! tridirectional sockets!
   *
   * this way of checking ports seems to work:
   */
  if (getpeername (sock [0], (struct sockaddr *)&addr, &addr_size))
    goto fail;

  if (getsockname (sock [1], (struct sockaddr *)&adr2, &adr2_size))
    goto fail;

  /* make sure the accepted connection really is the one we initiated */
  errno = WSAEINVAL;
  if (addr_size != adr2_size
      || addr.sin_addr.s_addr != adr2.sin_addr.s_addr /* just to be sure, I mean, it's windows */
      || addr.sin_port        != adr2.sin_port)
    goto fail;

  closesocket (listener);

#if EV_SELECT_IS_WINSOCKET
  filedes [0] = EV_WIN32_HANDLE_TO_FD (sock [0]);
  filedes [1] = EV_WIN32_HANDLE_TO_FD (sock [1]);
#else
  /* when select isn't winsocket, we also expect socket, connect, accept etc.
   * to work on fds */
  filedes [0] = sock [0];
  filedes [1] = sock [1];
#endif

  return 0;

fail:
  closesocket (listener);

  if (sock [0] != INVALID_SOCKET) closesocket (sock [0]);
  if (sock [1] != INVALID_SOCKET) closesocket (sock [1]);

  return -1;
}

#undef pipe
#define pipe(filedes) ev_pipe (filedes)

#define EV_HAVE_EV_TIME 1
ev_tstamp
ev_time (void)
{
  FILETIME ft;
  ULARGE_INTEGER ui;

  GetSystemTimeAsFileTime (&ft);
  ui.u.LowPart  = ft.dwLowDateTime;
  ui.u.HighPart = ft.dwHighDateTime;

  /* msvc cannot convert ulonglong to double... yes, it is that sucky */
  return (LONGLONG)(ui.QuadPart - 116444736000000000) * 1e-7;
}

#endif


================================================
FILE: libev/ev_wrap.h
================================================
/* DO NOT EDIT, automatically generated by update_ev_wrap */
#ifndef EV_WRAP_H
#define EV_WRAP_H
#define acquire_cb ((loop)->acquire_cb)
#define activecnt ((loop)->activecnt)
#define anfdmax ((loop)->anfdmax)
#define anfds ((loop)->anfds)
#define async_pending ((loop)->async_pending)
#define asynccnt ((loop)->asynccnt)
#define asyncmax ((loop)->asyncmax)
#define asyncs ((loop)->asyncs)
#define backend ((loop)->backend)
#define backend_fd ((loop)->backend_fd)
#define backend_mintime ((loop)->backend_mintime)
#define backend_modify ((loop)->backend_modify)
#define backend_poll ((loop)->backend_poll)
#define checkcnt ((loop)->checkcnt)
#define checkmax ((loop)->checkmax)
#define checks ((loop)->checks)
#define cleanupcnt ((loop)->cleanupcnt)
#define cleanupmax ((loop)->cleanupmax)
#define cleanups ((loop)->cleanups)
#define curpid ((loop)->curpid)
#define epoll_epermcnt ((loop)->epoll_epermcnt)
#define epoll_epermmax ((loop)->epoll_epermmax)
#define epoll_eperms ((loop)->epoll_eperms)
#define epoll_eventmax ((loop)->epoll_eventmax)
#define epoll_events ((loop)->epoll_events)
#define evpipe ((loop)->evpipe)
#define fdchangecnt ((loop)->fdchangecnt)
#define fdchangemax ((loop)->fdchangemax)
#define fdchanges ((loop)->fdchanges)
#define forkcnt ((loop)->forkcnt)
#define forkmax ((loop)->forkmax)
#define forks ((loop)->forks)
#define fs_2625 ((loop)->fs_2625)
#define fs_fd ((loop)->fs_fd)
#define fs_hash ((loop)->fs_hash)
#define fs_w ((loop)->fs_w)
#define idleall ((loop)->idleall)
#define idlecnt ((loop)->idlecnt)
#define idlemax ((loop)->idlemax)
#define idles ((loop)->idles)
#define invoke_cb ((loop)->invoke_cb)
#define io_blocktime ((loop)->io_blocktime)
#define iocp ((loop)->iocp)
#define kqueue_changecnt ((loop)->kqueue_changecnt)
#define kqueue_changemax ((loop)->kqueue_changemax)
#define kqueue_changes ((loop)->kqueue_changes)
#define kqueue_eventmax ((loop)->kqueue_eventmax)
#define kqueue_events ((loop)->kqueue_events)
#define kqueue_fd_pid ((loop)->kqueue_fd_pid)
#define loop_count ((loop)->loop_count)
#define loop_depth ((loop)->loop_depth)
#define loop_done ((loop)->loop_done)
#define mn_now ((loop)->mn_now)
#define now_floor ((loop)->now_floor)
#define origflags ((loop)->origflags)
#define pending_w ((loop)->pending_w)
#define pendingcnt ((loop)->pendingcnt)
#define pendingmax ((loop)->pendingmax)
#define pendingpri ((loop)->pendingpri)
#define pendings ((loop)->pendings)
#define periodiccnt ((loop)->periodiccnt)
#define periodicmax ((loop)->periodicmax)
#define periodics ((loop)->periodics)
#define pipe_w ((loop)->pipe_w)
#define pipe_write_skipped ((loop)->pipe_write_skipped)
#define pipe_write_wanted ((loop)->pipe_write_wanted)
#define pollcnt ((loop)->pollcnt)
#define pollidxmax ((loop)->pollidxmax)
#define pollidxs ((loop)->pollidxs)
#define pollmax ((loop)->pollmax)
#define polls ((loop)->polls)
#define port_eventmax ((loop)->port_eventmax)
#define port_events ((loop)->port_events)
#define postfork ((loop)->postfork)
#define preparecnt ((loop)->preparecnt)
#define preparemax ((loop)->preparemax)
#define prepares ((loop)->prepares)
#define release_cb ((loop)->release_cb)
#define rfeedcnt ((loop)->rfeedcnt)
#define rfeedmax ((loop)->rfeedmax)
#define rfeeds ((loop)->rfeeds)
#define rtmn_diff ((loop)->rtmn_diff)
#define sig_pending ((loop)->sig_pending)
#define sigfd ((loop)->sigfd)
#define sigfd_set ((loop)->sigfd_set)
#define sigfd_w ((loop)->sigfd_w)
#define timeout_blocktime ((loop)->timeout_blocktime)
#define timercnt ((loop)->timercnt)
#define timermax ((loop)->timermax)
#define timers ((loop)->timers)
#define userdata ((loop)->userdata)
#define vec_eo ((loop)->vec_eo)
#define vec_max ((loop)->vec_max)
#define vec_ri ((loop)->vec_ri)
#define vec_ro ((loop)->vec_ro)
#define vec_wi ((loop)->vec_wi)
#define vec_wo ((loop)->vec_wo)
#else
#undef EV_WRAP_H
#undef acquire_cb
#undef activecnt
#undef anfdmax
#undef anfds
#undef async_pending
#undef asynccnt
#undef asyncmax
#undef asyncs
#undef backend
#undef backend_fd
#undef backend_mintime
#undef backend_modify
#undef backend_poll
#undef checkcnt
#undef checkmax
#undef checks
#undef cleanupcnt
#undef cleanupmax
#undef cleanups
#undef curpid
#undef epoll_epermcnt
#undef epoll_epermmax
#undef epoll_eperms
#undef epoll_eventmax
#undef epoll_events
#undef evpipe
#undef fdchangecnt
#undef fdchangemax
#undef fdchanges
#undef forkcnt
#undef forkmax
#undef forks
#undef fs_2625
#undef fs_fd
#undef fs_hash
#undef fs_w
#undef idleall
#undef idlecnt
#undef idlemax
#undef idles
#undef invoke_cb
#undef io_blocktime
#undef iocp
#undef kqueue_changecnt
#undef kqueue_changemax
#undef kqueue_changes
#undef kqueue_eventmax
#undef kqueue_events
#undef kqueue_fd_pid
#undef loop_count
#undef loop_depth
#undef loop_done
#undef mn_now
#undef now_floor
#undef origflags
#undef pending_w
#undef pendingcnt
#undef pendingmax
#undef pendingpri
#undef pendings
#undef periodiccnt
#undef periodicmax
#undef periodics
#undef pipe_w
#undef pipe_write_skipped
#undef pipe_write_wanted
#undef pollcnt
#undef pollidxmax
#undef pollidxs
#undef pollmax
#undef polls
#undef port_eventmax
#undef port_events
#undef postfork
#undef preparecnt
#undef preparemax
#undef prepares
#undef release_cb
#undef rfeedcnt
#undef rfeedmax
#undef rfeeds
#undef rtmn_diff
#undef sig_pending
#undef sigfd
#undef sigfd_set
#undef sigfd_w
#undef timeout_blocktime
#undef timercnt
#undef timermax
#undef timers
#undef userdata
#undef vec_eo
#undef vec_max
#undef vec_ri
#undef vec_ro
#undef vec_wi
#undef vec_wo
#endif


================================================
FILE: libev/event.c
================================================
/*
 * libevent compatibility layer
 *
 * Copyright (c) 2007,2008,2009,2010,2012 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#include <stddef.h>
#include <stdlib.h>
#include <assert.h>

#ifdef EV_EVENT_H
# include EV_EVENT_H
#else
# include "event.h"
#endif

#if EV_MULTIPLICITY
# define dLOOPev struct ev_loop *loop = (struct ev_loop *)ev->ev_base
# define dLOOPbase struct ev_loop *loop = (struct ev_loop *)base
#else
# define dLOOPev
# define dLOOPbase
#endif

/* Opaque stand-in for libevent's event_base: its member is never accessed,
 * pointers to it are always cast from/to ev_loop. */
struct event_base
{
  int dummy;
};

/* the "current" base used by all functions that take no base argument;
 * set by event_init */
static struct event_base *ev_x_cur;

/* Convert a timeval into seconds.
 * A null pointer yields -1., a zero timeval yields 1e-6 instead of 0.
 */
static ev_tstamp
ev_tv_get (struct timeval *tv)
{
  ev_tstamp seconds;

  if (!tv)
    return -1.;

  seconds = tv->tv_sec + tv->tv_usec * 1e-6;

  if (seconds)
    return seconds;

  return 1e-6;
}

#define EVENT_STRINGIFY(s) # s
#define EVENT_VERSION(a,b) EVENT_STRINGIFY (a) "." EVENT_STRINGIFY (b)

/* Return the version as a "major.minor" string built from
 * EV_VERSION_MAJOR and EV_VERSION_MINOR.
 */
const char *
event_get_version (void)
{
  /* returns ABI, not API or library, version */
  const char *version = EVENT_VERSION (EV_VERSION_MAJOR, EV_VERSION_MINOR);

  return version;
}

/* Return the name of the event mechanism, which is always "libev". */
const char *
event_get_method (void)
{
  static const char method_name [] = "libev";

  return method_name;
}

/* Set up the current event base and return it.
 * With EV_MULTIPLICITY the first call selects the default loop and every
 * later call creates a new loop; without it only the default loop exists.
 */
void *event_init (void)
{
#if !EV_MULTIPLICITY
  assert (("libev: multiple event bases not supported when not compiled with EV_MULTIPLICITY", !ev_x_cur));

  ev_x_cur = (struct event_base *)(long)ev_default_loop (EVFLAG_AUTO);
#else
  ev_x_cur = (struct event_base *)(ev_x_cur
                                   ? ev_loop_new (EVFLAG_AUTO)
                                   : ev_default_loop (EVFLAG_AUTO));
#endif

  return ev_x_cur;
}

/* Return the name of the event mechanism of base, which is always "libev";
 * the base argument is not inspected.
 */
const char *
event_base_get_method (const struct event_base *base)
{
  static const char method_name [] = "libev";

  return method_name;
}

/* Create a new event base.
 * With EV_MULTIPLICITY this is a freshly created loop. Without it, extra
 * bases are not supported: the call trips an assertion, or returns NULL
 * when assertions are compiled out.
 */
struct event_base *
event_base_new (void)
{
#if EV_MULTIPLICITY
  return (struct event_base *)ev_loop_new (EVFLAG_AUTO);
#else
  /* the trailing 0 makes the assertion fail; a bare string literal is
   * always true and would never trigger */
  assert (("libev: multiple event bases not supported when not compiled with EV_MULTIPLICITY", 0));
  return NULL;
#endif
}

/* Destroy the loop behind base, unless it is the default loop.
 * Without EV_MULTIPLICITY this does nothing.
 */
void event_base_free (struct event_base *base)
{
  dLOOPbase;

#if EV_MULTIPLICITY
  if (ev_is_default_loop (loop))
    return;

  ev_loop_destroy (loop);
#endif
}

/* Run event_base_dispatch on the current base and return its result. */
int event_dispatch (void)
{
  struct event_base *current = ev_x_cur;

  return event_base_dispatch (current);
}

#ifdef EV_STANDALONE
/* Accepted for compatibility only: the callback is ignored. */
void event_set_log_callback (event_log_cb cb)
{
  (void)cb; /* nop */
}
#endif

/* Run event_base_loop on the current base with the given flags. */
int event_loop (int flags)
{
  struct event_base *current = ev_x_cur;

  return event_base_loop (current, flags);
}

/* Run event_base_loopexit on the current base with the given timeout. */
int event_loopexit (struct timeval *tv)
{
  struct event_base *current = ev_x_cur;

  return event_base_loopexit (current, tv);
}

/* Return the callback stored in ev. */
event_callback_fn event_get_callback
(const struct event *ev)
{
  event_callback_fn callback = ev->ev_callback;

  return callback;
}

/* Common tail of all watcher callbacks: reduce revents to the read, write,
 * timer and signal bits, record them in ev_res and invoke the user callback.
 */
static void
ev_x_cb (struct event *ev, int revents)
{
  int reported = revents & (EV_READ | EV_WRITE | EV_TIMER | EV_SIGNAL);

  ev->ev_res = reported;
  ev->ev_callback (ev->ev_fd, (short)reported, ev->ev_arg);
}

/* Signal watcher callback: recover the enclosing event from the embedded
 * watcher, delete the event on EV_ERROR, then run the user callback.
 */
static void
ev_x_cb_sig (EV_P_ struct ev_signal *w, int revents)
{
  char *member = (char *)w;
  struct event *owner = (struct event *)(member - offsetof (struct event, iosig.sig));

  if (revents & EV_ERROR)
    event_del (owner);

  ev_x_cb (owner, revents);
}

/* I/O watcher callback: recover the enclosing event from the embedded
 * watcher, delete the event on EV_ERROR or when it is not persistent,
 * then run the user callback.
 */
static void
ev_x_cb_io (EV_P_ struct ev_io *w, int revents)
{
  char *member = (char *)w;
  struct event *owner = (struct event *)(member - offsetof (struct event, iosig.io));
  int persistent = owner->ev_events & EV_PERSIST;

  if (!persistent || (revents & EV_ERROR))
    event_del (owner);

  ev_x_cb (owner, revents);
}

/* Timer watcher callback: recover the enclosing event from the embedded
 * timer, delete the event and run the user callback.
 */
static void
ev_x_cb_to (EV_P_ struct ev_timer *w, int revents)
{
  char *member = (char *)w;
  struct event *owner = (struct event *)(member - offsetof (struct event, to));

  event_del (owner);
  ev_x_cb (owner, revents);
}

/* Initialise ev for the given fd (or signal number), event mask, callback
 * and callback argument. The event is attached to the current base and is
 * not started.
 */
void event_set (struct event *ev, int fd, short events, void (*cb)(int, short, void *), void *arg)
{
  /* user-visible settings */
  ev->ev_fd       = fd;
  ev->ev_events   = events;
  ev->ev_callback = cb;
  ev->ev_arg      = arg;

  /* bookkeeping defaults */
  ev->ev_base     = ev_x_cur; /* not threadsafe, but it's how libevent works */
  ev->ev_pri      = 0;
  ev->ev_res      = 0;
  ev->ev_flags    = EVLIST_INIT;

  /* the timeout watcher always exists, the other one is either a signal
   * or an io watcher depending on the event mask */
  ev_init (&ev->to, ev_x_cb_to);

  if (!(events & EV_SIGNAL))
    ev_init (&ev->iosig.io, ev_x_cb_io);
  else
    ev_init (&ev->iosig.sig, ev_x_cb_sig);
}

/* Run event_base_once on the current base with the given arguments. */
int event_once (int fd, short events, void (*cb)(int, short, void *), void *arg, struct timeval *tv)
{
  struct event_base *current = ev_x_cur;

  return event_base_once (current, fd, events, cb, arg, tv);
}

/* Start the watchers behind ev.
 * The signal or io watcher is started unless it is already active. With a
 * timeout the timer is (re)armed, without one it is stopped.
 * Always returns 0.
 */
int event_add (struct event *ev, struct timeval *tv)
{
  int io_mask = ev->ev_events & (EV_READ | EV_WRITE);
  dLOOPev;

  if (ev->ev_events & EV_SIGNAL)
    {
      /* signal events never fall through to the io case */
      if (!ev_is_active (&ev->iosig.sig))
        {
          ev_signal_set (&ev->iosig.sig, ev->ev_fd);
          ev_signal_start (EV_A_ &ev->iosig.sig);

          ev->ev_flags |= EVLIST_SIGNAL;
        }
    }
  else if (io_mask && !ev_is_active (&ev->iosig.io))
    {
      ev_io_set (&ev->iosig.io, ev->ev_fd, io_mask);
      ev_io_start (EV_A_ &ev->iosig.io);

      ev->ev_flags |= EVLIST_INSERTED;
    }

  if (!tv)
    {
      ev_timer_stop (EV_A_ &ev->to);
      ev->ev_flags &= ~EVLIST_TIMEOUT;
    }
  else
    {
      ev->to.repeat = ev_tv_get (tv);
      ev_timer_again (EV_A_ &ev->to);
      ev->ev_flags |= EVLIST_TIMEOUT;
    }

  ev->ev_flags |= EVLIST_ACTIVE;

  return 0;
}

/* Stop the watchers behind ev and reset its flags to EVLIST_INIT.
 * Always returns 0.
 */
int event_del (struct event *ev)
{
  int io_mask = ev->ev_events & (EV_READ | EV_WRITE);
  dLOOPev;

  if (ev->ev_events & EV_SIGNAL)
    ev_signal_stop (EV_A_ &ev->iosig.sig);
  else if (io_mask)
    ev_io_stop (EV_A_ &ev->iosig.io);

  /* the timer is only stopped when it is running */
  if (ev_is_active (&ev->to))
    ev_timer_stop (EV_A_ &ev->to);

  ev->ev_flags = EVLIST_INIT;

  return 0;
}

/* Feed the events named in res to the matching watchers of ev: timeout
 * bits go to the timer, signal bits to the signal watcher and read/write
 * bits to the io watcher. ncalls is ignored.
 */
void event_active (struct event *ev, int res, short ncalls)
{
  int timeout_bits = res & EV_TIMEOUT;
  int signal_bits  = res & EV_SIGNAL;
  int io_bits      = res & (EV_READ | EV_WRITE);
  dLOOPev;

  if (timeout_bits)
    ev_feed_event (EV_A_ &ev->to, timeout_bits);

  if (signal_bits)
    ev_feed_event (EV_A_ &ev->iosig.sig, signal_bits);

  if (io_bits)
    ev_feed_event (EV_A_ &ev->iosig.io, io_bits);
}

/*
 * Report which of the requested event types are currently armed on ev.
 *
 * A signal event contributes EV_SIGNAL, an io event contributes its
 * requested read/write bits, each only while the watcher is active or
 * pending. EV_TIMEOUT is contributed when the event asked for it or when
 * the timer is active or pending; in that case, if tv is non-NULL, it is
 * filled with the loop's current time (ev_now) split into seconds and
 * microseconds. The return value is the intersection with `events`.
 */
int event_pending (struct event *ev, short events, struct timeval *tv)
{
  short armed = 0;
  dLOOPev;

  if (ev->ev_events & EV_SIGNAL)
    {
      if (ev_is_active (&ev->iosig.sig) || ev_is_pending (&ev->iosig.sig))
        armed |= EV_SIGNAL;
    }
  else if ((ev->ev_events & (EV_READ | EV_WRITE))
           && (ev_is_active (&ev->iosig.io) || ev_is_pending (&ev->iosig.io)))
    {
      armed |= ev->ev_events & (EV_READ | EV_WRITE);
    }

  if (ev->ev_events & EV_TIMEOUT || ev_is_active (&ev->to) || ev_is_pending (&ev->to))
    {
      armed |= EV_TIMEOUT;

      if (tv)
        {
          ev_tstamp now = ev_now (EV_A);

          /* the fraction is derived from the value actually stored in tv_sec */
          tv->tv_sec  = (long)now;
          tv->tv_usec = (long)((now - (ev_tstamp)tv->tv_sec) * 1e6);
        }
    }

  return events & armed;
}

/* Priority setup for the current base (ev_x_cur); defers to event_base_priority_init. */
int event_priority_init (int npri)
{
  struct event_base *current = ev_x_cur;

  return event_base_priority_init (current, npri);
}

/*
 * Record pri in the event's ev_pri compatibility slot and return 0.
 * No watcher is touched here.
 */
int event_priority_set (struct event *ev, int pri)
{
  ev->ev_pri = pri;
  return 0;
}

/*
 * Associate ev with base by storing the pointer in ev->ev_base, which is
 * the field the dLOOPev-based functions take their loop from. Returns 0.
 */
int event_base_set (struct event_base *base, struct event *ev)
{
  ev->ev_base = base;
  return 0;
}

/*
 * Run the loop behind base with the given flags.
 * The result is the logical negation of ev_run's return value:
 * 1 when ev_run returned 0, otherwise 0.
 */
int event_base_loop (struct event_base *base, int flags)
{
  int run_result;
  dLOOPbase;

  run_result = ev_run (EV_A_ flags);

  return run_result ? 0 : 1;
}

/* Run the loop behind base with no flags; same as event_base_loop (base, 0). */
int event_base_dispatch (struct event_base *base)
{
  const int no_flags = 0;

  return event_base_loop (base, no_flags);
}

/*
 * ev_once callback installed by event_base_loopexit: `base` is the
 * event_base passed as user data; breaks out of one level of its loop.
 * revents is not inspected.
 */
static void ev_x_loopexit_cb (int revents, void *base)
{
  dLOOPbase;

  ev_break (EV_A_ EVBREAK_ONE);
}

/*
 * Arrange for the loop behind base to be left after the interval in tv.
 * The delay obtained from ev_tv_get is clamped so that anything that is
 * not >= 0 becomes 0, then a one-shot timer is registered whose callback
 * (ev_x_loopexit_cb) breaks the loop. Always returns 0.
 */
int event_base_loopexit (struct event_base *base, struct timeval *tv)
{
  ev_tstamp delay = ev_tv_get (tv);
  dLOOPbase;

  if (!(delay >= 0.))
    delay = 0.;

  /* fd -1 with no io events: only the timeout part of ev_once is used */
  ev_once (EV_A_ -1, 0, delay, ev_x_loopexit_cb, (void *)base);

  return 0;
}

/*
 * Heap-allocated context for event_base_once: remembers the libevent-style
 * callback and its arguments until ev_x_once_cb invokes it and frees this.
 */
struct ev_x_once
{
  int fd;                          /* fd handed back to the callback */
  void (*cb)(int, short, void *);  /* user callback in libevent signature */
  void *arg;                       /* opaque user argument for cb */
};

/*
 * ev_once trampoline used by event_base_once: unpacks the ev_x_once
 * context, calls the stored libevent-style callback with the stored fd,
 * the received events (narrowed to short) and the stored argument, and
 * then releases the context.
 */
static void ev_x_once_cb (int revents, void *arg)
{
  struct ev_x_once *ctx = (struct ev_x_once *)arg;
  short what = (short)revents;

  ctx->cb (ctx->fd, what, ctx->arg);

  /* the context is owned by this callback once it has fired */
  free (ctx);
}

/*
 * Register a one-shot event on base.
 *
 * A small context holding fd, cb and arg is allocated and handed to
 * ev_once together with the read/write bits of `events` and the timeout
 * taken from tv; ev_x_once_cb later calls cb and frees the context.
 * Returns -1 if the context cannot be allocated, 0 otherwise.
 */
int event_base_once (struct event_base *base, int fd, short events, void (*cb)(int, short, void *), void *arg, struct timeval *tv)
{
  struct ev_x_once *ctx = (struct ev_x_once *)malloc (sizeof *ctx);
  dLOOPbase;

  if (!ctx)
    return -1;

  ctx->fd  = fd;
  ctx->cb  = cb;
  ctx->arg = arg;

  ev_once (EV_A_ fd, events & (EV_READ | EV_WRITE), ev_tv_get (tv), ev_x_once_cb, (void *)ctx);

  return 0;
}

/*
 * Compatibility stub: accepts a base and a priority count, does nothing
 * with either, and reports success (0).
 */
int event_base_priority_init (struct event_base *base, int npri)
{
  const int ok = 0;

  return ok;
}


================================================
FILE: libev/event.h
================================================
/*
 * libevent compatibility header, only core events supported
 *
 * Copyright (c) 2007,2008,2010,2012 Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

#ifndef EVENT_H_
#define EVENT_H_

#ifdef EV_H
# include EV_H
#else
# include "ev.h"
#endif

#ifndef EVLOOP_NONBLOCK
# define EVLOOP_NONBLOCK EVRUN_NOWAIT
#endif
#ifndef EVLOOP_ONESHOT
# define EVLOOP_ONESHOT EVRUN_ONCE
#endif
#ifndef EV_TIMEOUT
# define EV_TIMEOUT EV_TIMER
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* we need sys/time.h for struct timeval only */
#if !defined (WIN32) || defined (__MINGW32__)
# include <time.h> /* mingw seems to need this, for whatever reason */
# include <sys/time.h>
#endif

struct event_base;

#define EVLIST_TIMEOUT  0x01
#define EVLIST_INSERTED 0x02
#define EVLIST_SIGNAL   0x04
#define EVLIST_ACTIVE   0x08
#define EVLIST_INTERNAL 0x10
#define EVLIST_INIT     0x80

typedef void (*event_callback_fn)(int, short, void *);

/*
 * libevent-compatible event object. It embeds the libev watchers that
 * implement it, followed by fields mirroring libevent's public slots.
 */
struct event
{
  /* libev watchers we map onto */
  union {
    struct ev_io io;      /* used when the event asks for EV_READ and/or EV_WRITE */
    struct ev_signal sig; /* used when the event asks for EV_SIGNAL */
  } iosig;
  struct ev_timer to;     /* timeout timer, armed by event_add when a timeval is given */

  /* compatibility slots */
  struct event_base *ev_base;    /* base (loop) this event belongs to */
  event_callback_fn ev_callback; /* user callback */
  void *ev_arg;                  /* opaque argument for ev_callback */
  int ev_fd;                     /* file descriptor, or the signal number for signal events */
  int ev_pri;                    /* value stored by event_priority_set */
  int ev_res;
  int ev_flags;                  /* combination of the EVLIST_* bits */
  short ev_events;               /* requested event types (EV_READ, EV_WRITE, EV_SIGNAL, ...) */
};

event_callback_fn event_get_callback (const struct event *ev);

#define EV_READ                    EV_READ
#define EV_WRITE                   EV_WRITE
#define EV_PERSIST                 0x10
#define EV_ET                      0x20 /* nop */

#define EVENT_SIGNAL(ev)           ((int) (ev)->ev_fd)
#define EVENT_FD(ev)               ((int) (ev)->ev_fd)

#define event_initialized(ev)      ((ev)->ev_flags & EVLIST_INIT)

#define evtimer_add(ev,tv)         event_add (ev, tv)
#define evtimer_set(ev,cb,data)    event_set (ev, -1, 0, cb, data)
#define evtimer_del(ev)            event_del (ev)
#define evtimer_pending(ev,tv)     event_pending (ev, EV_TIMEOUT, tv)
#define evtimer_initialized(ev)    event_initialized (ev)

#define timeout_add(ev,tv)         evtimer_add (ev, tv)
#define timeout_set(ev,cb,data)    evtimer_set (ev, cb, data)
#define timeout_del(ev)            evtimer_del (ev)
#define timeout_pending(ev,tv)     evtimer_pending (ev, tv)
#define timeout_initialized(ev)    evtimer_initialized (ev)

#define signal_add(ev,tv)          event_add (ev, tv)
#define signal_set(ev,sig,cb,data) event_set (ev, sig, EV_SIGNAL | EV_PERSIST, cb, data)
#define signal_del(ev)             event_del (ev)
#define signal_pending(ev,tv)      event_pending (ev, EV_SIGNAL, tv)
#define signal_initialized(ev)     event_initialized (ev)

const char *event_get_version (void);
const char *event_get_method (void);

void *event_init (void);
void event_base_free (struct event_base *base);

#define EVLOOP_ONCE      EVLOOP_ONESHOT
int event_loop (int);
int event_loopexit (struct timeval *tv);
int event_dispatch (void);

#define _EVENT_LOG_DEBUG 0
#define _EVENT_LOG_MSG   1
#define _EVENT_LOG_WARN  2
#define _EVENT_LOG_ERR   3
typedef void (*event_log_cb)(int severity, const char *msg);
void event_set_log_callback(event_log_cb cb);

void event_set (struct event *ev, int fd, short events, void (*cb)(int, short, void *), void *arg);
int event_once (int fd, short events, void (*cb)(int, short, void *), void *arg, struct timeval *tv);

int event_add (struct event *ev, struct timeval *tv);
int event_del (struct event *ev);
void event_active (struct event *ev, int res, short ncalls); /* ncalls is being ignored */

int event_pending (struct event *ev, short, struct timeval *tv);

int event_priority_init (int npri);
int event_priority_set (struct event *ev, int pri);

/* variants of the calls above that take the event base explicitly */
struct event_base *event_base_new (void);
const char *event_base_get_method (const struct event_base *);
int event_base_set (struct event_base *base, struct event *ev);
int event_base_loop (struct event_base *base, int);
int event_base_loopexit (struct event_base *base, struct timeval *tv);
int event_base_dispatch (struct event_base *base);
int event_base_once (struct event_base *base, int fd, short events, void (*cb)(int, short, void *), void *arg, struct timeval *tv);
/* second argument is a priority count (named npri in event_priority_init), not a file descriptor */
int event_base_priority_init (struct event_base *base, int npri);

/* next line is different in the libevent+libev version */
/*libevent-include*/

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: libev/event_compat.h
================================================
/*
 * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
 * Copyright (c) 2008      Marc Alexander Lehmann <libev@schmorp.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# undef WIN32_LEAN_AND_MEAN
typedef unsigned char u_char;
typedef unsigned short u_short;
#else
# include <sys/types.h>
# include <sys/time.h>
# include <inttypes.h>
#endif

#include <stdarg.h>

/* Fix so that people don't have to run with <sys/queue.h> */
#ifndef TAILQ_ENTRY
#define _EVENT_DEFINED_TQENTRY
#define TAILQ_ENTRY(type)						\
struct {								\
	struct type *tqe_next;	/* next element */			\
	struct type **tqe_prev;	/* address of previous next element */	\
}
#endif /* !TAILQ_ENTRY */
#ifndef RB_ENTRY
#define _EVENT_DEFINED_RBENTRY
#define RB_ENTRY(type)							\
struct {								\
	struct type *rbe_left;		/* left element */		\
	struct type *rbe_right;		/* right element */		\
	struct type *rbe_parent;	/* parent element */		\
	int rbe_color;			/* node color */		\
}
#endif /* !RB_ENTRY */

/*
 * Key-Value pairs.  Can be used for HTTP headers but also for
 * query argument parsing.
 */
struct evkeyval {
	TAILQ_ENTRY(evkeyval) next;

	char *key;
	char *value;
};

#ifdef _EVENT_DEFINED_TQENTRY
#undef TAILQ_ENTRY
struct event_list;
struct evkeyvalq;
#undef _EVENT_DEFINED_TQENTRY
#else
TAILQ_HEAD (event_list, event);
TAILQ_HEAD (evkeyvalq, evkeyval);
#endif /* _EVENT_DEFINED_TQENTRY */
#ifdef _EVENT_DEFINED_RBENTRY
#undef RB_ENTRY
#undef _EVENT_DEFINED_RBENTRY
#endif /* _EVENT_DEFINED_RBENTRY */

struct eventop {
	char *name;
	void *(*init)(struct event_base *);
	int (*add)(void *, struct event *);
	int (*del)(void *, struct event *);
	int (*recalc)(struct event_base *, void *, int);
	int (*dispatch)(struct event_base *, void *, struct timeval *);
	void (*dealloc)(struct event_base *, void *);
};

/* These functions deal with buffering input and output */

/*
 * Byte buffer operated on by the evbuffer_* functions declared below and
 * used as the input/output buffers of struct bufferevent.
 */
struct evbuffer {
	u_char *buffer;		/* current data pointer; this is what EVBUFFER_DATA() yields */
	u_char *orig_buffer;	/* presumably the start of the underlying allocation -- TODO confirm against the libevent sources */

	size_t misalign;	/* presumably the distance between orig_buffer and buffer -- TODO confirm */
	size_t totallen;	/* presumably the allocated size -- TODO confirm */
	size_t off;		/* number of bytes held; this is what EVBUFFER_LENGTH() yields */

	/* optional callback plus its opaque argument; see evbuffer_setcb */
	void (*cb)(struct evbuffer *, size_t, size_t, void *);
	void *cbarg;
};

/* Just for error reporting - use other constants otherwise */
#define EVBUFFER_READ		0x01
#define EVBUFFER_WRITE		0x02
#define EVBUFFER_EOF		0x10
#define EVBUFFER_ERROR		0x20
#define EVBUFFER_TIMEOUT	0x40

struct bufferevent;
typedef void (*evbuffercb)(struct bufferevent *, void *);
typedef void (*everrorcb)(struct bufferevent *, short what, void *);

struct event_watermark {
	size_t low;
	size_t high;
};

struct bufferevent {
	struct event ev_read;
	struct event ev_write;

	struct evbuffer *input;
	struct evbuffer *output;

	struct event_watermark wm_read;
	struct event_watermark wm_write;

	evbuffercb readcb;
	evbuffercb writecb;
	everrorcb errorcb;
	void *cbarg;

	int timeout_read;	/* in seconds */
	int timeout_write;	/* in seconds */

	short enabled;	/* events that are currently enabled */
};

struct bufferevent *bufferevent_new(int fd,
    evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg);
int bufferevent_base_set(struct event_base *base, struct bufferevent *bufev);
int bufferevent_priority_set(struct bufferevent *bufev, int pri);
void bufferevent_free(struct bufferevent *bufev);
int bufferevent_write(struct bufferevent *bufev, const void *data, size_t size);
int bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf);
size_t bufferevent_read(struct bufferevent *bufev, void *data, size_t size);
int bufferevent_enable(struct bufferevent *bufev, short event);
int bufferevent_disable(struct bufferevent *bufev, short event);
void bufferevent_settimeout(struct bufferevent *bufev,
    int timeout_read, int timeout_write);

#define EVBUFFER_LENGTH(x)	(x)->off
#define EVBUFFER_DATA(x)	(x)->buffer
#define EVBUFFER_INPUT(x)	(x)->input
#define EVBUFFER_OUTPUT(x)	(x)->output

struct evbuffer *evbuffer_new(void);
void evbuffer_free(struct evbuffer *);
int evbuffer_expand(struct evbuffer *, size_t);
int evbuffer_add(struct evbuffer *, const void *, size_t);
int evbuffer_remove(struct evbuffer *, void *, size_t);
char *evbuffer_readline(struct evbuffer *);
int evbuffer_add_buffer(struct evbuffer *, struct evbuffer *);
int evbuffer_add_printf(struct evbuffer *, const char *fmt, ...);
int evbuffer_add_vprintf(struct evbuffer *, const char *fmt, va_list ap);
void evbuffer_drain(struct evbuffer *, size_t);
int evbuffer_write(struct evbuffer *, int);
int evbuffer_read(struct evbuffer *, int, int);
u_char *evbuffer_find(struct evbuffer *, const u_char *, size_t);
void evbuffer_setcb(struct evbuffer *, void (*)(struct evbuffer *, size_t, size_t, void *), void *);

/*
 * Marshaling tagged data - We assume that all tags are inserted in their
 * numeric order - so that unknown tags will always be higher than the
 * known ones - and we can just ignore the end of an event buffer.
 */

void evtag_init(void);

void evtag_marshal(struct evbuffer *evbuf, uint32_t tag, const void *data,
    uint32_t len);

void encode_int(struct evbuffer *evbuf, uint32_t number);

void evtag_marshal_int(struct evbuffer *evbuf, uint32_t tag, uint32_t integer);

void evtag_marshal_string(struct evbuffer *buf, uint32_t tag,
    const char *string);

void evtag_marshal_timeval(struct evbuffer *evbuf, uint32_t tag,
    struct timeval *tv);

int evtag_unmarshal(struct evbuffer *src, uint32_t *ptag, struct evbuffer *dst);
int evtag_peek(struct evbuffer *evbuf, uint32_t *ptag);
int evtag_peek_length(struct evbuffer *evbuf, uint32_t *plength);
int evtag_payload_length(struct evbuffer *evbuf, uint32_t *plength);
int evtag_consume(struct evbuffer *evbuf);

int evtag_unmarshal_int(struct evbuffer *evbuf, uint32_t need_tag,
    uint32_t *pinteger);

int evtag_unmarshal_fixed(struct evbuffer *src, uint32_t need_tag, void *data,
    size_t len);

int evtag_unmarshal_string(struct evbuffer *evbuf, uint32_t need_tag,
    char **pstring);

int evtag_unmarshal_timeval(struct evbuffer *evbuf, uint32_t need_tag,
    struct timeval *ptv);

#ifdef __cplusplus
}
#endif


================================================
FILE: libev/import_libevent
================================================
#!/bin/sh

# Path of the libevent source tree whose files are copied/merged below.
LE=../libevent-1.4.3-stable

# Safety check: refuse to do anything unless evbuffer.c already exists in
# the current directory.
if ! [ -e evbuffer.c ]; then
   echo do not run this programm unless you know what you are doing
   exit 1
fi

# this program combines libev and libevent into a single package

# Update the libev CVS checkout in ./libev and copy its contents over the
# current directory, leaving the CVS metadata directories out.
cvs update -AdP libev
rsync -avP libev/. . --exclude CVS

# libev's configure.ac is dropped; a configure.in derived from libevent's
# is written further down in this script.
rm -f configure.ac

cp $LE/evdns.h .

# Turn the /*libevent-include*/ placeholder line of event.h into an
# #include of event_compat.h.
perl -i -pe 's%^/.libevent-include./%#include "event_compat.h"%' event.h

perl -ne '
   s/\s+char buf\[64\];/\tchar buf[96];/;
   if (/#include "event.h"/) {
      print "#ifndef EV_STANDALONE\n$_#endif\n";
      next;
   }
   if (/#include "misc.h"/) {
      print "#ifndef EV_STANDALONE\n$_#endif\n";
      next;
   }
   if (/#include "(unistd.h|sys\/time.h)"/) {
      print "#ifndef WIN32\n$_#endif\n";
      next;
   }
   next if /#include "log.h"/;

   print;
' <$LE/evdns.c >evdns.c

cp $LE/autogen.sh .
cp $LE/epoll_sub.c .
cp $LE/evbuffer.c .
cp $LE/buffer.c .
cp $LE/evhttp.h .
cp $LE/evutil.h .
cp $LE/evutil.c .
cp $LE/event-config.h .
cp $LE/event-internal.h .
cp $LE/evrpc.h .
cp $LE/evrpc.c .
cp $LE/evrpc-internal.h .
cp $LE/http.c .
cp $LE/event_tagging.c .
cp $LE/http-internal.h .
cp $LE/strlcpy-internal.h .
cp $LE/log.c .
cp $LE/log.h .
cp $LE/strlcpy.c .
rsync -a $LE/WIN32* $LE/sample $LE/test $LE/compat . --del
#rename 's/libevent/libev/' WIN32-Prj/lib*
cp $LE/aclocal.m4 .
#cp $LE/acconfig.h .
cp $LE/config.h.in .
cp $LE/event_rpcgen.py .
cp $LE/*.3 .

#perl -i -pe 's/libevent/libev/g' sample/Makefile.am
#perl -i -pe 's/libevent/libev/g' test/Makefile.am

perl -i -pe 's/#include <event.h>$/#include "event.h"/' test/*.c

perl -i -ne '
   next if /"event-internal.h"/;
   s/base\d?->sig.ev_signal_added/0/;
   s/base\d?->sig.ev_signal_pair\[0\]/-1/;
   s/base->sig.evsignal_caught/0/;
   next if /^\ttest_signal_(dealloc|pipeloss|switchbase|assert|restore)\(\)/;
   next if /^\ttest_simplesignal\(\)/; # non-default-loop
   next if /^\ttest_immediatesignal\(\)/; # non-default-loop
   next if /test_priorities\(\d\)/;
   print;
' test/regress.c

perl -ne '
   s/\bmin_heap.h\b//g;
   s/\bsignal.c\b//g;
   s/\bevport.c\b//g;
   s/\bkqueue.c\b//g;
   s/\bdevpoll.c\b//g;
   s/\brtsig.c\b//g;
   s/\bselect.c\b//g;
   s/\bpoll.c\b//g;
   s/\bepoll.c\b//g;
   s/\bepoll_sub.c\b//g;
   s/\bevent-internal.h\b//g;
   s/\bevsignal.h\b//g;
   s/^(man_MANS\s*=)/$1 ev.3 /;
   s/^(EXTRA_DIST\s*=)/$1 libev.m4 ev.h ev_vars.h ev_wrap.h event_compat.h ev++.h ev_epoll.c ev_select.c ev_poll.c ev_kqueue.c ev_port.c ev_win32.c ev.3 ev.pod /;
   s/^(include_HEADERS\s*=)/$1 ev.h event_compat.h ev++.h /;
   s/^(CORE_SRC\s*=)/$1 ev.c /;
   s/^(SYS_LIBS\s*=)/$1 -lm /;
   #s/libevent/libev/g;
   print;
' <$LE/Makefile.am >Makefile.am

perl -ne '
   #s/-Wall/-Wall -Wno-comment -Wunused-function -Wno-unused-value/;
   s/-Wall//g;
   #s/libevent/libev/g;
   #VERSION
   s/AM_INIT_AUTOMAKE\s*\(.*,(.*)\)/AM_INIT_AUTOMAKE(libevent-$1+libev,3.1)/;
   s/AC_LIBOBJ\(select\)/: ;/g;
   s/AC_LIBOBJ\(poll\)/: ;/g;
   s/AC_LIBOBJ\(kqueue\)/: ;/g;
   s/AC_LIBOBJ\(epoll\)/: ;/g;
   s/AC_LIBOBJ\(devpoll\)/: ;/g;
   s/AC_LIBOBJ\(evport\)/: ;/g;
   s/AC_LIBOBJ\(signal\)/: ;/g;
   s/AC_LIBOBJ\(rtsig\)/: ;/g;
   print "m4_include([libev.m4])\n" if /^AC_OUTPUT/;
   print;
' <$LE/configure.in >configure.in

aclocal-1.7
automake-1.7 --add-missing
autoconf
autoheader
libtoolize
CC="ccache gcc" ./configure --prefix=/opt/libev --disable-shared "$@"


================================================
FILE: libev/libev.m4
================================================
dnl this file is part of libev, do not make local modifications
dnl http://software.schmorp.de/pkg/libev

dnl libev support
AC_CHECK_HEADERS(sys/inotify.h sys/epoll.h sys/event.h port.h poll.h sys/select.h sys/eventfd.h sys/signalfd.h)
 
AC_CHECK_FUNCS(inotify_init epoll_ctl kqueue port_create poll select eventfd signalfd)
 
AC_CHECK_FUNCS(clock_gettime, [], [
   dnl on linux, try syscall wrapper first
   if test $(uname) = Linux; then
      AC_MSG_CHECKING(for clock_gettime syscall)
      AC_LINK_IFELSE([AC_LANG_PROGRAM(
                      [#include <unistd.h>
                       #include <sys/syscall.h>
                       #include <time.h>],
                      [struct timespec ts; int status = syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts)])],
                     [ac_have_clock_syscall=1
                      AC_DEFINE(HAVE_CLOCK_SYSCALL, 1, Define to 1 to use the syscall interface for clock_gettime)
                      AC_MSG_RESULT(yes)],
                     [AC_MSG_RESULT(no)])
   fi
   if test -z "$LIBEV_M4_AVOID_LIBRT" && test -z "$ac_have_clock_syscall"; then
      AC_CHECK_LIB(rt, clock_gettime)
      unset ac_cv_func_clock_gettime
      AC_CHECK_FUNCS(clock_gettime)
   fi
])

AC_CHECK_FUNCS(nanosleep, [], [
   if test -z "$LIBEV_M4_AVOID_LIBRT"; then
      AC_CHECK_LIB(rt, nanosleep)
      unset ac_cv_func_nanosleep
      AC_CHECK_FUNCS(nanosleep)
   fi
])

dnl Look for floor, additionally searching libm unless the embedding
dnl package set LIBEV_M4_AVOID_LIBM. HAVE_FLOOR is defined when it is found.
if test -z "$LIBEV_M4_AVOID_LIBM"; then
   LIBM=m
fi
AC_SEARCH_LIBS(floor, $LIBM, [AC_DEFINE(HAVE_FLOOR, 1, Define to 1 if the floor function is available)])


================================================
FILE: libev/update_ev_c
================================================
#!/bin/sh -e

# Re-embed libecb's ecb.h into ev.c: keep everything up to and including the
# "ECB.H BEGIN" marker, insert the current ecb.h, then keep everything from
# the "ECB.H END" marker onwards. The result is written to a temporary file
# and only moved over ev.c when every step succeeded.
#
# The steps are chained with && on purpose: the shell's -e option is ignored
# for commands on the left-hand side of an && list, so without the chaining
# a failing cat (e.g. ecb.h missing) would still let mv replace ev.c with a
# copy that has lost its embedded ecb.h.
(
   sed -ne '1,\%/\* ECB.H BEGIN \*/%p' ev.c &&
   cat ~/src/libecb/ecb.h &&
   sed -ne '\%/\* ECB.H END \*/%,$p' ev.c
) >ev.c~ && mv ev.c~ ev.c


================================================
FILE: libev/update_ev_wrap
================================================
#!/bin/sh

# Regenerate ev_wrap.h from ev_vars.h.
#
# ev_vars.h is run through the C preprocessor with VAR(name,decl) defined to
# expand to just the name, so the output has one variable name at the start
# of each relevant line. The perl stage collects every word found at the
# start of a line and prints a header that, when EV_WRAP_H is not yet
# defined, #defines each name to ((loop)->name), and otherwise #undefs
# EV_WRAP_H and all of those names again.
#
# NOTE(review): perl runs with -n and the script contains its own
# while (<>) loop, so the first input line is consumed by the implicit loop
# and never matched. Presumably harmless because the preprocessor output
# starts with a line marker, but worth confirming.
(
   echo '#define VAR(name,decl) name'
   echo '#define EV_GENWRAP 1'
   cat ev_vars.h
) | cc -E -o - - | perl -ne '
   while (<>) {
      push @syms, $1 if /(^\w+)/;
   }
   print "/* DO NOT EDIT, automatically generated by update_ev_wrap */\n",
         "#ifndef EV_WRAP_H\n",
         "#define EV_WRAP_H\n",
         (map "#define $_ ((loop)->$_)\n", sort @syms),
         "#else\n",
         "#undef EV_WRAP_H\n",
         (map "#undef $_\n", sort @syms),
         "#endif\n";
' >ev_wrap.h


================================================
FILE: libev/update_symbols
================================================
#!/bin/sh

# Build both objects first; give up with make's status if that fails.
make ev.o event.o || exit

# For each object, list the names of all symbols whose nm type letter is
# upper case and store them in the matching Symbols.* file.
for unit in ev event; do
   nm "$unit.o" | perl -ne 'print "$1\n" if /\S+ [A-Z] (\S+)/' > "Symbols.$unit"
done


================================================
FILE: log.cpp
================================================
#include "log.h"
#include "misc.h"

int log_level = log_info;

int enable_log_position = 0;
int enable_log_color = 1;

/*
 * Core logging routine behind the mylog() macro.
 *
 * Writes "[timestamp][LEVEL]", optionally "[file,func:...,line:...]" when
 * enable_log_position is set, and then the printf-style message to stdout.
 * With enable_log_color set the output is wrapped in the level's colour
 * escape and RESET. No newline is appended; callers put "\n" in `str`.
 *
 * Messages are dropped when `level` is above the configured log_level or
 * outside the range [0, log_trace].
 *
 * Printing a log_fatal message sets about_to_exit. The check is made on the
 * message's own `level`; comparing the global log_level (as was done before)
 * only fired when the verbosity itself was configured to log_fatal.
 */
void log0(const char* file, const char* function, int line, int level, const char* str, ...) {
    if (level > log_level) return;
    if (level > log_trace || level < 0) return;

    time_t timer;
    char buffer[100];
    struct tm* tm_info;

    time(&timer);
    tm_info = localtime(&timer);

    if (enable_log_color)
        printf("%s", log_color[level]);

    // localtime() may return a null pointer; print an empty timestamp then
    // instead of handing the null pointer to strftime().
    buffer[0] = '\0';
    if (tm_info)
        strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", tm_info);
    printf("[%s][%s]", buffer, log_text[level]);

    if (enable_log_position) printf("[%s,func:%s,line:%d]", file, function, line);

    va_list vlist;
    va_start(vlist, str);
    vfprintf(stdout, str, vlist);
    va_end(vlist);
    if (enable_log_color)
        printf("%s", RESET);

    fflush(stdout);

    if (level == log_fatal) {
        about_to_exit = 1;
    }
}

/*
 * Print a printf-style message to stdout without the timestamp/level
 * prefix that log0 adds. The same filtering applies: nothing is printed
 * when `level` exceeds log_level or lies outside [0, log_trace]. With
 * enable_log_color set, the text is wrapped in the level's colour escape
 * and RESET. stdout is flushed afterwards.
 */
void log_bare(int level, const char* str, ...) {
    const bool out_of_range = level < 0 || level > log_trace;
    if (level > log_level || out_of_range) {
        return;
    }

    if (enable_log_color) {
        fputs(log_color[level], stdout);
    }

    va_list args;
    va_start(args, str);
    vprintf(str, args);
    va_end(args);

    if (enable_log_color) {
        fputs(RESET, stdout);
    }

    fflush(stdout);
}


================================================
FILE: log.h
================================================

#ifndef UDP2RAW_LOG_MYLOG_H_
#define UDP2RAW_LOG_MYLOG_H_

#include "common.h"

using namespace std;

#define RED "\x1B[31m"
#define GRN "\x1B[32m"
#define YEL "\x1B[33m"
#define BLU "\x1B[34m"
#define MAG "\x1B[35m"
#define CYN "\x1B[36m"
#define WHT "\x1B[37m"
#define RESET "\x1B[0m"

// Log levels in order of increasing verbosity. A message is printed only
// when its level is <= the global log_level and within [0, log_trace].
const int log_never = 0;
const int log_fatal = 1;
const int log_error = 2;
const int log_warn = 3;
const int log_info = 4;
const int log_debug = 5;
const int log_trace = 6;
const int log_end = 7;  // one past the last usable level

// Both tables are indexed by log level.
// log_text holds the tag printed between brackets by log0.
const char log_text[][20] = {"NEVER", "FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE", ""};
// log_color has entries for levels 0..log_trace only (trace uses no colour);
// there is no entry for log_end.
const char log_color[][20] = {RED, RED, RED, YEL, GRN, MAG, ""};

extern int log_level;
extern int enable_log_position;
extern int enable_log_color;

#ifdef MY_DEBUG
#define mylog(__first_argu__dummy_abcde__, ...) printf(__VA_ARGS__)

#else
#define mylog(...) log0(__FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#endif

//#define mylog(__first_argu__dummy_abcde__,...) {;}

void log0(const char* file, const char* function, int line, int level, const char* str, ...);

void log_bare(int level, const char* str, ...);

#endif


================================================
FILE: main.cpp
================================================
#include "common.h"
#include "network.h"
#include "connection.h"
#include "misc.h"
#include "log.h"
#include "lib/md5.h"
#include "encrypt.h"
#include "fd_manager.h"

// libev signal callback for SIGPIPE: the signal is only logged, nothing else
// happens. The message ends in "\n" because the logger does not add a line
// break itself; without it the next log line would continue on this one.
void sigpipe_cb(struct ev_loop *l, ev_signal *w, int revents) {
    mylog(log_info, "got sigpipe, ignored\n");
}

// libev signal callback for SIGTERM: log the event and leave via myexit(0).
// The message ends in "\n" because the logger does not add a line break.
void sigterm_cb(struct ev_loop *l, ev_signal *w, int revents) {
    mylog(log_info, "got sigterm, exit\n");
    myexit(0);
}

// libev signal callback for SIGINT: log the event and leave via myexit(0).
// The message ends in "\n" because the logger does not add a line break.
void sigint_cb(struct ev_loop *l, ev_signal *w, int revents) {
    mylog(log_info, "got sigint, exit\n");
    myexit(0);
}

int client_event_loop();
int server_event_loop();

/*
 * Program entry point.
 *
 * Order of work: platform setup (init_ws on the multi-platform build,
 * stderr redirected to stdout, colours off on MinGW), argument processing,
 * signal handling for the selected mode, a root/non-root hint, random
 * seeding and const_id generation, key setup, iptables rule handling, raw
 * socket setup on Linux, and finally the client or server event loop.
 *
 * Client mode handles SIGPIPE/SIGTERM/SIGINT through libev signal watchers
 * on the default loop. Server mode exists on Linux builds only and uses
 * plain signal() handlers; on other builds it logs a fatal message and
 * exits with -1.
 */
int main(int argc, char *argv[]) {
    // These integer widths are verified at compile time, so the check cannot
    // be compiled out the way a runtime assert() is under NDEBUG.
    static_assert(sizeof(unsigned short) == 2, "unsigned short must be 2 bytes");
    static_assert(sizeof(unsigned int) == 4, "unsigned int must be 4 bytes");
    static_assert(sizeof(unsigned long long) == 8, "unsigned long long must be 8 bytes");

#ifdef UDP2RAW_MP
    init_ws();
#endif

    dup2(1, 2);  // redirect stderr to stdout
#if defined(__MINGW32__)
    enable_log_color = 0;
#endif

    pre_process_arg(argc, argv);

    // The watchers live in main's frame, which stays alive while the event
    // loop started below is running.
    ev_signal signal_watcher_sigpipe;
    ev_signal signal_watcher_sigterm;
    ev_signal signal_watcher_sigint;

    if (program_mode == client_mode) {
        struct ev_loop *loop = ev_default_loop(0);
#if !defined(__MINGW32__)
        ev_signal_init(&signal_watcher_sigpipe, sigpipe_cb, SIGPIPE);
        ev_signal_start(loop, &signal_watcher_sigpipe);
#endif
        ev_signal_init(&signal_watcher_sigterm, sigterm_cb, SIGTERM);
        ev_signal_start(loop, &signal_watcher_sigterm);

        ev_signal_init(&signal_watcher_sigint, sigint_cb, SIGINT);
        ev_signal_start(loop, &signal_watcher_sigint);
    } else {
#ifdef UDP2RAW_LINUX
        // SIGKILL is deliberately not registered: it cannot be caught, so a
        // signal(SIGKILL, ...) call can only fail.
        signal(SIGINT, signal_handler);
        signal(SIGHUP, signal_handler);
        signal(SIGTERM, signal_handler);
        signal(SIGQUIT, signal_handler);
#else
        mylog(log_fatal, "server mode not supported in multi-platform version\n");
        myexit(-1);
#endif
    }
#if !defined(__MINGW32__)
    if (geteuid() != 0) {
        mylog(log_warn, "root check failed, it seems like you are using a non-root account. we can try to continue, but it may fail. If you want to run udp2raw as non-root, you have to add iptables rule manually, and grant udp2raw CAP_NET_RAW capability, check README.md in repo for more info.\n");
    } else {
        mylog(log_warn, "you can run udp2raw with non-root account for better security. check README.md in repo for more info.\n");
    }
#endif

    mylog(log_info, "remote_ip=[%s], make sure this is a vaild IP address\n", remote_addr.get_ip());

    // seed rand() and pick this instance's const_id from the random source
    srand(get_true_random_number_nz());
    const_id = get_true_random_number_nz();

    mylog(log_info, "const_id:%x\n", const_id);

    // second argument is 1 for the client and 0 for the server
    my_init_keys(key_string, program_mode == client_mode ? 1 : 0);

    iptables_rule();

#ifdef UDP2RAW_LINUX
    init_raw_socket();
#endif

    if (program_mode == client_mode) {
        client_event_loop();
    } else {
#ifdef UDP2RAW_LINUX
        server_event_loop();
#else
        mylog(log_fatal, "server mode not supported in multi-platform version\n");
        myexit(-1);
#endif
    }

    return 0;
}


================================================
FILE: makefile
================================================
cc_cross=/home/wangyu/Desktop/arm-2014.05/bin/arm-none-linux-gnueabi-g++
cc_local=g++
cc_mips24kc_be=/toolchains/lede-sdk-17.01.2-ar71xx-generic_gcc-5.4.0_musl-1.1.16.Linux-x86_64/staging_dir/toolchain-mips_24kc_gcc-5.4.0_musl-1.1.16/bin/mips-openwrt-linux-musl-g++
cc_mips24kc_le=/toolchains/lede-sdk-17.01.2-ramips-mt7621_gcc-5.4.0_musl-1.1.16.Linux-x86_64/staging_dir/toolchain-mipsel_24kc_gcc-5.4.0_musl-1.1.16/bin/mipsel-openwrt-linux-musl-g++
cc_arm= /toolchains/lede-sdk-17.01.2-bcm53xx_gcc-5.4.0_musl-1.1.16_eabi.Linux-x86_64/staging_dir/toolchain-arm_cortex-a9_gcc-5.4.0_musl-1.1.16_eabi/bin/arm-openwrt-linux-c++
cc_mingw_cross=i686-w64-mingw32-g++-posix
cc_mac_cross=o64-clang++ -stdlib=libc++
cc_x86=/toolchains/lede-sdk-17.01.2-x86-generic_gcc-5.4.0_musl-1.1.16.Linux-x86_64/staging_dir/toolchain-i386_pentium4_gcc-5.4.0_musl-1.1.16/bin/i486-openwrt-linux-c++
cc_amd64=/toolchains/lede-sdk-17.01.2-x86-64_gcc-5.4.0_musl-1.1.16.Linux-x86_64/staging_dir/toolchain-x86_64_gcc-5.4.0_musl-1.1.16/bin/x86_64-openwrt-linux-c++
#cc_bcm2708=/home/wangyu/raspberry/tools/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian/bin/arm-linux-gnueabihf-g++ 


FLAGS= -std=c++11   -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -Wno-missing-field-initializers ${OPT}

COMMON=main.cpp lib/md5.cpp lib/pbkdf2-sha1.cpp lib/pbkdf2-sha256.cpp encrypt.cpp log.cpp network.cpp common.cpp  connection.cpp misc.cpp fd_manager.cpp client.cpp server.cpp -lpthread

SOURCES0= $(COMMON) lib/aes_faster_c/aes.cpp lib/aes_faster_c/wrapper.cpp
SOURCES= ${SOURCES0} my_ev.cpp -isystem libev
SOURCES_AES_ACC= $(COMMON) $(wildcard lib/aes_acc/aes*.c) my_ev.cpp -isystem libev
PCAP="-lpcap"
MP="-DUDP2RAW_MP"


NAME=udp2raw

TARGETS=amd64 arm amd64_hw_aes arm_asm_aes mips24kc_be mips24kc_be_asm_aes x86 x86_asm_aes mips24kc_le mips24kc_le_asm_aes

TAR=${NAME}_binaries.tar.gz `echo ${TARGETS}|sed -r 's/([^ ]+)/${NAME}_\1/g'` version.txt

TARGETS_MP= mingw_cross mingw_cross_wepoll mac_cross

# STAGING_DIR is exported only to suppress the toolchain warning about it not being defined.
# The comment is kept on its own line: make treats the spaces in front of a
# trailing '#' as part of the variable's value.
export STAGING_DIR=/tmp/

# targets for native (non-cross) compile
all:git_version
	rm -f ${NAME}
	${cc_local}   -o ${NAME}          -I. ${SOURCES} ${FLAGS} -lrt -ggdb -static -O2

#dynamic link
dynamic: git_version
	${cc_local}   -o ${NAME}_$@          -I. ${SOURCES} ${FLAGS} -lrt -O2

#targets for general cross compile

cross:git_version
	${cc_cross}   -o ${NAME}_cross    -I. ${SOURCES} ${FLAGS} -lrt -O2

cross2:git_version
	${cc_cross}   -o ${NAME}_cross    -I. ${SOURCES} ${FLAGS} -lrt -static -lgcc_eh -O2

cross3:git_version
	${cc_cross}   -o ${NAME}_cross    -I. ${SOURCES} ${FLAGS} -lrt -static -O2

#targets only for debug purpose
fast: git_version
	rm -f ${NAME}
	${cc_local}   -o ${NAME}          -I. ${SOURCES} ${FLAGS} -lrt -ggdb
debug: git_version
	rm -f ${NAME}
	${cc_local}   -o ${NAME}          -I. ${SOURCES} ${FLAGS} -lrt -Wformat-nonliteral -D MY_DEBUG  -ggdb
debug2: git_version
	rm -f ${NAME}
	${cc_local}   -o ${NAME}          -I. ${SOURCES} ${FLAGS} -lrt -Wformat-nonliteral -ggdb -fsanitize=address

# Targets used only by 'make release'.
# Each rule builds a static binary named ${NAME}_<target> ($@ is the target
# name) with the matching cross compiler variable. The plain targets compile
# ${SOURCES}; the *_asm_aes / *_hw_aes targets compile ${SOURCES_AES_ACC} plus
# the architecture-specific assembly file from lib/aes_acc/asm/.

mips24kc_be: git_version
	${cc_mips24kc_be}  -o ${NAME}_$@   -I. ${SOURCES} ${FLAGS} -lrt -lgcc_eh -static -O2
mips24kc_be_asm_aes: git_version
	${cc_mips24kc_be}  -o ${NAME}_$@   -I. ${SOURCES_AES_ACC} ${FLAGS} -lrt -lgcc_eh -static -O2 lib/aes_acc/asm/mips_be.S
mips24kc_le: git_version
	${cc_mips24kc_le}  -o ${NAME}_$@   -I. ${SOURCES} ${FLAGS} -lrt -lgcc_eh -static -O2
mips24kc_le_asm_aes: git_version
	${cc_mips24kc_le}  -o ${NAME}_$@   -I. ${SOURCES_AES_ACC} ${FLAGS} -lrt -lgcc_eh -static -O2 lib/aes_acc/asm/mips.S
amd64:git_version
	${cc_amd64}   -o ${NAME}_$@    -I. ${SOURCES} ${FLAGS} -lrt -static -O2 -lgcc_eh -ggdb
amd64_hw_aes:git_version
	${cc_amd64}   -o ${NAME}_$@   -I. ${SOURCES_AES_ACC} ${FLAGS} -lrt -static -O2 lib/aes_acc/asm/x64.S -lgcc_eh -ggdb
x86:git_version
	${cc_x86}   -o ${NAME}_$@      -I. ${SOURCES} ${FLAGS} -lrt -static -O2 -lgcc_eh -ggdb
x86_asm_aes:git_version
	${cc_x86}   -o ${NAME}_$@    -I. ${SOURCES_AES_ACC} ${FLAGS} -lrt -static -O2 lib/aes_acc/asm/x86.S -lgcc_eh -ggdb
arm:git_version
	${cc_arm}   -o ${NAME}_$@      -I. ${SOURCES} ${FLAGS} -lrt -static -O2 -lgcc_eh
arm_asm_aes:git_version
	${cc_arm}   -o ${NAME}_$@    -I. ${SOURCES_AES_ACC} ${FLAGS} -lrt -static -O2 lib/aes_acc/asm/arm.S -lgcc_eh

# release: builds every entry of ${TARGETS}, copies git_version.h to
# version.txt and packs the binaries plus version.txt into the archive named
# at the start of ${TAR}.
release: ${TARGETS}
	cp git_version.h version.txt
	tar -zcvf ${TAR}

# Targets for the multi-platform version (native compile).
# Every rule passes ${MP} (-DUDP2RAW_MP) and writes ${NAME}_<target>.
# cygwin / mingw / mingw_wepoll compile pcap_wrapper.cpp; linux / freebsd / mac
# pass ${PCAP} (-lpcap) instead. mingw_wepoll uses ${SOURCES0}, i.e. it leaves
# out my_ev.cpp and the libev include dir, defines NO_LIBEV_EMBED and links -lev.
cygwin:git_version
	${cc_local}   -o ${NAME}_$@          -I. ${SOURCES} pcap_wrapper.cpp ${FLAGS} -lrt -ggdb -static -O2 -D_GNU_SOURCE ${MP}

mingw:git_version
	${cc_local}   -o ${NAME}_$@         -I. ${SOURCES} pcap_wrapper.cpp ${FLAGS} -ggdb -static -O2 -lws2_32 ${MP}

mingw_wepoll:git_version
	${cc_local}   -o ${NAME}_$@        -I. ${SOURCES0} pcap_wrapper.cpp ${FLAGS} -ggdb -static -O2 -DNO_LIBEV_EMBED -D_WIN32 -lev -lws2_32 ${MP}

linux:git_version
	${cc_local}   -o ${NAME}_$@          -I. ${SOURCES} ${PCAP} ${FLAGS} -lrt -ggdb -static -O2 ${MP}

freebsd:git_version
	${cc_local}   -o ${NAME}_$@        -I. ${SOURCES} ${PCAP} ${FLAGS} -lrt -ggdb -static -libverbs -O2 ${MP}

mac:git_version
	${cc_local}   -o ${NAME}_$@        -I. ${SOURCES} ${PCAP} ${FLAGS} -ggdb -O2 ${MP}

# Targets for the multi-platform version (cross compile).
# These mirror mingw / mingw_wepoll / mac above but use the cross compilers
# ${cc_mingw_cross} and ${cc_mac_cross} and fixed output names
# (${NAME}_mp.exe, ${NAME}_mp_wepoll.exe, ${NAME}_mp_mac).

mingw_cross:git_version
	${cc_mingw_cross}   -o ${NAME}_mp.exe          -I. ${SOURCES} pcap_wrapper.cpp ${FLAGS} -ggdb -static -O2 -lws2_32 ${MP}

mingw_cross_wepoll:git_version
	${cc_mingw_cross}   -o ${NAME}_mp_wepoll.exe   -I. ${SOURCES0} pcap_wrapper.cpp ${FLAGS} -ggdb -static -O2 -DNO_LIBEV_EMBED -D_WIN32 -lev -lws2_32 ${MP}

mac_cross:git_version
	${cc_mac_cross}   -o ${NAME}_mp_mac            -I. ${SOURCES} ${PCAP} ${FLAGS} -ggdb -O2 ${MP}

# release_mp: builds every entry of ${TARGETS_MP}, copies git_version.h to
# version.txt and packs the three binaries plus version.txt into
# ${NAME}_mp_binaries.tar.gz.
release_mp:${TARGETS_MP}
	cp git_version.h version.txt
	tar -zcvf ${NAME}_mp_binaries.tar.gz ${NAME}_mp.exe ${NAME}_mp_wepoll.exe ${NAME}_mp_mac version.txt


# clean: remove everything the rules in this makefile produce.
# ${TAR} expands to the release archive, every ${NAME}_<release target> binary
# and version.txt, so the first rm covers the whole 'release' output.
# The third and fourth rm cover the outputs of dynamic / cygwin / mingw /
# mingw_wepoll / linux / freebsd; the .exe variants are listed because Windows
# toolchains commonly append .exe to a suffix-less output name (assumption --
# harmless with rm -f when the files do not exist).
# Declared phony so that a stray file named "clean" cannot disable the rule.
.PHONY: clean
clean:
	rm -f ${TAR}
	rm -f ${NAME} ${NAME}_cross ${NAME}.exe ${NAME}_wepoll.exe ${NAME}_mac
	rm -f ${NAME}_dynamic ${NAME}_cygwin ${NAME}_mingw ${NAME}_mingw_wepoll ${NAME}_linux ${NAME}_freebsd
	rm -f ${NAME}_cygwin.exe ${NAME}_mingw.exe ${NAME}_mingw_wepoll.exe
	rm -f ${NAME}_mp_binaries.tar.gz ${NAME}_mp.exe ${NAME}_mp_wepoll.exe ${NAME}_mp_mac
	rm -f git_version.h

# git_version: regenerate git_version.h, which defines the string `gitversion`
# as the output of `git rev-parse HEAD` (empty if that command prints nothing).
# Every build rule lists this as a prerequisite. It never creates a file named
# "git_version", so it is declared phony: otherwise a stray file with that name
# would make the rule look up to date and leave a stale git_version.h in place.
.PHONY: git_version
git_version:
	    echo "const char *gitversion = \"$(shell git rev-parse HEAD)\";" > git_version.h


================================================
FILE: misc.cpp
================================================
/*
 * misc.cpp
 *
 *  Created on: Sep 23, 2017
 *      Author: root
 */
#include "git_version.h"
#include "common.h"
#include "encrypt.h"
#include "misc.h"
#include "network.h"
#include "connection.h"
#include "fd_manager.h"

// Global option and state variables. Most of them are filled in by
// process_arg() further down in this file.

// Heartbeat settings, set by --hb-mode (must be 0 or 1) and --hb-len (0..1500).
int hb_mode = 1;
int hb_len = 1200;
char hb_buf[buf_len];

int mtu_warn = 1375;  // if a packet larger than mtu_warn is received, a warning is logged (--mtu-warn)

int max_rst_to_show = 15;  // --max-rst-to-show, must be >= -1

int max_rst_allowed = -1;  // --max-rst-allowed, must be >= -1

int enable_dns_resolve = 0;  // --dns-resolve (marked "currently not used" where it is parsed)

int ttl_value = 64;  // --set-ttl, 0..255

fd_manager_t fd_manager;

// char remote_address[max_address_len]="";
// char local_ip[100]="0.0.0.0", remote_ip[100]="255.255.255.255",source_ip[100]="0.0.0.0";//local_ip is for -l option,remote_ip for -r option,source for --source-ip
// u32_t local_ip_uint32,remote_ip_uint32,source_ip_uint32;//convert from last line.
// int local_port = -1, remote_port=-1,source_port=0;//similiar to local_ip  remote_ip,buf for port.source_port=0 indicates --source-port is not enabled
// local_addr: -l option, remote_addr: -r option, source_addr: --source-ip option.
address_t local_addr, remote_addr, source_addr;

my_ip_t bind_addr;

int source_port = -1;  // value of --source-port

int bind_addr_used = 0;
int force_source_ip = 0;    // set when --source-ip is given
int force_source_port = 0;  // set when --source-port is given

my_id_t const_id = 0;  // an id used for connection recovery; generated randomly and never changed afterwards

int udp_fd = -1;   // for client only. the client uses this fd to listen for and handle udp connections
int bind_fd = -1;  // bind only, never send or recv. a dummy fd so that other programs won't occupy the same port
#ifdef UDP2RAW_LINUX
int epollfd = -1;   // fd for epoll
int timer_fd = -1;  // the general timer fd for client and server. for the server this is not the only timer fd, every connection has its own
#endif
int fail_time_counter = 0;      // used to determine whether max_fail_time is reached
int epoll_trigger_counter = 0;  // for debug only
int debug_flag = 0;             // for debug only (--debug)

int simple_rule = 0;                 // deprecated (--simple-rule)
int keep_rule = 0;                   // whether to monitor the iptables rule periodically and re-add it if lost (--keep-rule)
int auto_add_iptables_rule = 0;      // set when -a is given
int generate_iptables_rule = 0;      // set when -g is given
int generate_iptables_rule_add = 0;  // set when --gen-add is given

int retry_on_error = 0;  // --retry-on-error

int debug_resend = 0;  // debug only

// NOTE(review): -k copies its argument here with sscanf("%s") and no field
// width, so the 1000-byte capacity is not enforced at the parse site.
char key_string[1000] = "secret key";  // -k option

char fifo_file[1000] = "";  // --fifo option

int clear_iptables = 0;     // --clear
int wait_xtables_lock = 0;  // --wait-lock
#ifdef UDP2RAW_LINUX
string iptables_command0 = "iptables/ip6tables ";
string iptables_command = "";
string iptables_pattern = "";
int iptables_rule_added = 0;
int iptables_rule_keeped = 0;
int iptables_rule_keep_index = 0;
#endif

program_mode_t program_mode = unset_mode;  // 0 unset; 1 client; 2 server (set by -c / -s)
raw_mode_t raw_mode = mode_faketcp;        // --raw-mode
u32_t raw_ip_version = (u32_t)-1;
// Names accepted by --raw-mode, keyed by raw_mode_t value.
unordered_map<int, const char *> raw_mode_tostring = {{mode_faketcp, "faketcp"}, {mode_udp, "udp"}, {mode_icmp, "icmp"}};

int about_to_exit = 0;

int socket_buf_size = 1024 * 1024;  // in bytes; --sock-buf takes kbytes and multiplies by 1024
// int force_socket_buf=0;

// char lower_level_arg[1000];
#ifdef UDP2RAW_LINUX
// Handle the --lower-level option; the value is read from the global optarg.
//
// "auto" only sets lower_level. Any other value must have the form
// 'if_name#xx:xx:xx:xx:xx:xx' (six hex octets); it additionally sets
// lower_level_manual and fills if_name and dest_hw_addr. A value that does
// not match that form completely is reported as fatal and myexit(-1) is
// called, instead of silently continuing with a partial or all-zero MAC.
// Always returns 0.
int process_lower_level_arg() {
    lower_level = 1;
    if (strcmp(optarg, "auto") == 0) {
        return 0;
    }

    lower_level_manual = 1;
    if (strchr(optarg, '#') == NULL) {
        mylog(log_fatal,
              "lower-level parameter invaild,check help page for format\n");
        myexit(-1);
    }

    u32_t hw[6];
    memset(hw, 0, sizeof(hw));
    // NOTE(review): %[^#] has no field width; if_name is declared elsewhere, so
    // confirm its capacity covers the longest interface name accepted here.
    int matched = sscanf(optarg, "%[^#]#%x:%x:%x:%x:%x:%x", if_name, &hw[0], &hw[1], &hw[2],
                         &hw[3], &hw[4], &hw[5]);
    // One interface name plus six octets are expected; anything less means the
    // value was malformed (e.g. empty name, missing or non-hex octets).
    if (matched != 7) {
        mylog(log_fatal,
              "lower-level parameter invaild,check help page for format\n");
        myexit(-1);
    }
    for (int i = 0; i < 6; i++) {
        // Each part must fit into one byte; larger values used to be truncated.
        if (hw[i] > 0xff) {
            mylog(log_fatal,
                  "lower-level parameter invaild,check help page for format\n");
            myexit(-1);
        }
    }

    mylog(log_warn,
          "make sure this is correct:   if_name=<%s>  dest_mac_adress=<%02x:%02x:%02x:%02x:%02x:%02x>  \n",
          if_name, hw[0], hw[1], hw[2], hw[3], hw[4], hw[5]);
    for (int i = 0; i < 6; i++) {
        dest_hw_addr[i] = uint8_t(hw[i]);
    }
    return 0;
}
#endif
void print_help() {
    char git_version_buf[100] = {0};
    strncpy(git_version_buf, gitversion, 10);
    printf("udp2raw-tunnel\n");
    printf("git version:%s    ", git_version_buf);
    printf("build date:%s %s\n", __DATE__, __TIME__);
    printf("repository: https://github.com/wangyu-/udp2raw-tunnel\n");
    printf("\n");
#ifdef UDP2RAW_MP
#ifdef NO_LIBNET
    printf("libnet is disabled at compile time\n");
    printf("\n");
#endif
#endif
    printf("usage:\n");
    printf("    run as client : ./this_program -c -l local_listen_ip:local_port -r server_address:server_port  [options]\n");
    printf("    run as server : ./this_program -s -l server_listen_ip:server_port -r remote_address:remote_port  [options]\n");
    printf("\n");
    printf("common options,these options must be same on both side:\n");
    printf("    --raw-mode            <string>        available values:faketcp(default),udp,icmp and easy-faketcp\n");
    printf("    -k,--key              <string>        password to gen symetric key,default:\"secret key\"\n");
    printf("    --cipher-mode         <string>        available values:aes128cfb,aes128cbc(default),xor,none\n");
    printf("    --auth-mode           <string>        available values:hmac_sha1,md5(default),crc32,simple,none\n");
    printf("    -a,--auto-rule                        auto add (and delete) iptables rule\n");
    printf("    -g,--gen-rule                         generate iptables rule then exit,so that you can copy and\n");
    printf("                                          add it manually.overrides -a\n");
    printf("    --disable-anti-replay                 disable anti-replay,not suggested\n");
    printf("    --fix-gro                             try to fix huge packet caused by GRO. this option is at an early stage.\n");
    printf("                                          make sure client and server are at same version.\n");

    // printf("\n");
    printf("client options:\n");
    printf("    --source-ip           <ip>            force source-ip for raw socket\n");
    printf("    --source-port         <port>          force source-port for raw socket,tcp/udp only\n");
    printf("                                          this option disables port changing while re-connecting\n");
    //	printf("                                          \n");
    printf("other options:\n");
    printf("    --conf-file           <string>        read options from a configuration file instead of command line.\n");
    printf("                                          check example.conf in repo for format\n");
    printf("    --fifo                <string>        use a fifo(named pipe) for sending commands to the running program,\n");
    printf("                                          check readme.md in repository for supported commands.\n");
    printf("    --log-level           <number>        0:never    1:fatal   2:error   3:warn \n");
    printf("                                          4:info (default)     5:debug   6:trace\n");
    //	printf("\n");
    printf("    --log-position                        enable file name,function name,line number in log\n");
    printf("    --disable-color                       disable log color\n");
    printf("    --disable-bpf                         disable the kernel space filter,most time its not necessary\n");
    printf("                                          unless you suspect there is a bug\n");
//	printf("\n");
#ifdef UDP2RAW_LINUX
    printf("    --dev                 <string>        bind raw socket to a device, not necessary but improves performance\n");
#endif
    printf("    --sock-buf            <number>        buf size for socket,>=10 and <=10240,unit:kbyte,default:1024\n");
    printf("    --force-sock-buf                      bypass system limitation while setting sock-buf\n");
    printf("    --seq-mode            <number>        seq increase mode for faketcp:\n");
    printf("                                          0:static header,do not increase seq and ack_seq\n");
    printf("                                          1:increase seq for every packet,simply ack last seq\n");
    printf("                                          2:increase seq randomly, about every 3 packets,simply ack last seq\n");
    printf("                                          3:simulate an almost real seq/ack procedure(default)\n");
    printf("                                          4:similiar to 3,but do not consider TCP Option Window_Scale,\n");
    printf("                                          maybe useful when firewall doesnt support TCP Option \n");
    //	printf("\n");
    printf("    --lower-level         <string>        send packets at OSI level 2, format:'if_name#dest_mac_adress'\n");
    printf("                                          ie:'eth0#00:23:45:67:89:b9'.or try '--lower-level auto' to obtain\n");
    printf("                                          the parameter automatically,specify it manually if 'auto' failed\n");
    printf("    --wait-lock                           wait for xtables lock while invoking iptables, need iptables v1.4.20+\n");
    printf("    --gen-add                             generate iptables rule and add it permanently,then exit.overrides -g\n");
    printf("    --keep-rule                           monitor iptables and auto re-add if necessary.implys -a\n");
    printf("    --hb-len              <number>        length of heart-beat packet, >=0 and <=1500\n");
    printf("    --mtu-warn            <number>        mtu warning threshold, unit:byte, default:1375\n");
    printf("    --clear                               clear any iptables rules added by this program.overrides everything\n");
    printf("    --retry-on-error                      retry on error, allow to start udp2raw before network is initialized\n");
    printf("    -h,--help                             print this help message\n");
    // printf("common options,these options must be same on both side\n");
}

// Load options from a configuration file and append them to argv.
//
// Every line of file_name is handed to parse_conf_line(); the strings it
// returns are appended to argv in order and argc grows by the same count.
// If the file cannot be opened, a fatal message is logged and myexit(-1) is
// called. See example.conf in the repository for the file format.
// Returns 0.
int load_config(char *file_name, int &argc, vector<string> &argv) {
    std::ifstream input(file_name);
    if (!input) {
        mylog(log_fatal, "conf_file %s open failed,reason :%s\n", file_name, get_sock_error());
        myexit(-1);
    }

    for (std::string text; std::getline(input, text);) {
        const auto tokens = parse_conf_line(text);
        argc += tokens.size();
        for (const auto &token : tokens) {
            argv.push_back(token);
        }
    }

    input.close();
    return 0;
}

// Pre-scan the command line for the logging options (--log-level,
// --enable-color, --disable-color, --log-position) so they take effect before
// the remaining options are parsed and logged.
//
// argc/argv: the argument vector to scan; it is not modified.
// A --log-level value that is not a number, or lies outside [0, log_end), is
// reported with "invalid log_level" and myexit(-1) is called. A --log-level
// that is the last argument (no value) is ignored here.
// Returns 0.
int process_log_level(int argc, char *argv[]) {
    for (int i = 0; i < argc; i++) {
        if (strcmp(argv[i], "--log-level") == 0) {
            if (i < argc - 1) {
                // Parse into a temporary so log_level keeps its previous value
                // while the error is being reported, and so a non-numeric
                // value is rejected instead of being silently ignored.
                int requested = -1;
                if (sscanf(argv[i + 1], "%d", &requested) != 1 || requested < 0 || requested >= log_end) {
                    log_bare(log_fatal, "invalid log_level\n");
                    myexit(-1);
                }
                log_level = requested;
            }
        }
        if (strcmp(argv[i], "--enable-color") == 0) {
            enable_log_color = 1;
        }
        if (strcmp(argv[i], "--disable-color") == 0) {
            enable_log_color = 0;
        }
        if (strcmp(argv[i], "--log-position") == 0) {
            enable_log_position = 1;
        }
    }
    return 0;
}
void process_arg(int argc, char *argv[])  // process all options
{
    int i, j, k, opt;

    int option_index = 0;

    char options[] = "l:r:schk:ag";
    static struct option long_options[] =
        {
            /* These options set a flag. */
            {"source-ip", required_argument, 0, 1},
            {"source-port", required_argument, 0, 1},
            {"log-level", required_argument, 0, 1},
            {"key", required_argument, 0, 'k'},
            {"auth-mode", required_argument, 0, 1},
            {"cipher-mode", required_argument, 0, 1},
            {"raw-mode", required_argument, 0, 1},
            {"disable-color", no_argument, 0, 1},
            {"enable-color", no_argument, 0, 1},
            {"log-position", no_argument, 0, 1},
            {"disable-bpf", no_argument, 0, 1},
            {"disable-anti-replay", no_argument, 0, 1},
            {"auto-rule", no_argument, 0, 'a'},
            {"gen-rule", no_argument, 0, 'g'},
            {"gen-add", no_argument, 0, 1},
            {"debug", no_argument, 0, 1},
            {"retry-on-error", no_argument, 0, 1},
            {"clear", no_argument, 0, 1},
            {"simple-rule", no_argument, 0, 1},
            {"keep-rule", no_argument, 0, 1},
            {"lower-level", required_argument, 0, 1},
            {"sock-buf", required_argument, 0, 1},
            {"seq-mode", required_argument, 0, 1},
            {"conf-file", required_argument, 0, 1},
            {"force-sock-buf", no_argument, 0, 1},
            {"wait-lock", no_argument, 0, 1},
            {"random-drop", required_argument, 0, 1},
            {"fifo", required_argument, 0, 1},
            {"hb-mode", required_argument, 0, 1},
            {"hb-len", required_argument, 0, 1},
            {"mtu-warn", required_argument, 0, 1},
            {"max-rst-to-show", required_argument, 0, 1},
            {"max-rst-allowed", required_argument, 0, 1},
            {"set-ttl", required_argument, 0, 1},
            {"dev", required_argument, 0, 1},
            {"dns-resolve", no_argument, 0, 1},
            {"easy-tcp", no_argument, 0, 1},
#ifdef UDP2RAW_MP
            {"pcap-send", no_argument, 0, 1},
            {"no-pcap-mutex", no_argument, 0, 1},
#endif
            {"fix-gro", no_argument, 0, 1},
            {NULL, 0, 0, 0}};

    process_log_level(argc, argv);

    set<string> all_options;
    map<string, string> shortcut_map;

    all_options.insert("--help");
    all_options.insert("-h");
    string dummy = "";
    for (i = 0; i < (int)strlen(options); i++) {
        char val = options[i];
        if ((val >= '0' && val <= '9') || (val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')) {
            all_options.insert(dummy + '-' + val);
        }
    }
    for (i = 0; i < int(sizeof(long_options) / sizeof(long_options[0])); i++) {
        if (long_options[i].name == NULL) break;
        int val = long_options[i].val;
        if ((val >= '0' && val <= '9') || (val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')) {
            shortcut_map[dummy + "--" + long_options[i].name] = dummy + "-" + char(val);
        }
        all_options.insert(dummy + "--" + long_options[i].name);
    }

    for (i = 0; i < argc; i++) {
        int len = strlen(argv[i]);
        if (len == 0) {
            mylog(log_fatal, "found an empty string in options\n");
            myexit(-1);
        }
        if (len == 1 && argv[i][0] == '-') {
            mylog(log_fatal, "invaild option '-' in argv\n");
            myexit(-1);
        }
        if (len == 2 && argv[i][0] == '-' && argv[i][1] == '-') {
            mylog(log_fatal, "invaild option '--' in argv\n");
            myexit(-1);
        }
    }

    mylog(log_info, "argc=%d ", argc);

    for (i = 0; i < argc; i++) {
        log_bare(log_info, "%s ", argv[i]);
    }
    log_bare(log_info, "\n");

    // string dummy="";
    for (i = +1; i < argc; i++) {
        if (argv[i][0] != '-') continue;
        string a = argv[i];
        if (a[0] == '-' && a[1] != '-')
            a = dummy + a[0] + a[1];

        if (all_options.find(a.c_str()) == all_options.end()) {
            mylog(log_fatal, "invaild option %s\n", a.c_str());
            myexit(-1);
        }
        for (j = i + 1; j < argc; j++) {
            if (argv[j][0] != '-') continue;

            string b = argv[j];

            if (b[0] == '-' && b[1] != '-')
                b = dummy + b[0] + b[1];

            if (shortcut_map.find(a) != shortcut_map.end())
                a = shortcut_map[a];
            if (shortcut_map.find(b) != shortcut_map.end())
                b = shortcut_map[b];
            if (a == b) {
                mylog(log_fatal, "%s duplicates with %s\n", argv[i], argv[j]);
                myexit(-1);
            }
        }
    }

    int no_l = 1, no_r = 1;
    while ((opt = getopt_long(argc, argv, options, long_options, &option_index)) != -1) {
        // string opt_key;
        // opt_key+=opt;
        switch (opt) {
            case 'l':
                no_l = 0;
                local_addr.from_str(optarg);
                if (local_addr.get_port() == 22) {
                    mylog(log_fatal, "port 22 not allowed\n");
                    myexit(-1);
                }
                /*
                if (strchr(optarg, ':') != 0) {
                        sscanf(optarg, "%[^:]:%d", local_ip, &local_port);
                        if(local_port==22)
                        {
                                mylog(log_fatal,"port 22 not allowed\n");
                                myexit(-1);
                        }
                } else {
                        mylog(log_fatal,"invalid parameter for -l ,%s,should be ip:port\n",optarg);
                        myexit(-1);
                }*/
                break;
            case 'r':
                no_r = 0;
                remote_addr.from_str(optarg);
                if (remote_addr.get_port() == 22) {
                    mylog(log_fatal, "port 22 not allowed\n");
                    myexit(-1);
                }
                /*
                if (strchr(optarg, ':') != 0) {
                        sscanf(optarg, "%[^:]:%d", remote_address, &remote_port);
                        if(remote_port==22)
                        {
                                mylog(log_fatal,"port 22 not allowed\n");
                                myexit(-1);
                        }
                } else {
                        mylog(log_fatal,"invalid parameter for -r ,%s,should be ip:port\n",optarg);
                        myexit(-1);
                }*/
                break;
            case 's':
                if (program_mode == 0) {
                    program_mode = server_mode;
                } else {
                    mylog(log_fatal, "-s /-c has already been set,conflict\n");
                    myexit(-1);
                }
                break;
            case 'c':
                if (program_mode == 0) {
                    program_mode = client_mode;
                } else {
                    mylog(log_fatal, "-s /-c has already been set,conflict\n");
                    myexit(-1);
                }
                break;
            case 'h':
                break;
            case 'a':
                if (is_udp2raw_mp) {
                    mylog(log_fatal, "-a not supported in this version, check -g or --raw-mode easyfaketcp\n");
                    myexit(-1);
                }
                auto_add_iptables_rule = 1;
                break;
            case 'g':
                generate_iptables_rule = 1;
                break;
            case 'k':
                mylog(log_debug, "parsing key option\n");
                sscanf(optarg, "%s", key_string);
                break;
            case 1:
                mylog(log_debug, "option_index: %d\n", option_index);
                if (strcmp(long_options[option_index].name, "clear") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--clear not supported in this version\n");
                        myexit(-1);
                    }

                    clear_iptables = 1;
                } else if (strcmp(long_options[option_index].name, "source-ip") == 0) {
                    mylog(log_debug, "parsing long option :source-ip\n");
                    // sscanf(optarg, "%s", source_ip);
                    source_addr.from_str_ip_only(optarg);
                    mylog(log_debug, "source: %s\n", source_addr.get_ip());
                    force_source_ip = 1;
                } else if (strcmp(long_options[option_index].name, "source-port") == 0) {
                    mylog(log_debug, "parsing long option :source-port\n");
                    sscanf(optarg, "%d", &source_port);
                    mylog(log_info, "source: %d\n", source_port);
                    force_source_port = 1;
                } else if (strcmp(long_options[option_index].name, "raw-mode") == 0) {
                    /*
                    for(i=0;i<mode_end;i++)
                    {
                            if(strcmp(optarg,raw_mode_tostring[i])==0)
                            {
                                    //printf("%d i\n",i);
                                    //printf("%s",raw_mode_tostring[i]);
                                    raw_mode=(raw_mode_t)i;
                                    break;
                            }
                    }
                    if(i==mode_end)
                    {
                            mylog(log_fatal,"no such raw_mode %s\n",optarg);
                            myexit(-1);
                    }
                     */
                    if (strcmp(optarg, "easyfaketcp") == 0 || strcmp(optarg, "easy_faketcp") == 0 || strcmp(optarg, "easy-faketcp") == 0) {
                        raw_mode = mode_faketcp;
                        use_tcp_dummy_socket = 1;
                    } else {
                        for (i = 0; i < mode_end; i++) {
                            if (strcmp(optarg, raw_mode_tostring[i]) == 0) {
                                // printf("%d i\n",i);
                                // printf("%s",raw_mode_tostring[i]);
                                raw_mode = (raw_mode_t)i;
                                break;
                            }
                        }
                        if (i == mode_end) {
                            mylog(log_fatal, "no such raw_mode %s\n", optarg);
                            myexit(-1);
                        }
                    }
                } else if (strcmp(long_options[option_index].name, "auth-mode") == 0) {
                    for (i = 0; i < auth_end; i++) {
                        if (strcmp(optarg, auth_mode_tostring[i]) == 0) {
                            auth_mode = (auth_mode_t)i;
                            if (auth_mode == auth_none) {
                                disable_anti_replay = 1;
                            }
                            break;
                        }
                    }
                    if (i == auth_end) {
                        mylog(log_fatal, "no such auth_mode %s\n", optarg);
                        myexit(-1);
                    }
                } else if (strcmp(long_options[option_index].name, "cipher-mode") == 0) {
                    string s = optarg;
                    if (s == "aes128cfb_0") {
                        s = "aes128cfb";
                        aes128cfb_old = 1;
                        mylog(log_warn, "aes128cfb_0 is used\n");
                    }
                    for (i = 0; i < cipher_end; i++) {
                        if (strcmp(s.c_str(), cipher_mode_tostring[i]) == 0) {
                            cipher_mode = (cipher_mode_t)i;
                            break;
                        }
                    }
                    if (i == cipher_end) {
                        mylog(log_fatal, "no such cipher_mode %s\n", optarg);
                        myexit(-1);
                    }
                } else if (strcmp(long_options[option_index].name, "log-level") == 0) {
                } else if (strcmp(long_options[option_index].name, "lower-level") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--lower-level not supported in this version\n");
                        myexit(-1);
                    }

#ifdef UDP2RAW_LINUX
                    process_lower_level_arg();
#endif
                    // process_lower_level_arg();
                    // lower_level=1;
                    // strcpy(lower_level_arg,optarg);
                } else if (strcmp(long_options[option_index].name, "simple-rule") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--simple-rule not supported in this version\n");
                        myexit(-1);
                    }
                    simple_rule = 1;
                } else if (strcmp(long_options[option_index].name, "keep-rule") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--keep-rule not supported in this version\n");
                        myexit(-1);
                    }
                    keep_rule = 1;
                } else if (strcmp(long_options[option_index].name, "gen-add") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--gen-add not supported in this version\n");
                        myexit(-1);
                    }
                    generate_iptables_rule_add = 1;
                } else if (strcmp(long_options[option_index].name, "disable-color") == 0) {
                    // enable_log_color=0;
                } else if (strcmp(long_options[option_index].name, "enable-color") == 0) {
                    // enable_log_color=0;
                } else if (strcmp(long_options[option_index].name, "debug") == 0) {
                    debug_flag = 1;
                    // enable_log_color=0;
                } else if (strcmp(long_options[option_index].name, "dev") == 0) {
                    sscanf(optarg, "%s", dev);
                    // enable_log_color=0;
                    mylog(log_info, "dev=[%s]\n", dev);
                } else if (strcmp(long_options[option_index].name, "debug-resend") == 0) {
                    // debug_resend=1;
                    // enable_log_color=0;
                } else if (strcmp(long_options[option_index].name, "log-position") == 0) {
                    // enable_log_position=1;
                } else if (strcmp(long_options[option_index].name, "force-sock-buf") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--force-sock-buf not supported in this version\n");
                        myexit(-1);
                    }
                    force_socket_buf = 1;
                } else if (strcmp(long_options[option_index].name, "retry-on-error") == 0) {
                    retry_on_error = 1;
                } else if (strcmp(long_options[option_index].name, "wait-lock") == 0) {
                    wait_xtables_lock = 1;
                } else if (strcmp(long_options[option_index].name, "disable-bpf") == 0) {
                    disable_bpf_filter = 1;
                } else if (strcmp(long_options[option_index].name, "disable-anti-replay") == 0) {
                    disable_anti_replay = 1;
                } else if (strcmp(long_options[option_index].name, "sock-buf") == 0) {
                    int tmp = -1;
                    sscanf(optarg, "%d", &tmp);
                    if (10 <= tmp && tmp <= 10 * 1024) {
                        socket_buf_size = tmp * 1024;
                    } else {
                        mylog(log_fatal, "sock-buf value must be between 1 and 10240 (kbyte) \n");
                        myexit(-1);
                    }
                } else if (strcmp(long_options[option_index].name, "seq-mode") == 0) {
                    sscanf(optarg, "%d", &seq_mode);
                    if (0 <= seq_mode && seq_mode <= max_seq_mode) {
                    } else {
                        mylog(log_fatal, "seq_mode value must be  0,1,or 2 \n");
                        myexit(-1);
                    }
                } else if (strcmp(long_options[option_index].name, "random-drop") == 0) {
                    sscanf(optarg, "%d", &random_drop);
                    if (random_drop < 0 || random_drop > 10000) {
                        mylog(log_fatal, "random_drop must be between 0 10000 \n");
                        myexit(-1);
                    }
                    mylog(log_info, "random_drop =%d \n", random_drop);
                } else if (strcmp(long_options[option_index].name, "fifo") == 0) {
                    if (is_udp2raw_mp) {
                        mylog(log_fatal, "--fifo not supported in this version\n");
                        myexit(-1);
                    }
                    sscanf(optarg, "%s", fifo_file);

                    mylog(log_info, "fifo_file =%s \n", fifo_file);
                } else if (strcmp(long_options[option_index].name, "conf-file") == 0) {
                    mylog(log_info, "configuration loaded from %s\n", optarg);
                } else if (strcmp(long_options[option_index].name, "hb-mode") == 0) {
                    sscanf(optarg, "%d", &hb_mode);
                    assert(hb_mode == 0 || hb_mode == 1);
                    mylog(log_info, "hb_mode =%d \n", hb_mode);
                } else if (strcmp(long_options[option_index].name, "hb-len") == 0) {
                    sscanf(optarg, "%d", &hb_len);
                    assert(hb_len >= 0 && hb_len <= 1500);
                    mylog(log_info, "hb_len =%d \n", hb_len);
                } else if (strcmp(long_options[option_index].name, "mtu-warn") == 0) {
                    sscanf(optarg, "%d", &mtu_warn);
                    assert(mtu_warn > 0);
                    mylog(log_info, "mtu_warn=%d \n", mtu_warn);
                } else if (strcmp(long_options[option_index].name, "max-rst-to-show") == 0) {
                    sscanf(optarg, "%d", &max_rst_to_show);
                    assert(max_rst_to_show >= -1);
                    mylog(log_info, "max_rst_to_show=%d \n", max_rst_to_show);
                } else if (strcmp(long_options[option_index].name, "max-rst-allowed") == 0) {
                    sscanf(optarg, "%d", &max_rst_allowed);
                    assert(max_rst_allowed >= -1);
                    mylog(log_info, "max_rst_allowed=%d \n", max_rst_allowed);
                } else if (strcmp(long_options[option_index].name, "set-ttl") == 0) {
                    sscanf(optarg, "%d", &ttl_value);
                    assert(ttl_value >= 0 && ttl_value <= 255);
                    mylog(log_info, "ttl_value=%d\n", ttl_value);
                }

                else if (strcmp(long_options[option_index].name, "dns-resolve") == 0)  // currently not used
                {
                    enable_dns_resolve = 1;
                    mylog(log_info, "dns-resolve enabled\n");
                }
#ifdef UDP2RAW_MP
                else if (strcmp(long_options[option_index].name, "pcap-send") == 0) {
                    send_with_pcap = 1;
                    mylog(log_info, "--pcap-send enabled, now pcap will be used for sending packet instead of libnet\n");
                } else if (strcmp(long_options[option_index].name, "no-pcap-mutex") == 0) {
                    use_pcap_mutex = 0;
                    mylog(log_warn, "--no-pcap-mutex enabled, we will assume the underlying pcap calls are threadsafe\n");
                }
#endif
                else if (strcmp(long_options[option_index].name, "easy-tcp") == 0) {
                    use_tcp_dummy_socket = 1;
                    mylog(log_info, "--easy-tcp enabled, now a dummy tcp socket will be created for handshake and block rst\n");
                } else if (strcmp(long_options[option_index].name, "fix-gro") == 0) {
                    mylog(log_info, "--fix-gro enabled\n");
                    g_fix_gro = 1;
                } else {
                    mylog(log_warn, "ignored unknown long option ,option_index:%d code:<%x>\n", option_index, optopt);
                }
                break;
            default:
                mylog(log_fatal, "unknown option ,code:<%c>,<%x>\n", optopt, optopt);
                myexit(-1);
        }
    }

    if (no_l)
        mylog(log_fatal, "error: -l not found\n");
    if (no_r)
        mylog(log_fatal, "error: -r not found\n");
    if (program_mode == 0)
        mylog(log_fatal, "error: -c /-s  hasnt been set\n");
    if (no_l || no_r || program_mode == 0) {
        print_help();
        myexit(-1);
    }
    if (program_mode == client_mode) {
        raw_ip_version = remote_addr.get_type();
    } else {
        raw_ip_version = local_addr.get_type();
    }

    if (auto_add_iptables_rule && use_tcp_dummy_socket) {
        mylog(log_error, "-a,--auto-rule is not supposed to be used with easyfaketcp mode, you are likely making a mistake, but we can try to continue\n");
    }

    if (keep_rule && use_tcp_dummy_socket) {
        mylog(log_error, "--keep-rule is not supposed to be used with easyfaketcp mode, you are likely making a mistake, but we can try to continue\n");
    }

    mylog(log_info, "important variables: ");

    log_bare(log_info, "log_level=%d:%s ", log_level, log_text[log_level]);
    log_bare(log_info, "raw_mode=%s ", raw_mode_tostring[raw_mode]);
    log_bare(log_info, "cipher_mode=%s ", cipher_mode_tostring[cipher_mode]);
    log_bare(log_info, "auth_mode=%s ", auth_mode_tostring[auth_mode]);

    log_bare(log_info, "key=%s ", key_string);

    log_bare(log_info, "local_addr=%s ", local_addr.get_str());
    log_bare(log_info, "remote_addr=%s ", remote_addr.get_str());

    if (force_source_ip)
        log_bare(log_info, "source_addr=%s ", source_addr.get_ip());

    if (force_source_port)
        log_bare(log_info, "source_port=%d ", source_port);

    log_bare(log_info, "socket_buf_size=%d ", socket_buf_size);

    log_bare(log_info, "\n");
}

void pre_process_arg(int argc, char *argv[])  // mainly for load conf file
{
    int i, j, k;
    for (i = 0; i < argc; i++) {
        if (strcmp(argv[i], "--unit-test") == 0) {
            unit_test();
            myexit(0);
        }
    }

    for (i = 0; i < argc; i++) {
        if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
            print_help();
            myexit(0);
        }
    }

    if (argc == 1) {
        print_help();
        myexit(-1);
    }

    process_log_level(argc, argv);

    int new_argc = 0;
    vector<string> new_argv;

    int count = 0;
    int pos = -1;

    for (i = 0; i < argc; i++) {
        if (strcmp(argv[i], "--conf-file") == 0) {
            count++;
            pos = i;
            if (i == argc) {
                mylog(log_fatal, "--conf-file need a parameter\n");
                myexit(-1);
            }
            if (argv[i + 1][0] == '-') {
                mylog(log_fatal, "--conf-file need a parameter\n");
                myexit(-1);
            }
            i++;
        } else {
            // printf("<%s>",argv[i]);
            new_argc++;
            new_argv.push_back(argv[i]);
        }
    }
    if (count > 1) {
        mylog(log_fatal, "duplicated --conf-file option\n");
        myexit(-1);
    }

    if (count > 0) {
        load_config(argv[pos + 1], new_argc, new_argv);
    }
    char *new_argv_char[new_argv.size()];

    new_argc = 0;
    for (i = 0; i < (int)new_argv.size(); i++) {
        if (strcmp(new_argv[i].c_str(), "--conf-file") == 0) {
            mylog(log_fatal, "cant have --conf-file in a config file\n");
            myexit(-1);
        }
        new_argv_char[new_argc++] = (char *)new_argv[i].c_str();
    }
    process_arg(new_argc, new_argv_char);
}
#ifdef UDP2RAW_LINUX
// Thread entry point for the --keep-rule option: every
// iptables_rule_keep_interval seconds it calls keep_iptables_rule(). Once
// about_to_exit is observed after a refresh, it waits another 10 seconds,
// clears keep_thread_running and ends the thread.
// The argument is unused; the return value is always NULL.
void *run_keep(void *none)  // called in a new thread for --keep-rule option
{
    do {
        sleep(iptables_rule_keep_interval);
        keep_iptables_rule();
    } while (!about_to_exit);  // exit check exists just in case, so the thread cannot run forever

    sleep(10);
    keep_thread_running = 0;  // not thread safe, but wont cause problem
    return NULL;
}
// Linux handling of the iptables related options
// (-a, -g, --gen-add, --keep-rule, --clear, --wait-lock).
//
// Picks iptables or ip6tables from raw_ip_version (adding "-w " when
// wait_xtables_lock is set), resolves conflicts between the options, builds
// the match pattern for the current program_mode / raw_mode, and then either
//   * clears previously created rules and exits (--clear),
//   * prints the rule and exits (-g),
//   * installs a permanent rule and exits (--gen-add),
//   * installs the rule for this run, optionally starting the keep thread (-a / --keep-rule),
//   * or only warns that no rule was added.
void iptables_rule()  // handles -a -g --gen-add  --keep-rule --clear --wait-lock
{
    assert(raw_ip_version == AF_INET || raw_ip_version == AF_INET6);

    if (raw_ip_version == AF_INET) {
        iptables_command0 = "iptables ";
    } else
        iptables_command0 = "ip6tables ";
    if (!wait_xtables_lock) {
        iptables_command = iptables_command0;
    } else {
        iptables_command = iptables_command0 + "-w ";
    }

    if (clear_iptables) {
        char *output;
        // Turn every "-A ..." line mentioning udp2rawDwrW into a "-D ..." command,
        // then every "-N <chain>" line into a "-X <chain>" command, and run them.
        int ret = run_command(iptables_command + "-S|sed -n '/udp2rawDwrW/p'|sed -n 's/^-A/" + iptables_command + "-D/p'|sh", output);

        int ret2 = run_command(iptables_command + "-S|sed -n '/udp2rawDwrW/p'|sed -n 's/^-N/" + iptables_command + "-X/p'|sh", output);
        mylog(log_info, "tried to clear all iptables rule created previously,return value %d %d\n", ret, ret2);
        myexit(-1);
    }

    if (auto_add_iptables_rule && generate_iptables_rule) {
        mylog(log_warn, " -g overrides -a\n");
        auto_add_iptables_rule = 0;
    }
    if (generate_iptables_rule_add && generate_iptables_rule) {
        mylog(log_warn, " --gen-add overrides -g\n");
        generate_iptables_rule = 0;
    }

    if (keep_rule && auto_add_iptables_rule == 0) {
        auto_add_iptables_rule = 1;
        mylog(log_warn, " --keep_rule implys -a\n");
        generate_iptables_rule = 0;
    }

    // Scratch buffer for one pattern fragment; all writes are bounded by snprintf.
    char tmp_pattern[200];
    string pattern = "";

    if (program_mode == client_mode) {
        // Client: match packets coming back from the remote server.
        tmp_pattern[0] = 0;
        if (raw_mode == mode_faketcp) {
            snprintf(tmp_pattern, sizeof(tmp_pattern), "-s %s -p tcp -m tcp --sport %d", remote_addr.get_ip(), remote_addr.get_port());
        }
        if (raw_mode == mode_udp) {
            snprintf(tmp_pattern, sizeof(tmp_pattern), "-s %s -p udp -m udp --sport %d", remote_addr.get_ip(), remote_addr.get_port());
        }
        if (raw_mode == mode_icmp) {
            if (raw_ip_version == AF_INET)
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-s %s -p icmp --icmp-type 0", remote_addr.get_ip());
            else
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-s %s -p icmpv6 --icmpv6-type 129", remote_addr.get_ip());
        }
        pattern += tmp_pattern;
    }
    if (program_mode == server_mode) {
        // Server: optional "-d <local ip>" (omitted when listening on the
        // all-zero address) followed by the protocol / port match.
        tmp_pattern[0] = 0;
        if (raw_ip_version == AF_INET) {
            if (local_addr.inner.ipv4.sin_addr.s_addr != 0) {
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-d %s ", local_addr.get_ip());
            }
        } else {
            char zero_arr[16] = {0};
            if (memcmp(&local_addr.inner.ipv6.sin6_addr, zero_arr, 16) != 0) {
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-d %s ", local_addr.get_ip());
            }
        }
        pattern += tmp_pattern;

        tmp_pattern[0] = 0;
        if (raw_mode == mode_faketcp) {
            snprintf(tmp_pattern, sizeof(tmp_pattern), "-p tcp -m tcp --dport %d", local_addr.get_port());
        }
        if (raw_mode == mode_udp) {
            snprintf(tmp_pattern, sizeof(tmp_pattern), "-p udp -m udp --dport %d", local_addr.get_port());
        }
        if (raw_mode == mode_icmp) {
            if (raw_ip_version == AF_INET)
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-p icmp --icmp-type 8");
            else
                snprintf(tmp_pattern, sizeof(tmp_pattern), "-p icmpv6 --icmpv6-type 128");
        }
        pattern += tmp_pattern;
    }

    if (generate_iptables_rule) {
        string rule = iptables_command + "-I INPUT ";
        rule += pattern;
        rule += " -j DROP";

        printf("generated iptables rule:\n");
        printf("%s\n", rule.c_str());
        myexit(0);
    }
    if (generate_iptables_rule_add) {
        iptables_gen_add(pattern.c_str(), const_id);
        myexit(0);
    }

    if (auto_add_iptables_rule) {
        iptables_rule_init(pattern.c_str(), const_id, keep_rule);
        if (keep_rule) {
            // Background thread that periodically re-installs the rule.
            if (pthread_create(&keep_thread, NULL, run_keep, 0)) {
                mylog(log_fatal, "Error creating thread\n");
                myexit(-1);
            }
            keep_thread_running = 1;
        }
    } else {
        mylog(log_warn, " -a has not been set, make sure you have added the needed iptables rules manually\n");
    }
}
#endif

int unit_test() {
    printf("running unit test\n");
    vector<string> conf_lines = {"---aaa", "--aaa bbb", "-a bbb", " \t \t \t-a\t \t \t bbbbb\t \t \t "};
    for (int i = 0; i < int(conf_lines.size()); i++) {
        printf("orign:%s\n", conf_lines[i].c_str());
        auto res = parse_conf_line(conf_lines[i]);
        printf("pasrse_result: size %d", int(res.size()));
        for (int j = 0; j < int(res.size()); j++) {
            printf("<%s>", res[j].c_str());
        }
        printf("\n");
    }

    char s1[] = {1, 2, 3, 4, 5};

    char s2[] = {1};

    short c1 = csum((unsigned short *)s1, 5);
    short c2 = csum((unsigned short *)s2, 1);
    // c2=0;

    printf("%x %x\n", (int)c1, (int)c2);

    const char buf[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2, 13, 14, 15, 16};
    char key[100] = {0};
    char buf2[100] = {0};
    char buf3[100] = {0};
    char buf4[100] = {0};
    int len = 16;
    for (int i = 0; i < len; i++) {
        printf("<%d>", buf[i]);
    }
    printf("\n");
    cipher_encrypt(buf, buf2, len, key);
    for (int i = 0; i < len; i++) {
        printf("<%d>", buf2[i]);
    }
    printf("\n");
    int temp_len = len;
    cipher_decrypt(buf2, buf3, len, key);
    for (int i = 0; i < len; i++) {
        printf("<%d>", buf3[i]);
    }
    printf("\n");
    cipher_encrypt(buf2, buf4, temp_len, key);
    for (int i = 0; i < temp_len; i++) {
        printf("<%d>", buf4[i]);
    }
    return 0;
}

#ifdef UDP2RAW_LINUX
int set_timer(int epollfd, int &timer_fd)  // put a timer_fd into epoll,general function,used both in client and server
{
    int ret;
    epoll_event ev;

    itimerspec its;
    memset(&its, 0, sizeof(its));

    if ((timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) < 0) {
        mylog(log_fatal, "timer_fd create error\n");
        myexit(1);
    }
    its.it_interval.tv_sec = (timer_interval / 1000);
    its.it_interval.tv_nsec = (timer_interval % 1000) * 1000ll * 1000ll;
    its.it_value.tv_nsec = 1;  // imidiately
    timerfd_settime(timer_fd, 0, &its, 0);

    ev.events = EPOLLIN;
    ev.data.u64 = timer_fd;

    ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, timer_fd, &ev);
    if (ret < 0) {
        mylog(log_fatal, "epoll_ctl return %d\n", ret);
        myexit(-1);
    }
    return 0;
}

int set_timer_server(int epollfd, int &timer_fd, fd64_t &fd64)  // only for server
{
    int ret;
    epoll_event ev;

    itimerspec its;
    memset(&its, 0, sizeof(its));

    if ((timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) < 0) {
        mylog(log_fatal, "timer_fd create error\n");
        myexit(1);
    }
    its.it_interval.tv_sec = (timer_interval / 1000);
    its.it_interval.tv_nsec = (timer_interval % 1000) * 1000ll * 1000ll;
    its.it_value.tv_nsec = 1;  // imidiately
    timerfd_settime(timer_fd, 0, &its, 0);

    fd64 = fd_manager.create(timer_fd);

    ev.events = EPOLLIN;
    ev.data.u64 = fd64;  ////difference

    ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, timer_fd, &ev);
    if (ret < 0) {
        mylog(log_fatal, "epoll_ctl return %d\n", ret);
        myexit(-1);
    }
    return 0;
}

// Fill raw_info.send_info.addr_ll, the link-level destination used when
// --lower-level is enabled (server only).
//
// With lower_level_manual set, the address is built from the configured
// ifindex and dest_hw_addr; otherwise it is copied from the link-level
// address stored in raw_info.recv_info. Always returns 0.
int handle_lower_level(raw_info_t &raw_info)  // fill lower_level info,when --lower-level is enabled,only for server
{
    auto &out_ll = raw_info.send_info.addr_ll;
    const auto &in_ll = raw_info.recv_info.addr_ll;

    // Start from an all-zero address in both modes; fields not assigned below stay zero.
    memset(&out_ll, 0, sizeof(out_ll));

    if (!lower_level_manual) {
        out_ll.sll_family = in_ll.sll_family;
        out_ll.sll_ifindex = in_ll.sll_ifindex;
        out_ll.sll_protocol = in_ll.sll_protocol;
        out_ll.sll_halen = in_ll.sll_halen;
        memcpy(out_ll.sll_addr, in_ll.sll_addr, sizeof(out_ll.sll_addr));

        mylog(log_debug, "[auto]lower level info %x %x\n ", out_ll.sll_halen, out_ll.sll_protocol);
        return 0;
    }

    out_ll.sll_family = AF_PACKET;
    out_ll.sll_ifindex = ifindex;
    out_ll.sll_halen = ETHER_ADDR_LEN;
    out_ll.sll_protocol = htons(ETH_P_IP);
    memcpy(&out_ll.sll_addr, dest_hw_addr, ETHER_ADDR_LEN);
    mylog(log_debug, "[manual]lower level info %x %x\n ", out_ll.sll_halen, out_ll.sll_protocol);
    return 0;
}

// State shared by the iptables helpers below. Each array has two slots:
// slot 0 is always used, slot 1 only when the rule is kept (--keep-rule),
// in which case keep_iptables_rule() alternates between the two.
string chain[2];          // names of the udp2raw-owned chains
string rule_keep[2];      // "<pattern> -j <chain>" rule body for each chain
string rule_keep_add[2];  // full command inserting rule_keep[i] into INPUT
string rule_keep_del[2];  // full command deleting rule_keep[i] from INPUT
u64_t keep_rule_last_time = 0;  // written by iptables_rule_init(); the code that read it in keep_iptables_rule() is commented out

pthread_t keep_thread;        // thread running run_keep(), created in iptables_rule()
int keep_thread_running = 0;  // set to 1 after the thread is created, reset to 0 by run_keep() before it returns
int iptables_gen_add(const char *s, u32_t const_id) {
    string dummy = "";
    iptables_pattern = s;
    chain[0] = dummy + "udp2rawDwrW_C";
    rule_keep[0] = dummy + iptables_pattern + " -j " + chain[0];
    rule_keep_add[0] = iptables_command + "-I INPUT " + rule_keep[0];

    char *output;
    run_command(iptables_command + "-N " + chain[0], output, show_none);
    run_command(iptables_command + "-F " + chain[0], output);
    run_command(iptables_command + "-I " + chain[0] + " -j DROP", output);

    rule_keep_del[0] = iptables_command + "-D INPUT " + rule_keep[0];

    run_command(rule_keep_del[0], output, show_none);
    run_command(rule_keep_del[0], output, show_none);

    if (run_command(rule_keep_add[0], output) != 0) {
        mylog(log_fatal, "auto added iptables failed by: %s\n", rule_keep_add[0].c_str());
        myexit(-1);
    }
    return 0;
}
int iptables_rule_init(const char *s, u32_t const_id, int keep) {
    iptables_pattern = s;
    iptables_rule_added = 1;
    iptables_rule_keeped = keep;

    string dummy = "";
    char const_id_str[100];
    sprintf(const_id_str, "%x", const_id);

    chain[0] = dummy + "udp2rawDwrW_" + const_id_str + "_C0";
    chain[1] = dummy + "udp2rawDwrW_" + const_id_str + "_C1";

    rule_keep[0] = dummy + iptables_pattern + " -j " + chain[0];
    rule_keep[1] = dummy + iptables_pattern + " -j " + chain[1];

    rule_keep_add[0] = iptables_command + "-I INPUT " + rule_keep[0];
    rule_keep_add[1] = iptables_command + "-I INPUT " + rule_keep[1];

    rule_keep_del[0] = iptables_command + "-D INPUT " + rule_keep[0];
    rule_keep_del[1] = iptables_command + "-D INPUT " + rule_keep[1];

    keep_rule_last_time = get_current_time();

    char *output;

    for (int i = 0; i <= iptables_rule_keeped; i++) {
        run_command(iptables_command + "-N " + chain[i], output);
        run_command(iptables_command + "-F " + chain[i], output);
        run_command(iptables_command + "-I " + chain[i] + " -j DROP", output);

        if (run_command(rule_keep_add[i], output) != 0) {
            mylog(log_fatal, "auto added iptables failed by: %s\n", rule_keep_add[i].c_str());
            myexit(-1);
        }
    }
    mylog(log_warn, "auto added iptables rules\n");
    return 0;
}

// Refresh one of the two udp2raw iptables chains (called periodically by
// run_keep() for --keep-rule).
//
// Each call switches iptables_rule_keep_index to the other slot, then for
// that slot: makes sure the chain exists, flushes it, re-inserts its DROP
// rule, deletes the jump rule from INPUT (twice) and inserts it again at the
// top. Failures are only logged as warnings. Always returns 0.
int keep_iptables_rule()  // magic to work on a machine without grep/iptables --check/-m comment
{
    mylog(log_debug, "keep_iptables_rule begin %llu\n", get_current_time());
    iptables_rule_keep_index += 1;
    iptables_rule_keep_index %= 2;

    char *output;

    int i = iptables_rule_keep_index;

    // The return value is deliberately not checked for -N.
    run_command(iptables_command + "-N " + chain[i], output, show_none);

    if (run_command(iptables_command + "-F " + chain[i], output, show_none) != 0)
        mylog(log_warn, "iptables -F failed %d\n", i);

    if (run_command(iptables_command + "-I " + chain[i] + " -j DROP", output, show_none) != 0)
        mylog(log_warn, "iptables -I failed %d\n", i);

    if (run_command(rule_keep_del[i], output, show_none) != 0)
        mylog(log_warn, "rule_keep_del failed %d\n", i);

    run_command(rule_keep_del[i], output, show_none);  // do it twice, in case it fails for an unknown random reason

    // Report the add step under its own name so it is not mistaken for the delete step above.
    if (run_command(rule_keep_add[i], output, show_log) != 0)
        mylog(log_warn, "rule_keep_add failed %d\n", i);

    mylog(log_debug, "keep_iptables_rule end %llu\n", get_current_time());
    return 0;
}

int clear_iptables_rule() {
    char *output;
    string dummy = "";
    if (!iptables_rule_added) return 0;

    for (int i = 0; i <= iptables_rule_keeped; i++) {
        run_command(rule_keep_del[i], output);
        run_command(iptables_command + "-F " + chain[i], output);
        run_command(iptables_command + "-X " + chain[i], output);
    }
    return 0;
}
#endif

#ifdef UDP2RAW_MP
// Multi-platform (UDP2RAW_MP) counterpart of the Linux iptables_rule().
//
// Nothing is installed here. When -g (generate_iptables_rule) is set, the
// function prints ready-to-paste firewall commands that drop packets from
// the remote address for Linux (iptables / ip6tables), mac/bsd (pf) and
// Windows (netsh), then exits with status 0. Without -g it does nothing.
// -g is rejected for faketcp combined with --easy-tcp.
void iptables_rule()  // handles -a -g --gen-add  --keep-rule --clear --wait-lock
{
    if (generate_iptables_rule) {
        if (raw_mode == mode_faketcp && use_tcp_dummy_socket == 1) {
            mylog(log_fatal, "failed,-g doesnt work with easy-faketcp mode\n");
            myexit(-1);
        }
        if (raw_mode == mode_udp) {
            mylog(log_warn, "It not necessary to use iptables/firewall rule in udp mode\n");
        }
        log_bare(log_warn, "for linux, use:\n");
        if (raw_ip_version == AF_INET) {
            if (raw_mode == mode_faketcp)
                printf("iptables -I INPUT -s %s -p tcp -m tcp --sport %d -j DROP\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_udp)
                printf("iptables -I INPUT -s %s -p udp -m udp --sport %d -j DROP\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_icmp)
                printf("iptables -I INPUT -s %s -p icmp --icmp-type 0 -j DROP\n", remote_addr.get_ip());
            printf("\n");
        } else {
            assert(raw_ip_version == AF_INET6);
            if (raw_mode == mode_faketcp)
                printf("ip6tables -I INPUT -s %s -p tcp -m tcp --sport %d -j DROP\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_udp)
                printf("ip6tables -I INPUT -s %s -p udp -m udp --sport %d -j DROP\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_icmp)
                // exactly one "-p": a doubled "-p -p" makes the printed command invalid
                printf("ip6tables -I INPUT -s %s -p icmpv6 --icmpv6-type 129 -j DROP\n", remote_addr.get_ip());
            printf("\n");
        }

        log_bare(log_warn, "for mac/bsd use:\n");
        if (raw_ip_version == AF_INET) {
            if (raw_mode == mode_faketcp)
                printf("echo 'block drop inet proto tcp from %s port %d to any' > ./1.conf\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_udp)
                printf("echo 'block drop inet proto udp from %s port %d to any' > ./1.conf\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_icmp)
                printf("echo 'block drop inet proto icmp from %s to any' > ./1.conf\n", remote_addr.get_ip());
        } else {
            assert(raw_ip_version == AF_INET6);
            if (raw_mode == mode_faketcp)
                printf("echo 'block drop inet6 proto tcp from %s port %d to any' > ./1.conf\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_udp)
                printf("echo 'block drop inet6 proto udp from %s port %d to any' > ./1.conf\n", remote_addr.get_ip(), remote_addr.get_port());
            if (raw_mode == mode_icmp)
                printf("echo 'block drop inet6 proto icmp6 from %s to any' > ./1.conf\n", remote_addr.get_ip());
        }
        printf("pfctl -f ./1.conf\n");
        printf("pfctl -e\n");
        printf("\n");

        log_bare(log_warn, "for windows vista and above use:\n");
        if (raw_ip_version == AF_INET) {
            if (raw_mode == mode_faketcp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=TCP dir=in remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=TCP dir=out remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
            }
            if (raw_mode == mode_udp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=UDP dir=in remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=UDP dir=out remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
            }

            if (raw_mode == mode_icmp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=ICMPV4 dir=in remoteip=%s action=block\n", remote_addr.get_ip());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=ICMPV4 dir=out remoteip=%s action=block\n", remote_addr.get_ip());
            }
        } else {
            assert(raw_ip_version == AF_INET6);
            if (raw_mode == mode_faketcp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=TCP dir=in remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=TCP dir=out remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
            }
            if (raw_mode == mode_udp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=UDP dir=in remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=UDP dir=out remoteip=%s remoteport=%d action=block\n", remote_addr.get_ip(), remote_addr.get_port());
            }

            if (raw_mode == mode_icmp) {
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=ICMPV6 dir=in remoteip=%s action=block\n", remote_addr.get_ip());
                printf("netsh advfirewall firewall add rule name=udp2raw protocol=ICMPV6 dir=out remoteip=%s action=block\n", remote_addr.get_ip());
            }
        }

        myexit(0);
    }
}
#endif

// Signal handler: only records that shutdown was requested by setting
// about_to_exit; the process is not terminated here. The flag is polled
// elsewhere (for example by run_keep() in Linux builds).
// sig: the delivered signal number; not used.
void signal_handler(int sig) {
    about_to_exit = 1;
    // myexit(0);
}


================================================
FILE: misc.h
================================================
/*
 * misc.h
 *
 *  Created on: Sep 23, 2017
 *      Author: root
 */

#ifndef MISC_H_
#define MISC_H_

#include "common.h"
#include "log.h"
#include "network.h"

// Heartbeat settings, filled in by the --hb-mode (0 or 1) and --hb-len (0..1500) options.
extern int hb_mode;
extern int hb_len;
extern char hb_buf[buf_len];

extern int mtu_warn;  // --mtu-warn, must be > 0

// --max-rst-allowed / --max-rst-to-show, both must be >= -1
extern int max_rst_allowed;
extern int max_rst_to_show;

extern int enable_dns_resolve;  // --dns-resolve, currently not used

extern int ttl_value;  // --set-ttl, 0..255

const u32_t max_handshake_conn_num = 10000;
const u32_t max_ready_conn_num = 1000;
const u32_t anti_replay_window_size = 4000;
const int max_conv_num = 10000;

const u32_t client_handshake_timeout = 5000;  // unit ms
const u32_t client_retry_interval = 1000;     // ms

const u32_t server_handshake_timeout = client_handshake_timeout + 5000;  // this should be longer than the client's: the client retries actively, the server only retries passively

const int conv_clear_ratio = 30;  // the conv garbage collector checks 1/30 of all convs at a time
const int conn_clear_ratio = 50;
const int conv_clear_min = 1;
const int conn_clear_min = 1;

const u32_t conv_clear_interval = 1000;  // ms
const u32_t conn_clear_interval = 1000;  // ms

const i32_t max_fail_time = 0;  // disable

const u32_t heartbeat_interval = 600;  // ms

const u32_t timer_interval = 400;  // ms. this should be smaller than heartbeat_interval and retry interval;

const uint32_t conv_timeout = 180000;  // ms, i.e. 180 seconds
// const u32_t conv_timeout=30000; //for test

const u32_t client_conn_timeout = 10000;                              // ms.
const u32_t client_conn_uplink_timeout = client_conn_timeout + 2000;  // ms

const uint32_t server_conn_timeout = conv_timeout + 60000;  // ms. this should be 60s+ longer than conv_timeout, so that conv_manager can destruct convs gradually, to avoid latency glitches
// const u32_t server_conn_timeout=conv_timeout+10000;//for test

const u32_t iptables_rule_keep_interval = 20;  // unit: second; sleep time between two rule refreshes in run_keep()

// States of the per-connection server state machine.
enum server_current_state_t { server_idle = 0,
                              server_handshake1,
                              server_ready };  // server state machine
// States of the client state machine.
enum client_current_state_t { client_idle = 0,
                              client_tcp_handshake,
                              client_handshake1,
                              client_handshake2,
                              client_ready,
                              client_tcp_handshake_dummy };  // client state machine

// Raw transport selected for the tunnel; mode_end is the end-of-list marker, not a usable mode.
enum raw_mode_t { mode_faketcp = 0,
                  mode_udp,
                  mode_icmp,
                  mode_end };
// Role of this process; unset_mode (0) means neither -c nor -s has been given.
enum program_mode_t { unset_mode = 0,
                      client_mode,
                      server_mode };

// Holds either a server state or a client state in the same storage.
union current_state_t {
    server_current_state_t server_current_state;
    client_current_state_t client_current_state;
};

// extern char remote_address[max_address_len];
// extern char local_ip[100], remote_ip[100],source_ip[100];//local_ip is for -l option,remote_ip for -r option,source for --source-ip
// extern u32_t local_ip_uint32,remote_ip_uint32,source_ip_uint32;//convert from last line.
// extern int local_port , remote_port,source_port;//similiar to local_ip  remote_ip,buf for port.source_port=0 indicates --source-port is not enabled

// Addresses given on the command line: -l (local), -r (remote) and the forced source address.
extern address_t local_addr, remote_addr, source_addr;

extern my_ip_t bind_addr;

extern int bind_addr_used;
extern int force_source_ip;  // if --source-ip is enabled
extern int force_source_port;
extern int source_port;

extern my_id_t const_id;  // an id used for connection recovery; it is generated randomly and never changes afterwards

extern int udp_fd;                 // for client only. client use this fd to listen and handle udp connection
extern int bind_fd;                // bind only, never send or recv. it is just a dummy fd for bind, so that other programs wont occupy the same port
extern int epollfd;                // fd for epoll
extern int timer_fd;               // the general timer fd for client and server. for server this is not the only timer fd, every connection has a timer fd.
extern int fail_time_counter;      // determine if the max_fail_time is reached
extern int epoll_trigger_counter;  // for debug only
extern int debug_flag;             // for debug only

extern int simple_rule;                 // deprecated.
extern int keep_rule;                   // whether to monitor the iptables rule periodically and re-add it if lost
extern int auto_add_iptables_rule;      // if -a is set
extern int generate_iptables_rule;      // if -g is set
extern int generate_iptables_rule_add;  // if --gen-add is set
extern int retry_on_error;
const int retry_on_error_interval = 10;

extern int debug_resend;  // debug only

extern char key_string[1000];  // -k option
extern char fifo_file[1000];

extern raw_mode_t raw_mode;
extern u32_t raw_ip_version;  // AF_INET or AF_INET6; taken from remote_addr in client mode and local_addr otherwise

extern program_mode_t program_mode;
extern unordered_map<int, const char *> raw_mode_tostring;  // used to print raw_mode as text

extern int about_to_exit;  // set to 1 by signal_handler()

extern int socket_buf_size;

extern pthread_t keep_thread;    // thread running the --keep-rule refresh loop
extern int keep_thread_running;  // 1 while that thread is considered running

int process_lower_level_arg();
void print_help();
void iptables_rule();                          // handles -a -g --gen-add --keep-rule --clear --wait-lock
void pre_process_arg(int argc, char *argv[]);  // mainly for load conf file;
int unit_test();                               // run by --unit-test; prints results and returns 0
int set_timer(int epollfd, int &timer_fd);                       // create a periodic timerfd and add it to epoll (client and server)
int set_timer_server(int epollfd, int &timer_fd, fd64_t &fd64);  // server variant; the epoll event carries the fd64 handle
int handle_lower_level(raw_info_t &raw_info);                    // fill send_info.addr_ll when --lower-level is enabled

// NOTE(review): no definition of add_iptables_rule is visible next to the other iptables helpers in misc.cpp — confirm it is still used.
int add_iptables_rule(const char *);

int clear_iptables_rule();  // remove the rules/chains installed by iptables_rule_init()

int iptables_gen_add(const char *s, u32_t const_id);              // --gen-add: install a permanent rule for pattern s
int iptables_rule_init(const char *s, u32_t const_id, int keep);  // -a / --keep-rule: install the rule(s) for this run
int keep_iptables_rule();                                         // refresh one of the two kept chains

void signal_handler(int sig);  // sets about_to_exit

#endif /* MISC_H_ */


================================================
FILE: my_ev.cpp
================================================
// Compiles the bundled libev implementation (ev.c) as part of this project.
// my_ev_common.h is included first so that its configuration macros are
// visible to ev.c. The listed GCC warning groups are ignored only while ev.c
// is being compiled; the push/pop pair restores the previous diagnostic
// state afterwards.
#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wextra"
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wcomment"
#pragma GCC diagnostic ignored "-Wparentheses"
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wunused-value"

#pragma GCC diagnostic ignored "-Wall"
#pragma GCC diagnostic ignored "-W"

#include "my_ev_common.h"
#include "ev.c"

#pragma GCC diagnostic pop


================================================
FILE: my_ev.h
================================================
#pragma once

#include "my_ev_common.h"
#include "ev.h"


================================================
FILE: my_ev_common.h
================================================

// Configuration macros for the bundled libev. This header is included ahead
// of ev.h (in my_ev.h) and ahead of ev.c (in my_ev.cpp).

// Per libev's embedding documentation, EV_STANDALONE must be 1 when libev is
// built without its autoconf configuration.
#define EV_STANDALONE 1
// EV_COMMON supplies the extra members libev places in every watcher; here
// that is the usual `data` pointer plus a 64-bit `u64` field.
#define EV_COMMON \
    void *data;   \
    unsigned long long u64;
// NOTE(review): presumably disables the libev 3.x compatibility API — confirm against the libev docs.
#define EV_COMPAT3 0

//#include <wepoll.h>
#if defined(__MINGW32__)
//#define EV_USE_SELECT 1
//#define EV_SELECT_IS_WINSOCKET 1

// MinGW builds: descriptors and handles are passed through unchanged, and
// descriptors are closed with closesocket().
#define EV_FD_TO_WIN32_HANDLE(fd) (fd)
#define EV_WIN32_HANDLE_TO_FD(handle) (handle)
#define EV_WIN32_CLOSE_FD(fd) closesocket(fd)
#define FD_SETSIZE 4096

#endif
//#define EV_VERIFY 2


================================================
FILE: network.cpp
================================================
/*
 * network.cpp
 *
 *  Created on: Jul 29, 2017
 *      Author: wangyu
 */
#include "common.h"
#include "network.h"
#include "log.h"
#include "misc.h"

// Global state of the raw-packet layer.

// When zero, my_packet_handler() drops captures whose captured length is
// shorter than the on-wire length.
// NOTE(review): the name suggests this is a GRO workaround switch - confirm.
int g_fix_gro = 0;

// Raw socket descriptors; on Linux both are created by init_raw_socket().
// raw_recv_fd receives packets, raw_send_fd is used for sending only.
int raw_recv_fd = -1;
int raw_send_fd = -1;
u32_t link_level_header_len = 0;  // set it to 14 if SOCK_RAW is used in socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
int use_tcp_dummy_socket = 0;

int seq_mode = 3;
int max_seq_mode = 4;
int random_drop = 0;

// Port last passed to init_filter() in faketcp/udp mode; remove_filter() resets it to 0.
int filter_port = -1;

int disable_bpf_filter = 0;  // for test only,most time no need to disable this

// u32_t bind_address_uint32=0;

// Non-zero selects the link-level (PF_PACKET) send path in the Linux
// init_raw_socket() and send_raw_packet().
int lower_level = 0;
int lower_level_manual = 0;
int ifindex = -1;
char if_name[100] = "";

// Optional device name. When non-empty, the Linux init_raw_socket() binds
// raw_recv_fd to it; the UDP2RAW_MP build passes it to libnet_init()/pcap_create().
char dev[100] = "";

// Seeded with a random value by init_raw_socket().
unsigned short g_ip_id_counter = 0;
#ifdef UDP2RAW_LINUX
// Default destination hardware address: six 0xff bytes followed by two zero bytes.
unsigned char dest_hw_addr[sizeof(sockaddr_ll::sll_addr)] =
    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0};
#endif

//{0x00,0x23,0x45,0x67,0x89,0xb9};

const u32_t receive_window_lower_bound = 40960;
const u32_t receive_window_random_range = 512;
const unsigned char wscale = 0x05;

char g_packet_buf[huge_buf_len];  // looks dirty but works well
int g_packet_buf_len = -1;
int g_packet_buf_cnt = 0;

#ifdef UDP2RAW_LINUX
// NOTE(review): presumably holds the peer address reported by the receive
// call, with g_sockaddr_len as its length - the code using it is not visible here.
union {
    sockaddr_ll ll;
    sockaddr_in ipv4;
    sockaddr_in6 ipv6;
} g_sockaddr;
socklen_t g_sockaddr_len = -1;
#endif

#ifdef UDP2RAW_MP
// State used only by the UDP2RAW_MP (libpcap / libnet) build.

#ifndef NO_LIBNET
// libnet context created by init_raw_socket(); g_ptag is reset to 0 there.
libnet_t *libnet_handle;
libnet_ptag_t g_ptag = 0;
int send_with_pcap = 0;
#else
// Without libnet, sending through pcap is the default.
int send_with_pcap = 1;
#endif

int pcap_header_captured = 0;
int pcap_header_buf[buf_len];
int pcap_captured_full_len = -1;

// Capture handle created and activated by init_raw_socket().
pcap_t *pcap_handle;
// Link-layer header size, derived from pcap_datalink() in init_raw_socket().
int pcap_link_header_len = -1;
// int pcap_cnt=0;
// Captured packets pushed by my_packet_handler() (capture thread).
queue_t my_queue;

// queue_mutex guards my_queue; pcap_mutex is held around pcap_loop() and around
// filter changes in init_filter() when use_pcap_mutex is non-zero.
pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t pcap_mutex = PTHREAD_MUTEX_INITIALIZER;
int use_pcap_mutex = 1;

// Signalled with ev_async_send() from my_packet_handler() after each capture.
ev_async async_watcher;

struct ev_loop *g_default_loop;

// Thread running pcap_recv_thread_entry(); started by init_raw_socket().
pthread_t pcap_recv_thread;

// Currently compiled pcap filter and the number of successful compilations so far.
struct bpf_program g_filter;
long long g_filter_compile_cnt = 0;

#endif

#ifdef UDP2RAW_LINUX

// Older classic-BPF TCP filter. Each entry is {opcode, jump-if-true, jump-if-false, k}.
// Opcode meanings follow the tcpdump -d listings further down in this file:
// 0x28 ldh, 0x30 ldb, 0x15 jeq, 0x45 jset, 0xb1 ldxb 4*([k]&0xf), 0x48 ldh [x+k], 0x6 ret.
// The offsets include a 14-byte link header, and the program accepts packets where
// either the word at x+14 or the word at x+16 equals 0xef32.
// NOTE(review): not referenced anywhere in the visible part of this file.
struct sock_filter code_tcp_old[] = {
    {0x28, 0, 0, 0x0000000c},   // 0  ldh [12]
    {0x15, 0, 10, 0x00000800},  // 1  jeq 0x800, else -> 12
    {0x30, 0, 0, 0x00000017},   // 2  ldb [23]
    {0x15, 0, 8, 0x00000006},   // 3  jeq 6, else -> 12
    {0x28, 0, 0, 0x00000014},   // 4  ldh [20]
    {0x45, 6, 0, 0x00001fff},   // 5  jset 0x1fff -> 12
    {0xb1, 0, 0, 0x0000000e},   // 6  ldxb 4*([14]&0xf)
    {0x48, 0, 0, 0x0000000e},   // 7  ldh [x+14]
    {0x15, 2, 0, 0x0000ef32},   // 8  jeq 0xef32 -> 11
    {0x48, 0, 0, 0x00000010},   // 9  ldh [x+16]
    {0x15, 0, 1, 0x0000ef32},   // 10 jeq 0xef32, else -> 12
    {0x6, 0, 0, 0x0000ffff},    // 11 ret 0xffff (accept)
    {0x6, 0, 0, 0x00000000},    // 12 ret 0 (drop)
};
// Classic-BPF filter for IPv4 faketcp mode, installed by init_filter().
// Same program as the tcpdump output quoted below, but with the EtherType check
// removed and every offset reduced by 14, i.e. offsets are relative to the
// start of the IP header. The trailing numbers are the original tcpdump line numbers.
struct sock_filter code_tcp[] = {
    //{ 0x5, 0, 0, 0x00000001 },//0    //jump to 2,dirty hack from tcpdump -d's output
    //{ 0x5, 0, 0, 0x00000000 },//1
    {0x30, 0, 0, 0x00000009},  // 2   ldb [9]            (protocol byte)
    {0x15, 0, 6, 0x00000006},  // 3   jeq 6 (TCP), else drop
    {0x28, 0, 0, 0x00000006},  // 4   ldh [6]
    {0x45, 4, 0, 0x00001fff},  // 5   jset 0x1fff -> drop (non-zero fragment offset)
    {0xb1, 0, 0, 0x00000000},  // 6   ldxb 4*([0]&0xf)   (IP header length into x)
    {0x48, 0, 0, 0x00000002},  // 7   ldh [x+2]          (destination port)
    {0x15, 0, 1, 0x0000fffe},  // 8   jeq port, else drop; 0xfffe is a placeholder overwritten by init_filter()
    {0x6, 0, 0, 0x0000ffff},   // 9   ret 0xffff (accept)
    {0x6, 0, 0, 0x00000000},   // 10  ret 0 (drop)
};
/*
Reference: unmodified tcpdump output this table was derived from.
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 8, 0x00000800 },
{ 0x30, 0, 0, 0x00000017 },
{ 0x15, 0, 6, 0x00000006 },
{ 0x28, 0, 0, 0x00000014 },
{ 0x45, 4, 0, 0x00001fff },
{ 0xb1, 0, 0, 0x0000000e },
{ 0x48, 0, 0, 0x00000010 },
{ 0x15, 0, 1, 0x0000fffe },
{ 0x6, 0, 0, 0x0000ffff },
{ 0x6, 0, 0, 0x00000000 },
*/

// Zero-based index into code_tcp[] of the port comparison patched by init_filter().
int code_tcp_port_index = 6;

// Classic-BPF filter for IPv6 faketcp mode, installed by init_filter().
// Derived from: tcpdump -i ens33 ip6 and tcp and dst port 65534 -dd
// The EtherType check is removed; the offsets used are those listed in the table.
struct sock_filter code_tcp6[] = {
    //{ 0x28, 0, 0, 0x0000000c },//0
    //{ 0x15, 0, 5, 0x000086dd },//1
    {0x30, 0, 0, 0x00000006},  // 2   ldb [6]   (next-header byte)
    {0x15, 0, 3, 0x00000006},  // 3   jeq 6 (TCP), else drop
    {0x28, 0, 0, 0x0000002a},  // 4   ldh [42]  (destination port, assuming a 40-byte fixed header)
    {0x15, 0, 1, 0x0000fffe},  // 5   jeq port, else drop; 0xfffe is a placeholder overwritten by init_filter()
    {0x6, 0, 0, 0x00040000},   // 6   ret 0x40000 (accept)
    {0x6, 0, 0, 0x00000000},   // 7   ret 0 (drop)
};                             // note: this filter doesn't support extension headers
/*
Alternative offsets kept for reference:
 { 0x30, 0, 0, 0x00000014 },//2
{ 0x15, 0, 3, 0x00000006 },//3
{ 0x28, 0, 0, 0x00000038 },//4
{ 0x15, 0, 1, 0x0000fffe },//5
{ 0x6, 0, 0, 0x00040000 },//6
{ 0x6, 0, 0, 0x00000000 },//7
*/

// Zero-based index into code_tcp6[] of the port comparison patched by init_filter().
int code_tcp6_port_index = 3;

// Classic-BPF filter for IPv4 udp mode, installed by init_filter().
// Identical in shape to code_tcp[], except that the protocol byte is compared
// against 0x11 (UDP) instead of 6.
struct sock_filter code_udp[] = {
    //{ 0x5, 0, 0, 0x00000001 },
    //{ 0x5, 0, 0, 0x00000000 },
    {0x30, 0, 0, 0x00000009},  // ldb [9]           (protocol byte)
    {0x15, 0, 6, 0x00000011},  // jeq 0x11 (UDP), else drop
    {0x28, 0, 0, 0x00000006},  // ldh [6]
    {0x45, 4, 0, 0x00001fff},  // jset 0x1fff -> drop (non-zero fragment offset)
    {0xb1, 0, 0, 0x00000000},  // ldxb 4*([0]&0xf)  (IP header length into x)
    {0x48, 0, 0, 0x00000002},  // ldh [x+2]         (destination port)
    {0x15, 0, 1, 0x0000fffe},  // modify this fffe to the port you listen on
    {0x6, 0, 0, 0x0000ffff},   // ret 0xffff (accept)
    {0x6, 0, 0, 0x00000000},   // ret 0 (drop)
};
// Zero-based index into code_udp[] of the port comparison patched by init_filter().
int code_udp_port_index = 6;

// Classic-BPF filter for IPv6 udp mode, installed by init_filter().
// Same shape as code_tcp6[], with the next-header byte compared against 0x11 (UDP).
struct sock_filter code_udp6[] = {
    //		{ 0x28, 0, 0, 0x0000000c },
    //		{ 0x15, 0, 5, 0x000086dd },
    {0x30, 0, 0, 0x00000006},  // ldb [6]   (next-header byte)
    {0x15, 0, 3, 0x00000011},  // jeq 0x11 (UDP), else drop
    {0x28, 0, 0, 0x0000002a},  // ldh [42]  (destination port, assuming a 40-byte fixed header)
    {0x15, 0, 1, 0x0000fffe},  // jeq port, else drop; 0xfffe is a placeholder overwritten by init_filter()
    {0x6, 0, 0, 0x00040000},   // ret 0x40000 (accept)
    {0x6, 0, 0, 0x00000000},   // ret 0 (drop)

};
// Zero-based index into code_udp6[] of the port comparison patched by init_filter().
int code_udp6_port_index = 3;

// Classic-BPF filter for IPv4 icmp mode, installed by init_filter().
// Accepts every packet whose protocol byte (offset 9) equals 1; there is no port to patch.
struct sock_filter code_icmp[] = {
    //{ 0x5, 0, 0, 0x00000001 },
    //{ 0x5, 0, 0, 0x00000000 },
    {0x30, 0, 0, 0x00000009},  // ldb [9]  (protocol byte)
    {0x15, 0, 1, 0x00000001},  // jeq 1 (ICMP), else drop
    {0x6, 0, 0, 0x0000ffff},   // ret 0xffff (accept)
    {0x6, 0, 0, 0x00000000},   // ret 0 (drop)
};

// Classic-BPF filter for IPv6 icmp mode, installed by init_filter().
// Accepts a packet when the byte at offset 6 is 0x3a, or when that byte is 0x2c
// and the byte at offset 40 is 0x3a; everything else is dropped.
// NOTE(review): 0x3a / 0x2c look like the ICMPv6 and fragment-header protocol numbers - confirm.
struct sock_filter code_icmp6[] = {
    //		{ 0x28, 0, 0, 0x0000000c },
    //		{ 0x15, 0, 6, 0x000086dd },
    {0x30, 0, 0, 0x00000006},  // ldb [6]
    {0x15, 3, 0, 0x0000003a},  // jeq 0x3a -> accept
    {0x15, 0, 3, 0x0000002c},  // jeq 0x2c, else drop
    {0x30, 0, 0, 0x00000028},  // ldb [40]
    {0x15, 0, 1, 0x0000003a},  // jeq 0x3a, else drop
    {0x6, 0, 0, 0x00040000},   // ret 0x40000 (accept)
    {0x6, 0, 0, 0x00000000},   // ret 0 (drop)

};
/*

tcpdump -i eth1  ip and icmp -d
(000) ldh      [12]
(001) jeq      #0x800           jt 2    jf 5
(002) ldb      [23]
(003) jeq      #0x1             jt 4    jf 5
(004) ret      #65535
(005) ret      #0

tcpdump -i eth1  ip and icmp -dd
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 3, 0x00000800 },
{ 0x30, 0, 0, 0x00000017 },
{ 0x15, 0, 1, 0x00000001 },
{ 0x6, 0, 0, 0x0000ffff },
{ 0x6, 0, 0, 0x00000000 },


 */
/*
  tcpdump -i eth1 ip and tcp and dst port 65534 -dd

{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 8, 0x00000800 },
{ 0x30, 0, 0, 0x00000017 },
{ 0x15, 0, 6, 0x00000006 },
{ 0x28, 0, 0, 0x00000014 },
{ 0x45, 4, 0, 0x00001fff },
{ 0xb1, 0, 0, 0x0000000e },
{ 0x48, 0, 0, 0x00000010 },
{ 0x15, 0, 1, 0x0000fffe },
{ 0x6, 0, 0, 0x0000ffff },
{ 0x6, 0, 0, 0x00000000 },

 (000) ldh      [12]
(001) jeq      #0x800           jt 2    jf 10
(002) ldb      [23]
(003) jeq      #0x6             jt 4    jf 10
(004) ldh      [20]
(005) jset     #0x1fff          jt 10   jf 6
(006) ldxb     4*([14]&0xf)
(007) ldh      [x + 16]
(008) jeq      #0xfffe          jt 9    jf 10
(009) ret      #65535
(010) ret      #0

 */
#endif

// Default-initialises a packet descriptor according to the global raw_mode.
// Ports always start at zero; the remaining fields depend on the mode:
//   faketcp - TCP protocol, random seq/ack_seq, ACK flag set, no timestamps
//   udp     - UDP protocol
//   icmp    - ICMP or ICMPv6 (chosen by raw_ip_version), sequence counter reset
// Any other mode leaves the protocol-specific fields untouched.
packet_info_t::packet_info_t() {
    src_port = 0;
    dst_port = 0;

    if (raw_mode == mode_udp) {
        protocol = IPPROTO_UDP;
        return;
    }

    if (raw_mode == mode_icmp) {
        if (raw_ip_version != AF_INET) {
            assert(raw_ip_version == AF_INET6);
            protocol = IPPROTO_ICMPV6;
        } else {
            protocol = IPPROTO_ICMP;
        }
        my_icmp_seq = 0;
        return;
    }

    if (raw_mode != mode_faketcp) {
        return;
    }

    protocol = IPPROTO_TCP;
    // Keep the order of the two random draws: ack_seq first, then seq.
    ack_seq = get_true_random_number();
    seq = get_true_random_number();
    ack_seq_counter = 0;
    syn = 0;
    ack = 1;
    has_ts = 0;
    ts_ack = 0;
}
#ifdef UDP2RAW_MP
// pcap_loop() callback, executed on the capture thread.
// Queues the captured bytes for the main loop and then wakes it through the
// async watcher. Packets that are truncated (unless g_fix_gro is set) or
// shorter than the link-layer header are ignored.
void my_packet_handler(
    u_char *args,
    const struct pcap_pkthdr *packet_header,
    const u_char *pkt_data) {
    const auto captured = packet_header->caplen;
    const auto on_wire = packet_header->len;

    assert(captured <= on_wire);
    assert(captured <= huge_data_len);

    // A capture shorter than the wire length is only tolerated when g_fix_gro is set.
    const bool truncated = captured < on_wire;
    if (truncated && g_fix_gro == 0) {
        return;
    }

    // Too short to even contain the link-layer header.
    if ((int)captured < pcap_link_header_len) {
        return;
    }

    // The queue is shared with the main thread; a full queue silently drops the packet.
    pthread_mutex_lock(&queue_mutex);
    if (!my_queue.full()) {
        my_queue.push_back((char *)pkt_data, (int)captured);
    }
    pthread_mutex_unlock(&queue_mutex);

    // The wake-up is sent whether or not the packet was queued.
    ev_async_send(g_default_loop, &async_watcher);
}

void *pcap_recv_thread_entry(void *none) {
    struct pcap_pkthdr *packet_header;
    const u_char *pkt_data;

    while (1) {
        if (use_pcap_mutex) pthread_mutex_lock(&pcap_mutex);
        int ret = pcap_loop(pcap_handle, -1, my_packet_handler, NULL);  // use -1 instead of 0 as cnt, since 0 is undefined in old versions
        if (use_pcap_mutex) pthread_mutex_unlock(&pcap_mutex);
        if (ret == -1)
            mylog(log_warn, "pcap_loop exited with value %d\n", ret);
        else {
            mylog(log_debug, "pcap_loop exited with value %d\n", ret);
        }
        ev_sleep(1.0);
        // myexit(-1);
    }
    /*
    while(1)
    {
            //printf("!!!\n");
            pthread_mutex_lock(&pcap_mutex);
            int ret=pcap_next_ex(pcap_handle,&packet_header,&pkt_data);
            pthread_mutex_unlock(&pcap_mutex);

            switch (ret)
            {
                    case 0:
                            continue;
                    case 1:

                            break;

                    case -1:
                            mylog(log_fatal,"pcap_next_ex error [%s]\n",pcap_geterr(pcap_handle));
                            myexit(-1);
                            break;
                    case -2:
                            assert(0==1);//
                            break;
                    default:
                            assert(0==1);//
            }
    }
    myexit(-1);*/
    return 0;
}

extern void async_cb(struct ev_loop *loop, struct ev_async *watcher, int revents);
#endif

#ifdef UDP2RAW_LINUX
// Creates and configures the two raw sockets used by the Linux build.
//
// raw_send_fd: an IPPROTO_RAW socket of family raw_ip_version when lower_level
//              is 0, otherwise a PF_PACKET/SOCK_DGRAM socket. Its receive
//              buffer is shrunk to the minimum because it is only used for sending.
// raw_recv_fd: a PF_PACKET/SOCK_DGRAM socket for ETH_P_IP or ETH_P_IPV6,
//              optionally bound to the interface named by `dev`.
//
// Both sockets get their buffer size from socket_buf_size (using the *FORCE
// variants when force_socket_buf is set) and are switched to non-blocking mode.
// Also re-seeds g_ip_id_counter. Any failure is fatal (myexit). Returns 0.
//
// Calls with side effects are deliberately kept OUTSIDE assert(): inside an
// assert they would disappear entirely when the file is compiled with NDEBUG.
int init_raw_socket() {
    assert(raw_ip_version == AF_INET || raw_ip_version == AF_INET6);

    g_ip_id_counter = get_true_random_number() % 65535;
    if (lower_level == 0) {
        raw_send_fd = socket(raw_ip_version, SOCK_RAW, IPPROTO_RAW);  // IPPROTO_TCP??

        if (raw_send_fd == -1) {
            mylog(log_fatal, "Failed to create raw_send_fd\n");
            // perror("Failed to create raw_send_fd");
            myexit(1);
        }
    } else {
        raw_send_fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));  // todo  how to create a recv only raw socket?

        if (raw_send_fd == -1) {
            mylog(log_fatal, "Failed to create raw_send_fd\n");
            // perror("Failed to create raw_send_fd");
            myexit(1);
        }
        // init_ifindex(if_name);
    }

    // raw_send_fd is for send only, set its recv buffer to zero
    int opt = 0;
    int rcvbuf_ret = setsockopt(raw_send_fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt));
    assert(rcvbuf_ret == 0);
    (void)rcvbuf_ret;  // silences the unused-variable warning when NDEBUG is defined

    if (force_socket_buf) {
        if (setsockopt(raw_send_fd, SOL_SOCKET, SO_SNDBUFFORCE, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_SNDBUFFORCE fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
            myexit(1);
        }
    } else {
        if (setsockopt(raw_send_fd, SOL_SOCKET, SO_SNDBUF, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_SNDBUF fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
            myexit(1);
        }
    }

    // raw_fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));

    if (raw_ip_version == AF_INET)
        raw_recv_fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
    else
        raw_recv_fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
    // ETH_P_IP doesnt read outgoing packets
    //    https://stackoverflow.com/questions/20264895/eth-p-ip-is-not-working-as-expected-i-can-only-receive-incoming-packets
    //    to capture both incoming and outgoing packets use ETH_P_ALL

    if (raw_recv_fd == -1) {
        mylog(log_fatal, "Failed to create raw_recv_fd\n");
        // perror("");
        myexit(1);
    }
    if (strlen(dev) != 0) {
        // Restrict reception to the requested interface.
        struct sockaddr_ll bind_address;
        memset(&bind_address, 0, sizeof(bind_address));

        int index = -1;
        int ifindex_ret = init_ifindex(dev, raw_recv_fd, index);
        assert(ifindex_ret == 0);
        (void)ifindex_ret;

        bind_address.sll_family = AF_PACKET;
        if (raw_ip_version == AF_INET)
            bind_address.sll_protocol = htons(ETH_P_IP);
        else
            bind_address.sll_protocol = htons(ETH_P_IPV6);
        bind_address.sll_ifindex = index;

        if (bind(raw_recv_fd, (struct sockaddr *)&bind_address, sizeof(bind_address)) == -1) {
            mylog(log_fatal, "bind to dev [%s] failed\n", dev);
            myexit(1);
        }
    }

    if (force_socket_buf) {
        if (setsockopt(raw_recv_fd, SOL_SOCKET, SO_RCVBUFFORCE, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_RCVBUFFORCE fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
            myexit(1);
        }
    } else {
        if (setsockopt(raw_recv_fd, SOL_SOCKET, SO_RCVBUF, &socket_buf_size, sizeof(socket_buf_size)) < 0) {
            mylog(log_fatal, "SO_RCVBUF fail  socket_buf_size=%d  errno=%s\n", socket_buf_size, strerror(errno));
            myexit(1);
        }
    }

    setnonblocking(raw_send_fd);  // not really necessary
    setnonblocking(raw_recv_fd);

    return 0;
}
#endif
#ifdef UDP2RAW_MP
// Sets up packet capture (libpcap) and, unless NO_LIBNET is defined, packet
// injection (libnet) for the UDP2RAW_MP build.
//
// Steps:
//   1. create the libnet context for raw IPv4/IPv6 on `dev`
//   2. create, configure and activate the pcap handle on `dev`
//   3. choose the capture direction (in+out when sending through pcap)
//   4. derive pcap_link_header_len from the datalink type
//   5. install an initial filter matching traffic from the local source
//      address to remote_addr
//   6. start the capture thread (pcap_recv_thread_entry)
//   7. seed g_ip_id_counter
//
// Every failure except pcap_setdirection() is fatal (myexit). Returns 0.
//
// The pcap_set_*() calls are kept OUTSIDE assert(): inside an assert they would
// not be executed at all when the file is compiled with NDEBUG.
int init_raw_socket() {
#ifndef NO_LIBNET
    char libnet_errbuf[LIBNET_ERRBUF_SIZE];

    if (raw_ip_version == AF_INET) {
        libnet_handle = libnet_init(LIBNET_RAW4, dev, libnet_errbuf);
    } else {
        assert(raw_ip_version == AF_INET6);
        libnet_handle = libnet_init(LIBNET_RAW6, dev, libnet_errbuf);
    }

    if (libnet_handle == 0) {
        mylog(log_fatal, "libnet_init failed bc of [%s]\n", libnet_errbuf);
        myexit(-1);
    }
    g_ptag = 0;
    libnet_clear_packet(libnet_handle);
#endif

    char pcap_errbuf[PCAP_ERRBUF_SIZE];

    // pcap_handle=pcap_open_live(dev,max_data_len,0,1000,pcap_errbuf);

    pcap_handle = pcap_create(dev, pcap_errbuf);

    if (pcap_handle == 0) {
        mylog(log_fatal, "pcap_create failed bc of [%s]\n", pcap_errbuf);
        myexit(-1);
    }

    // Capture options must be set before pcap_activate().
    int ret = pcap_set_snaplen(pcap_handle, huge_data_len);
    assert(ret == 0);
    ret = pcap_set_promisc(pcap_handle, 0);
    assert(ret == 0);
    ret = pcap_set_timeout(pcap_handle, 1);
    assert(ret == 0);
    ret = pcap_set_immediate_mode(pcap_handle, 1);
    assert(ret == 0);

    ret = pcap_activate(pcap_handle);
    if (ret < 0) {
        mylog(log_fatal, "pcap_activate failed  %s\n", pcap_geterr(pcap_handle));
        myexit(-1);
    }

    // A failing pcap_setdirection() is only logged, not treated as an error.
    if (send_with_pcap) {
        ret = pcap_setdirection(pcap_handle, PCAP_D_INOUT);  // must be used after being actived
        if (ret != 0) mylog(log_debug, "pcap_setdirection(pcap_handle,PCAP_D_INOUT) failed with value %d, %s\n", ret, pcap_geterr(pcap_handle));
    } else {
        ret = pcap_setdirection(pcap_handle, PCAP_D_IN);
        if (ret != 0) mylog(log_debug, "pcap_setdirection(pcap_handle,PCAP_D_IN) failed with value %d, %s\n", ret, pcap_geterr(pcap_handle));
    }

    ret = pcap_datalink(pcap_handle);

    if (ret == DLT_EN10MB) {
        pcap_link_header_len = 14;
    } else if (ret == DLT_NULL) {
        pcap_link_header_len = 4;
    } else if (ret == DLT_LINUX_SLL) {
        pcap_link_header_len = 16;
    } else {
        mylog(log_fatal, "unknown pcap link type : %d\n", ret);
        myexit(-1);
    }

    char filter_exp[1000];

    address_t tmp_addr;
    if (get_src_adress2(tmp_addr, remote_addr) != 0) {
        mylog(log_error, "get_src_adress() failed, maybe you dont have internet\n");
        myexit(-1);
    }

    // Both addresses are copied into strings before being formatted.
    string src = tmp_addr.get_ip();
    string dst = remote_addr.get_ip();
    if (raw_ip_version == AF_INET) {
        snprintf(filter_exp, sizeof(filter_exp), "ip and src %s and dst %s and (tcp or udp or icmp)", src.c_str(), dst.c_str());
    } else {
        assert(raw_ip_version == AF_INET6);
        snprintf(filter_exp, sizeof(filter_exp), "ip6 and src %s and dst %s and (tcp or udp or icmp6)", src.c_str(), dst.c_str());
    }

    if (pcap_compile(pcap_handle, &g_filter, filter_exp, 0, PCAP_NETMASK_UNKNOWN) == -1) {
        mylog(log_fatal, "Bad filter - %s\n", pcap_geterr(pcap_handle));
        myexit(-1);
    }
    // init_filter() relies on this counter to know g_filter holds a compiled program.
    g_filter_compile_cnt++;

    if (pcap_setfilter(pcap_handle, &g_filter) == -1) {
        mylog(log_fatal, "Error setting filter - %s\n", pcap_geterr(pcap_handle));
        myexit(-1);
    }

    ///////////////////////////////////////////////////////////////new thread created here
    if (pthread_create(&pcap_recv_thread, NULL, pcap_recv_thread_entry, 0)) {
        mylog(log_fatal, "Error creating thread\n");
        myexit(-1);
    }
    ////////////////////////////////////////////////////////////////////////////////

    g_ip_id_counter = get_true_random_number() % 65535;

    return 0;
}
#endif
#ifdef UDP2RAW_LINUX
// Installs the classic-BPF socket filter matching the current raw_mode and
// raw_ip_version on raw_recv_fd (Linux build).
//
// port: port to accept. It is patched into the selected filter program
//       for faketcp and udp modes and recorded in filter_port; icmp mode ignores it.
//
// Nothing is attached when disable_bpf_filter is set (filter_port is still
// updated). Any previously attached filter is detached first; a failure to
// detach is only logged, a failure to attach is fatal. An unknown raw_mode is
// fatal as well, so an uninitialised program is never handed to the kernel.
void init_filter(int port) {
    sock_fprog bpf = {};
    assert(raw_ip_version == AF_INET || raw_ip_version == AF_INET6);
    if (raw_mode == mode_faketcp || raw_mode == mode_udp) {
        filter_port = port;
    }
    if (disable_bpf_filter) return;
    // if(raw_mode==mode_icmp) return ;
    if (raw_mode == mode_faketcp) {
        if (raw_ip_version == AF_INET) {
            bpf.len = sizeof(code_tcp) / sizeof(code_tcp[0]);
            code_tcp[code_tcp_port_index].k = port;
            bpf.filter = code_tcp;
        } else {
            bpf.len = sizeof(code_tcp6) / sizeof(code_tcp6[0]);
            code_tcp6[code_tcp6_port_index].k = port;
            bpf.filter = code_tcp6;
        }
    } else if (raw_mode == mode_udp) {
        if (raw_ip_version == AF_INET) {
            bpf.len = sizeof(code_udp) / sizeof(code_udp[0]);
            code_udp[code_udp_port_index].k = port;
            bpf.filter = code_udp;
        } else {
            bpf.len = sizeof(code_udp6) / sizeof(code_udp6[0]);
            code_udp6[code_udp6_port_index].k = port;
            bpf.filter = code_udp6;
        }
    } else if (raw_mode == mode_icmp) {
        if (raw_ip_version == AF_INET) {
            bpf.len = sizeof(code_icmp) / sizeof(code_icmp[0]);
            bpf.filter = code_icmp;
        } else {
            bpf.len = sizeof(code_icmp6) / sizeof(code_icmp6[0]);
            bpf.filter = code_icmp6;
        }
    } else {
        // Same handling as the UDP2RAW_MP variant of this function.
        mylog(log_fatal, "unknow raw mode\n");
        myexit(-1);
    }

    // The option value is not meaningful for SO_DETACH_FILTER; a zeroed dummy is passed.
    int dummy = 0;

    int ret = setsockopt(raw_recv_fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy));  // in case i forgot to remove
    if (ret != 0) {
        // Not treated as an error: failure is only logged at debug level.
        mylog(log_debug, "error remove fiter\n");
        // perror("filter");
        // exit(-1);
    }
    ret = setsockopt(raw_recv_fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
    if (ret != 0) {
        mylog(log_fatal, "error set fiter\n");
        // perror("filter");
        myexit(-1);
    }
}
#endif

#ifdef UDP2RAW_MP
void init_filter(int port) {
    /*
    sock_fprog bpf;*/
    if (raw_mode == mode_faketcp || raw_mode == mode_udp) {
        filter_port = port;
    }

    char filter_exp[1000];

    if (raw_ip_version == AF_INET) {
        if (raw_mode == mode_faketcp) {
            sprintf(filter_exp, "ip and tcp and src %s and src port %d and dst port %d", remote_addr.get_ip(), remote_addr.get_port(), port);
        } else if (raw_mode == mode_udp) {
            sprintf(filter_exp, "ip and udp and src %s and src port %d and dst port %d", remote_addr.get_ip(), remote_addr.get_port(), port);
        } else if (raw_mode == mode_icmp) {
            sprintf(filter_exp, "ip and icmp and src %s", remote_addr.get_ip());
        } else {
            mylog(log_fatal, "unknow raw mode\n");
            myexit(-1);
        }
    } else {
        assert(raw_ip_version == AF_INET6);
        if (raw_mode == mode_faketcp) {
            sprintf(filter_exp, "ip6 and tcp and src %s and src port %d and dst port %d", remote_addr.get_ip(), remote_addr.get_port(), port);
        } else if (raw_mode == mode_udp) {
            sprintf(filter_exp, "ip6 and udp and src %s and src port %d and dst port %d", remote_addr.get_ip(), remote_addr.get_port(), port);
        } else if (raw_mode == mode_icmp) {
            sprintf(filter_exp, "ip6 and icmp6 and src %s", remote_addr.get_ip());
        } else {
            mylog(log_fatal, "unknow raw mode\n");
            myexit(-1);
        }
    }

    mylog(log_info, "filter expression is [%s]\n", filter_exp);

    // pthread_mutex_lock(&pcap_mutex);//not sure if mutex is needed here

    long long tmp_cnt = 0;
    if (use_pcap_mutex) {
        while (pthread_mutex_trylock(&pcap_mutex) != 0) {
            tmp_cnt++;
            pcap_breakloop(pcap_handle);
            if (tmp_cnt == 100) {
                mylog(log_warn, "%lld attempts of pcap_breakloop()\n", tmp_cnt);
            }
            if (tmp_cnt % 1000 == 0) {
                mylog(log_warn, "%lld attempts of pcap_breakloop()\n", tmp_cnt);
                if (tmp_cnt > 5000) {
                    mylog(log_fatal, "we might have already run into a deadlock\n");
                }
            }
            ev_sleep(0.001);
        }
        mylog(log_info, "breakloop() succeed after %lld attempt(s)\n", tmp_cnt);
    }

    if (1) {
        int ret = pcap_setdirection(pcap_handle, PCAP_D_IN);
        if (ret != 0) mylog(log_debug, "pcap_setdirection(pcap_handle,PCAP_D_IN) failed with value %d, %s\n", ret, pcap_geterr(pcap_handle));
    }

    assert(g_filter_compile_cnt != 0);
    pcap_freecode(&g_filter);

    if (pcap_compile(pcap_handle, &g_filter, filter_exp, 0, PCAP_NETMASK_UNKNOWN) == -1) {
        mylog(log_fatal, "Bad filter - %s\n", pcap_geterr(pcap_handle));
        myexit(-1);
    }
    g_filter_compile_cnt++;

    if (pcap_setfilter(pcap_handle, &g_filter) == -1) {
        mylog(log_fatal, "Error setting filter - %s\n", pcap_geterr(pcap_handle));
        myexit(-1);
    }

    if (use_pcap_mutex) pthread_mutex_unlock(&pcap_mutex);
    /*
    if(disable_bpf_filter) return;
    //if(raw_mode==mode_icmp) return ;
    //code_tcp[8].k=code_tcp[10].k=port;
    if(raw_mode==mode_faketcp)
    {
            bpf.len = sizeof(code_tcp)/sizeof(code_tcp[0]);
            code_tcp[code_tcp_port_index].k=port;
            bpf.filter = code_tcp;
    }
    else if(raw_mode==mode_udp)
    {
            bpf.len = sizeof(code_udp)/sizeof(code_udp[0]);
            code_udp[code_udp_port_index].k=port;
            bpf.filter = code_udp;
    }
    else if(raw_mode==mode_icmp)
    {
            bpf.len = sizeof(code_icmp)/sizeof(code_icmp[0]);
            bpf.filter = code_icmp;
    }

    int dummy;

    int ret=setsockopt(raw_recv_fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy)); //in case i forgot to remove
    if (ret != 0)
    {
            mylog(log_debug,"error remove fiter\n");
            //perror("filter");
            //exit(-1);
    }
    ret = setsockopt(raw_recv_fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
    if (ret != 0)
    {
            mylog(log_fatal,"error set fiter\n");
            //perror("filter");
            myexit(-1);
    }*/
}
#endif

// Clears filter_port and, on Linux, detaches any socket filter from raw_recv_fd.
// A failing detach is only reported at debug level.
void remove_filter() {
    filter_port = 0;
#ifdef UDP2RAW_LINUX
    int unused_optval = 0;
    const int detach_ret =
        setsockopt(raw_recv_fd, SOL_SOCKET, SO_DETACH_FILTER, &unused_optval, sizeof(unused_optval));
    if (detach_ret != 0) {
        mylog(log_debug, "error remove fiter\n");
    }
#endif
}

// Looks up the kernel interface index for `if_name` (Linux only).
//
// if_name: interface name; must be shorter than ifreq::ifr_name
// fd:      socket used for the SIOCGIFINDEX ioctl
// index:   receives the interface index
//
// A too-long name or a failing ioctl is fatal (myexit). On non-Linux builds
// the function does nothing. Always returns 0.
int init_ifindex(const char *if_name, int fd, int &index) {
#ifdef UDP2RAW_LINUX
    struct ifreq request;
    const size_t name_len = strlen(if_name);
    if (name_len >= sizeof(request.ifr_name)) {
        mylog(log_fatal, "interface name is too long\n");
        myexit(-1);
    } else {
        // name_len + 1 also copies the terminating NUL.
        memcpy(request.ifr_name, if_name, name_len + 1);
    }

    const int ioctl_ret = ioctl(fd, SIOCGIFINDEX, &request);
    if (ioctl_ret == -1) {
        mylog(log_fatal, "SIOCGIFINDEX fail ,%s\n", strerror(errno));
        myexit(-1);
    }

    index = request.ifr_ifindex;
    mylog(log_info, "ifname:%s  ifindex:%d\n", if_name, index);
#endif
    return 0;
}

#ifdef UDP2RAW_LINUX
// Reports whether `interface` uses ARP, i.e. whether its IFF_NOARP flag is clear.
//
// interface: interface name; must be shorter than ifreq::ifr_name
//
// The flags are queried with SIOCGIFFLAGS on raw_send_fd, so the send socket
// must already exist. A too-long name or a failing ioctl is fatal (myexit).
bool interface_has_arp(const char *interface) {
    struct ifreq ifr;
    // int sock = socket(PF_INET6, SOCK_DGRAM, IPPROTO_IP);
    int sock = raw_send_fd;
    memset(&ifr, 0, sizeof(ifr));

    // Bounded copy, mirroring the length check in init_ifindex(); the buffer was
    // zeroed above, so the copied name is always NUL-terminated.
    size_t name_len = strlen(interface);
    if (name_len >= sizeof(ifr.ifr_name)) {
        mylog(log_fatal, "interface name is too long\n");
        myexit(-1);
    } else {
        memcpy(ifr.ifr_name, interface, name_len);
    }

    if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
        // perror("SIOCGIFFLAGS");
        mylog(log_fatal, "ioctl(sock, SIOCGIFFLAGS, &ifr) failed for interface %s,errno %s\n", interface, strerror(errno));
        myexit(-1);
    }
    // close(sock);
    return !(ifr.ifr_flags & IFF_NOARP);
}
// One parsed line of /proc/net/route, as filled in by find_lower_level_info().
struct route_info_t {
    string if_name;  // interface name
    u32_t dest;      // destination, converted with hex_to_u32_with_endian()
    u32_t mask;      // netmask, converted with hex_to_u32_with_endian()
    u32_t gw;        // gateway, converted with hex_to_u32_with_endian()
    u32_t flag;      // route flags; bit 2 marks a route that goes through `gw` (see find_direct_dest())
};
// Column positions of the fields above within a /proc/net/route line.
int dest_idx = 1;
int gw_idx = 2;
int if_idx = 0;
int mask_idx = 7;
int flag_idx = 3;
// Longest-prefix match of `ip` against the routing table.
//
// route_info_vec: parsed routing table
// ip:             address to look up, in the same byte order as the table entries
//
// Masks are tried from the most specific (all ones) down to the default route
// (zero). Returns the indices of all entries that match at the first mask
// length producing any match, or an empty vector when nothing matches.
vector<int> find_route_entry(const vector<route_info_t> &route_info_vec, u32_t ip) {
    vector<int> res;
    for (u32_t i = 0; i <= 32; i++) {
        // Shifting a 32-bit value by 32 is undefined behaviour, so the all-zero
        // mask for i == 32 is produced explicitly instead of by shifting.
        const u32_t mask = (i == 32) ? 0 : (u32_t)(0xffffffffu << i);
        log_bare(log_debug, "(mask:%x)", mask);
        for (u32_t j = 0; j < route_info_vec.size(); j++) {
            const route_info_t &info = route_info_vec[j];
            // Only entries with exactly this prefix length are considered in this round.
            if (info.mask != mask)
                continue;
            log_bare(log_debug, "<<%u,%u>>", i, j);
            if ((info.dest & mask) == (ip & mask)) {
                log_bare(log_debug, "found!");
                res.push_back(j);
            }
        }
        if (res.size() != 0) {
            return res;
        }
    }
    return res;
}
int find_direct_dest(const vector<route_info_t> &route_info_vec, u32_t ip, u32_t &dest_ip, string &if_name) {
    vector<int> res;
    for (int i = 0; i < 1000; i++) {
        res = find_route_entry(route_info_vec, ip);
        log_bare(log_debug, "<entry:%u>", (u32_t)res.size());
        if (res.size() == 0) {
            mylog(log_error, "cant find route entry\n");
            return -1;
        }
        if (res.size() > 1) {
            mylog(log_error, "found duplicated entries\n");
            return -1;
        }
        if ((route_info_vec[res[0]].flag & 2) == 0) {
            dest_ip = ip;
            if_name = route_info_vec[res[0]].if_name;
            return 0;
        } else {
            ip = route_info_vec[res[0]].gw;
        }
    }
    mylog(log_error, "dead loop in find_direct_dest\n");
    return -1;
}
// One parsed line of /proc/net/arp, as filled in by find_lower_level_info().
struct arp_info_t {
    u32_t ip;        // IP address, stored as htonl(inet_addr(text))
    string hw;       // hardware address, kept as text
    string if_name;  // interface the entry belongs to
};
// Column positions of the fields above within a /proc/net/arp line.
int arp_ip_idx = 0;
int arp_hw_idx = 3;
int arp_if_idx = 5;

int find_arp(const vector<arp_info_t> &arp_info_vec, u32_t ip, string if_name, string &hw) {
    int pos = -1;
    int count = 0;
    for (u32_t i = 0; i < arp_info_vec.size(); i++) {
        const arp_info_t &info = arp_info_vec[i];
        if (info.if_name != if_name) continue;
        if (info.ip == ip) {
            count++;
            pos = i;
        }
    }
    if (count == 0) {
        // mylog(log_warn,"cant find arp entry for %s %s,using 00:00:00:00:00:00\n",my_ntoa(ip),if_name.c_str());
        // hw="00:00:00:00:00:00";
        mylog(log_error, "cant find arp entry for %s %s\n", my_ntoa(ip), if_name.c_str());
        return -1;
    }
    if (count > 1) {
        mylog(log_error, "find multiple arp entry for %s %s\n", my_ntoa(ip), if_name.c_str());
        return -1;
    }
    hw = arp_info_vec[pos].hw;
    return 0;
}
// Determines how to reach `ip` at link level by reading /proc/net/route and
// /proc/net/arp.
//
// ip:            destination address (as passed to my_ntoa after ntohl below,
//                i.e. the caller's representation is byte-swapped internally)
// dest_ip (out): next-hop address, converted back with ntohl() before returning
// if_name (out): outgoing interface name
// hw (out):      textual hardware address of the next hop; "00:00:00:00:00:00"
//                for 127.0.0.1 and for interfaces flagged NOARP
//
// Returns 0 on success, -1 when a file cannot be read, a line has an
// unexpected column count, a number fails to parse, or no route / ARP entry
// is found.
int find_lower_level_info(u32_t ip, u32_t &dest_ip, string &if_name, string &hw) {
    ip = htonl(ip);
    // Loopback is answered directly without consulting the tables.
    if (ip == htonl(inet_addr("127.0.0.1"))) {
        dest_ip = ntohl(ip);
        if_name = "lo";
        hw = "00:00:00:00:00:00";
        return 0;
    }

    string route_file;
    if (read_file("/proc/net/route", route_file) != 0) return -1;
    string arp_file;
    if (read_file("/proc/net/arp", arp_file) != 0) return -1;

    log_bare(log_debug, "/proc/net/route:<<%s>>\n", route_file.c_str());
    log_bare(log_debug, "/proc/net/arp:<<%s>>\n", arp_file.c_str());

    // ---- routing table; index 0 is the header line and is skipped ----
    auto route_vec2 = string_to_vec2(route_file.c_str());
    vector<route_info_t> route_info_vec;
    for (u32_t i = 1; i < route_vec2.size(); i++) {
        log_bare(log_debug, "<size:%u>", (u32_t)route_vec2[i].size());
        if (route_vec2[i].size() != 11) {
            mylog(log_error, "route coloum %d !=11 \n", int(route_vec2[i].size()));
            return -1;
        }
        route_info_t tmp;
        tmp.if_name = route_vec2[i][if_idx];
        if (hex_to_u32_with_endian(route_vec2[i][dest_idx], tmp.dest) != 0) return -1;
        if (hex_to_u32_with_endian(route_vec2[i][gw_idx], tmp.gw) != 0) return -1;
        if (hex_to_u32_with_endian(route_vec2[i][mask_idx], tmp.mask) != 0) return -1;
        if (hex_to_u32(route_vec2[i][flag_idx], tmp.flag) != 0) return -1;
        route_info_vec.push_back(tmp);
        for (u32_t j = 0; j < route_vec2[i].size(); j++) {
            log_bare(log_debug, "<%s>", route_vec2[i][j].c_str());
        }
        log_bare(log_debug, "%s dest:%x mask:%x gw:%x flag:%x", tmp.if_name.c_str(), tmp.dest, tmp.mask, tmp.gw, tmp.flag);
        log_bare(log_debug, "\n");
    }

    if (find_direct_dest(route_info_vec, ip, dest_ip, if_name) != 0) {
        mylog(log_error, "find_direct_dest failed for ip %s\n", my_ntoa(ntohl(ip)));
        return -1;
    }

    log_bare(log_debug, "========\n");

    // ---- ARP table; index 0 is the header line and is skipped ----
    auto arp_vec2 = string_to_vec2(arp_file.c_str());
    vector<arp_info_t> arp_info_vec;
    for (u32_t i = 1; i < arp_vec2.size(); i++) {
        log_bare(log_debug, "<<arp_vec2[i].size(): %d>>", (int)arp_vec2[i].size());

        for (u32_t j = 0; j < arp_vec2[i].size(); j++) {
            log_bare(log_debug, "<%s>", arp_vec2[i][j].c_str());
        }
        if (arp_vec2[i].size() != 6) {
            mylog(log_error, "arp coloum %d !=6 \n", int(arp_vec2[i].size()));
            return -1;
        }
        arp_info_t tmp;
        tmp.if_name = arp_vec2[i][arp_if_idx];
        tmp.hw = arp_vec2[i][arp_hw_idx];
        // Stored byte-swapped so it compares equal to dest_ip from find_direct_dest().
        tmp.ip = htonl(inet_addr(arp_vec2[i][arp_ip_idx].c_str()));
        arp_info_vec.push_back(tmp);
        log_bare(log_debug, "\n");
    }
    if (!interface_has_arp(if_name.c_str())) {
        mylog(log_info, "%s is a noarp interface,using 00:00:00:00:00:00\n", if_name.c_str());
        hw = "00:00:00:00:00:00";
    } else if (find_arp(arp_info_vec, dest_ip, if_name, hw) != 0) {
        // Report the address that was actually looked up (the next hop), not the original target.
        mylog(log_error, "find_arp failed for dest_ip %s ,if_name %s\n", my_ntoa(ntohl(dest_ip)), if_name.c_str());
        return -1;
    }
    // printf("%s\n",hw.c_str());

    dest_ip = ntohl(dest_ip);
    return 0;
}
#endif

#ifdef UDP2RAW_LINUX
/*
 * Linux: hand a fully built packet to the kernel through raw_send_fd.
 *
 * When lower_level is 0 the destination is a sockaddr_in / sockaddr_in6 built
 * from send_info.new_dst_ip (selected by raw_ip_version); otherwise the
 * link-layer address stored in send_info.addr_ll is used.
 *
 * Returns 0 when sendto() succeeds and -1 when it fails.
 */
int send_raw_packet(raw_info_t &raw_info, const char *packet, int len) {
    const packet_info_t &send_info = raw_info.send_info;

    // Start from "failed": if raw_ip_version is neither AF_INET nor AF_INET6 and
    // assert() is compiled out, the function must not read an uninitialised value.
    int ret = -1;
    if (lower_level == 0) {
        if (raw_ip_version == AF_INET) {
            struct sockaddr_in sin = {0};
            sin.sin_family = raw_ip_version;
            // sin_port is intentionally left at zero, it is not needed here
            sin.sin_addr.s_addr = send_info.new_dst_ip.v4;
            ret = sendto(raw_send_fd, packet, len, 0, (struct sockaddr *)&sin, sizeof(sin));
        } else if (raw_ip_version == AF_INET6) {
            struct sockaddr_in6 sin = {0};
            sin.sin6_family = raw_ip_version;
            // sin6_port is intentionally left at zero, it is not needed here
            sin.sin6_addr = send_info.new_dst_ip.v6;
            ret = sendto(raw_send_fd, packet, len, 0, (struct sockaddr *)&sin, sizeof(sin));
        } else {
            assert(0 == 1);
        }
    } else {
        struct sockaddr_ll addr = {0};  // fully overwritten by the memcpy below
        memcpy(&addr, &send_info.addr_ll, sizeof(addr));

        ret = sendto(raw_send_fd, packet, len, 0, (struct sockaddr *)&addr, sizeof(addr));
    }

    if (ret == -1) {
        mylog(log_trace, "sendto failed\n");
        return -1;
    }
    return 0;
}
#endif

#ifdef UDP2RAW_MP

/*
 * Multi-platform build: send a fully built IP packet.
 *
 * If send_with_pcap is not set the packet is written through libnet (nothing
 * is sent when the build defines NO_LIBNET). Otherwise the previously captured
 * link-layer header (pcap_header_buf, pcap_link_header_len bytes) is prepended
 * and the frame is injected with pcap_sendpacket().
 *
 * Returns -1 if the frame would not fit into the local buffer, 0 otherwise.
 * A pcap_sendpacket() failure is only logged, as before.
 */
int send_raw_packet(raw_info_t &raw_info, const char *packet, int len) {
    if (!send_with_pcap) {
#ifndef NO_LIBNET
        if (raw_ip_version == AF_INET) {
            libnet_write_raw_ipv4(libnet_handle, (const unsigned char *)packet, len);  // todo, this api is marked as internal, maybe we should avoid using it.
        } else {
            assert(raw_ip_version == AF_INET6);
            libnet_write_raw_ipv6(libnet_handle, (const unsigned char *)packet, len);
        }
#endif
        return 0;
    }

    char buf[buf_len];
    assert(pcap_header_captured == 1);
    assert(pcap_link_header_len != -1);

    // The link header and the packet are both copied into buf; refuse anything
    // that would run past its end instead of corrupting the stack.
    const int link_len = int(pcap_link_header_len);
    if (len < 0 || link_len < 0 || len > int(sizeof(buf)) - link_len) {
        mylog(log_warn, "packet does not fit into pcap send buffer, data_len=%d, link_header_len=%d, dropped\n", len, link_len);
        return -1;
    }

    memcpy(buf, pcap_header_buf, link_len);
    memcpy(buf + link_len, packet, len);

    // No mutex around pcap_sendpacket: it looked unnecessary and harmed performance.
    int ret = pcap_sendpacket(pcap_handle, (const unsigned char *)buf, len + link_len);
    if (ret != 0) {
        mylog(log_warn, "pcap_sendpcaket failed with vaule %d,%s, data_len=%d\n", ret, pcap_geterr(pcap_handle), len);
    }
    return 0;
}
#endif

/*
 * Prepend an IPv4 or IPv6 header (chosen by raw_ip_version) to `payload` and
 * pass the result to send_raw_packet().
 *
 * Addresses and the upper-layer protocol come from raw_info.send_info; the
 * TTL / hop limit comes from ttl_value.
 *
 * Returns 0 without sending when raw_info.disabled is set, -1 when the payload
 * does not fit into the local buffer or a 16-bit length field, and otherwise
 * whatever send_raw_packet() returns.
 */
int send_raw_ip(raw_info_t &raw_info, const char *payload, int payloadlen) {
    const packet_info_t &send_info = raw_info.send_info;
    const packet_info_t &recv_info = raw_info.recv_info;
    char send_raw_ip_buf[buf_len];

    if (raw_info.disabled) {
        mylog(log_debug, "[%s,%d]connection disabled, no packet will be sent\n", recv_info.new_src_ip.get_str1(), recv_info.src_port);
        assert(max_rst_allowed >= 0);
        return 0;
    }

    // Header + payload must fit both the stack buffer and the uint16_t total
    // length used below; reject anything larger instead of overflowing/truncating.
    const int ip_hdr_len = (raw_ip_version == AF_INET) ? int(sizeof(my_iphdr)) : int(sizeof(my_ip6hdr));
    if (payloadlen < 0 || payloadlen > int(sizeof(send_raw_ip_buf)) - ip_hdr_len || payloadlen > 65535 - ip_hdr_len) {
        mylog(log_warn, "send_raw_ip: payload len %d does not fit, packet dropped\n", payloadlen);
        return -1;
    }

    uint16_t ip_tot_len;
    if (raw_ip_version == AF_INET) {
        struct my_iphdr *iph = (struct my_iphdr *)send_raw_ip_buf;
        memset(iph, 0, sizeof(my_iphdr));

        iph->ihl = sizeof(my_iphdr) / 4;  // we dont use ip options,so the length is just sizeof(iphdr)
        iph->version = 4;
        iph->tos = 0;

        // The id is always taken from our own counter, with or without lower_level.
        iph->id = htons(g_ip_id_counter++);

        iph->frag_off = htons(0x4000);  // DF set,others are zero
        iph->ttl = (unsigned char)ttl_value;
        iph->protocol = send_info.protocol;
        iph->check = 0;                        // Set to 0 before calculating checksum
        iph->saddr = send_info.new_src_ip.v4;  // Spoof the source ip address
        iph->daddr = send_info.new_dst_ip.v4;

        ip_tot_len = sizeof(struct my_iphdr) + payloadlen;
#ifdef UDP2RAW_LINUX
        if (lower_level)
            iph->tot_len = htons(ip_tot_len);  // this is not necessary ,kernel will always auto fill this  //http://man7.org/linux/man-pages/man7/raw.7.html
        else
            iph->tot_len = 0;
#endif

#ifdef UDP2RAW_MP
        iph->tot_len = htons(ip_tot_len);  // always fill for mp version
#endif

        memcpy(send_raw_ip_buf + sizeof(my_iphdr), payload, payloadlen);

#ifdef UDP2RAW_LINUX
        if (lower_level)
            iph->check =
                csum((unsigned short *)send_raw_ip_buf, iph->ihl * 4);  // this is not necessary ,kernel will always auto fill this
        else
            iph->check = 0;
#endif

#ifdef UDP2RAW_MP
        iph->check = csum((unsigned short *)send_raw_ip_buf, iph->ihl * 4);  // always cal checksum for mp version
#endif
    } else {
        assert(raw_ip_version == AF_INET6);

        struct my_ip6hdr *ip6h = (struct my_ip6hdr *)send_raw_ip_buf;
        memset(ip6h, 0, sizeof(my_ip6hdr));

        ip6h->version = 6;
        ip6h->payload_len = htons(payloadlen);
        ip6h->next_header = send_info.protocol;
        ip6h->hop_limit = (unsigned char)ttl_value;
        ip6h->src = send_info.new_src_ip.v6;
        ip6h->dst = send_info.new_dst_ip.v6;

        ip_tot_len = sizeof(struct my_ip6hdr) + payloadlen;
        memcpy(send_raw_ip_buf + sizeof(my_ip6hdr), payload, payloadlen);
    }

    return send_raw_packet(raw_info, send_raw_ip_buf, ip_tot_len);
}

/*
 * Linux only: read one packet from raw_recv_fd into g_packet_buf and mark it
 * as pending (g_packet_buf_cnt becomes 1). The sender address is stored in
 * g_sockaddr / g_sockaddr_len.
 *
 * Oversized packets are dropped unless g_fix_gro is set. Returns 0 on success
 * and -1 when recvfrom() fails or the packet is dropped. On other platforms
 * this function does nothing and returns 0.
 */
int pre_recv_raw_packet() {
#ifdef UDP2RAW_LINUX
    assert(g_packet_buf_cnt == 0);

    g_sockaddr_len = sizeof(g_sockaddr.ll);
    // One byte more than huge_data_len is requested so that an over-long
    // packet can be told apart from one that exactly fills the limit.
    g_packet_buf_len = recvfrom(raw_recv_fd, g_packet_buf, huge_data_len + 1, 0, (sockaddr *)&g_sockaddr, &g_sockaddr_len);
    // note: g_sockaddr_len was observed to be 18 while sizeof(g_sockaddr.ll) is 20,
    // so the two are deliberately not asserted to be equal.

    if (g_packet_buf_len < 0) {
        mylog(log_trace, "recv_len %d\n", g_packet_buf_len);
        return -1;
    }

    const bool keep_oversized = (g_fix_gro != 0);

    if (g_packet_buf_len == huge_data_len + 1) {
        if (!keep_oversized) {
            mylog(log_warn, "huge packet, data_len %d > %d,dropped\n", g_packet_buf_len, huge_data_len);
            return -1;
        }
        mylog(log_debug, "huge packet, data_len %d > %d,not dropped\n", g_packet_buf_len, huge_data_len);
        g_packet_buf_len = huge_data_len;
    }

    if (g_packet_buf_len > max_data_len) {
        if (!keep_oversized) {
            mylog(log_warn, "huge packet, data_len %d > %d(max_data_len) dropped, maybe you need to turn down mtu at upper level, or you may take a look at --fix-gro\n", g_packet_buf_len,
                  max_data_len);
            return -1;
        }
        mylog(log_debug, "huge packet, data_len %d > %d(max_data_len) not dropped\n", g_packet_buf_len,
              max_data_len);
    }

    ++g_packet_buf_cnt;
#endif
    return 0;
}
/*
 * Drop the pending packet: asserts that exactly one packet is buffered and
 * decrements g_packet_buf_cnt. Always returns 0.
 */
int discard_raw_packet() {
    assert(g_packet_buf_cnt == 1);
    g_packet_buf_cnt -= 1;
    return 0;
}
#ifdef UDP2RAW_LINUX
/*
 * Linux: expose the pending packet in g_packet_buf with its link-level header
 * stripped.
 *
 * packet / len receive the start and length of the data after the link-level
 * header. Unless `peek` is non-zero the pending-packet counter is decremented,
 * even when -1 is returned.
 *
 * Returns -1 when the buffer is shorter than the link-level header or, for a
 * 14-byte header, when bytes 12-13 are neither 0x0800 nor 0x86dd; 0 otherwise.
 */
int recv_raw_packet(char *&packet, int &len, int peek) {
    assert(g_packet_buf_cnt == 1);
    if (peek == 0) {
        --g_packet_buf_cnt;
    }

    const int header_len = int(link_level_header_len);
    if (g_packet_buf_len < header_len) {
        mylog(log_trace, "packet len %d shorter than link_level_header_len %d\n", g_packet_buf_len, header_len);
        return -1;
    }

    if (link_level_header_len == 14) {
        const unsigned char type_hi = g_packet_buf[12];
        const unsigned char type_lo = g_packet_buf[13];

        const bool is_ipv4 = (type_hi == 0x08 && type_lo == 0x00);
        const bool is_ipv6 = (type_hi == 0x86 && type_lo == 0xdd);
        if (!is_ipv4 && !is_ipv6) {
            mylog(log_trace, "not an ipv4 or ipv6 packet!\n");
            return -1;
        }
    }

    packet = g_packet_buf + header_len;
    len = g_packet_buf_len - header_len;
    return 0;
}
#endif
#ifdef UDP2RAW_MP
/*
 * Multi-platform build: expose the pending packet in g_packet_buf as is.
 * Unless `peek` is non-zero the pending-packet counter is decremented.
 * Always returns 0.
 */
int recv_raw_packet(char *&packet, int &len, int peek) {
    assert(g_packet_buf_cnt == 1);
    if (peek == 0) {
        --g_packet_buf_cnt;
    }

    len = g_packet_buf_len;
    packet = g_packet_buf;
    return 0;
}
#endif
/*
 * Parse the IP header of the pending raw packet.
 *
 * On success, raw_info.recv_info receives the source/destination address and
 * the upper-layer protocol (plus, on Linux with lower_level set, the sender's
 * link-layer address), and payload / payloadlen describe the data following
 * the IP header inside the receive buffer.
 *
 * Returns 0 on success and -1 when the packet is not of the configured IP
 * version, is truncated, has an invalid IPv4 header length, is not addressed
 * to bind_addr (when bind_addr_used), or fails the IPv4 header checksum. The
 * checksum is only verified when raw_info.peek is 0.
 */
int recv_raw_ip(raw_info_t &raw_info, char *&payload, int &payloadlen) {
    char *raw_packet_buf;
    int raw_packet_len;

    if (recv_raw_packet(raw_packet_buf, raw_packet_len, raw_info.peek) != 0) return -1;

    packet_info_t &recv_info = raw_info.recv_info;

    char *ip_begin = raw_packet_buf;  // any link-level header has already been stripped

    if (raw_packet_len < 1) {
        mylog(log_trace, "raw_packet_len <1, dropped\n");
        return -1;
    }

    my_iphdr *iph = (struct my_iphdr *)(ip_begin);
    my_ip6hdr *ip6h = (struct my_ip6hdr *)(ip_begin);
    if (raw_ip_version == AF_INET) {
        if (iph->version != 4) {
            mylog(log_trace, "expect ipv4 packet, but got something else: %02x\n", iph->version);
            return -1;
        }
        if (raw_packet_len < (int)sizeof(my_iphdr)) {
            mylog(log_trace, "raw_packet_len<sizeof(iphdr)\n");
            return -1;
        }
    } else {
        assert(raw_ip_version == AF_INET6);
        if (ip6h->version != 6) {
            mylog(log_trace, "expect ipv6 packet, but got something else: %02x\n", ip6h->version);
            return -1;
        }
        if (raw_packet_len < (int)sizeof(my_ip6hdr)) {
            mylog(log_trace, "raw_packet_len<sizeof(ip6_hdr)\n");
            return -1;
        }
    }
#ifdef UDP2RAW_LINUX
    if (lower_level) {
        memcpy(&recv_info.addr_ll, &g_sockaddr.ll, sizeof(recv_info.addr_ll));
    }
#endif

    unsigned short iphdrlen;
    int ip_len;
    if (raw_ip_version == AF_INET) {
        recv_info.new_src_ip.v4 = iph->saddr;
        recv_info.new_dst_ip.v4 = iph->daddr;
        recv_info.protocol = iph->protocol;
        iphdrlen = iph->ihl * 4;
        ip_len = ntohs(iph->tot_len);
    } else {
        // todo flow id
        assert(raw_ip_version == AF_INET6);
        recv_info.new_src_ip.v6 = ip6h->src;
        recv_info.new_dst_ip.v6 = ip6h->dst;
        iphdrlen = 40;
        recv_info.protocol = ip6h->next_header;  // todo handle extension headers;
        ip_len = ntohs(ip6h->payload_len) + iphdrlen;
    }

    if (bind_addr_used && !recv_info.new_dst_ip.equal(bind_addr)) {
        mylog(log_trace, "bind adress doenst match %s %s, dropped\n", recv_info.new_dst_ip.get_str1(), bind_addr.get_str2());
        return -1;
    }

    // An IPv4 header is never shorter than the fixed 20-byte part; a smaller
    // ihl would make the code below treat header bytes as payload.
    if (raw_ip_version == AF_INET && iphdrlen < sizeof(my_iphdr)) {
        mylog(log_trace, "iph ihl error\n");
        return -1;
    }

    if (raw_packet_len < ip_len) {
        mylog(log_debug, "incomplete packet\n");
        return -1;
    }

    // The header must lie inside the datagram. Checked before the checksum so
    // that csum() never reads header bytes beyond the length the packet claims.
    if (ip_len < iphdrlen) {
        mylog(log_warn, "error payload len\n");
        return -1;
    }

    if (raw_ip_version == AF_INET) {
        if (raw_info.peek == 0)  // avoid cal it twice
        {
            u32_t ip_chk = csum((unsigned short *)ip_begin, iphdrlen);

            if (ip_chk != 0) {
                mylog(log_debug, "ip header error %x\n", ip_chk);
                return -1;
            }
        }
    }

    payload = ip_begin + iphdrlen;
    payloadlen = ip_len - iphdrlen;

    return 0;
}

/*
 * Parse just enough of the pending packet to identify its sender.
 *
 * Runs recv_raw_ip() (which fills the addresses and protocol in
 * raw_info.recv_info) and then, depending on raw_mode, reads the transport
 * header to set recv_info.src_port: the TCP/UDP source port, or the ICMP id
 * in icmp mode. In faketcp mode recv_info.syn is set as well.
 *
 * Whether the pending packet is consumed depends on raw_info.peek, which
 * recv_raw_ip() forwards to recv_raw_packet().
 *
 * Returns 0 on success, -1 if the packet cannot be parsed, carries a
 * different protocol than raw_mode expects, or is too short for the header.
 */
int peek_raw(raw_info_t &raw_info) {
    packet_info_t &recv_info = raw_info.recv_info;

    char *payload;
    int payload_len;
    if (recv_raw_ip(raw_info, payload, payload_len) != 0)
        return -1;

    switch (raw_mode) {
        case mode_faketcp: {
            if (recv_info.protocol != IPPROTO_TCP) {
                mylog(log_trace, "failed here");
                return -1;
            }
            struct my_tcphdr *tcph = (my_tcphdr *)payload;
            if (payload_len < int(sizeof(my_tcphdr))) {
                mylog(log_trace, "failed here");
                return -1;
            }
            recv_info.src_port = ntohs(tcph->source);
            recv_info.syn = tcph->syn;
            break;
        }
        case mode_udp: {
            if (recv_info.protocol != IPPROTO_UDP) return -1;
            struct my_udphdr *udph = (my_udphdr *)payload;
            if (payload_len < int(sizeof(my_udphdr)))
                return -1;
            recv_info.src_port = ntohs(udph->source);
            break;
        }
        case mode_icmp: {
            if (raw_ip_version == AF_INET) {
                if (recv_info.protocol != IPPROTO_ICMP) return -1;
            } else {
                assert(raw_ip_version == AF_INET6);
                if (recv_info.protocol != IPPROTO_ICMPV6) return -1;
            }
            struct my_icmphdr *icmph = (my_icmphdr *)payload;
            // Validate against the ICMP header itself, not the UDP header.
            if (payload_len < int(sizeof(my_icmphdr)))
                return -1;
            recv_info.src_port = ntohs(icmph->id);
            break;
        }
        default:
            return -1;
    }
    return 0;
}
/*
 * Wrap `payload` in an ICMP (IPv4) or ICMPv6 echo message and send it through
 * send_raw_ip().
 *
 * The client sends echo requests (type 8 / 128) and the server echo replies
 * (type 0 / 129). The id is send_info.src_port and the sequence number is
 * send_info.my_icmp_seq. For IPv6 the checksum additionally covers a pseudo
 * header built from the source and destination addresses.
 *
 * Returns 0 on success, -1 if the payload does not fit into the local buffer
 * or send_raw_ip() fails.
 */
int send_raw_icmp(raw_info_t &raw_info, const char *payload, int payloadlen) {
    const packet_info_t &send_info = raw_info.send_info;

    char send_raw_icmp_buf[buf_len];

    // Header + payload are assembled in the stack buffer; never write past it.
    if (payloadlen < 0 || payloadlen > int(sizeof(send_raw_icmp_buf)) - int(sizeof(my_icmphdr))) {
        mylog(log_debug, "send_raw_icmp: invalid payload len %d\n", payloadlen);
        return -1;
    }
    const int icmp_tot_len = int(sizeof(my_icmphdr)) + payloadlen;

    my_icmphdr *icmph = (struct my_icmphdr *)(send_raw_icmp_buf);
    memset(icmph, 0, sizeof(my_icmphdr));  // also zeroes check_sum before it is computed
    if (raw_ip_version == AF_INET) {
        icmph->type = (program_mode == client_mode) ? 8 : 0;
    } else {
        assert(raw_ip_version == AF_INET6);
        icmph->type = (program_mode == client_mode) ? 128 : 129;
    }
    icmph->code = 0;
    icmph->id = htons(send_info.src_port);
    icmph->seq = htons(send_info.my_icmp_seq);

    memcpy(send_raw_icmp_buf + sizeof(my_icmphdr), payload, payloadlen);

    if (raw_ip_version == AF_INET) {
        icmph->check_sum = csum((unsigned short *)send_raw_icmp_buf, icmp_tot_len);
    } else {
        assert(raw_ip_version == AF_INET6);

        pseudo_header6 v6;
        struct pseudo_header6 *psh = &v6;

        psh->src = send_info.new_src_ip.v6;
        psh->dst = send_info.new_dst_ip.v6;
        psh->next_header = IPPROTO_ICMPV6;
        psh->tcp_length = htons(icmp_tot_len);
        psh->placeholder1 = 0;
        psh->placeholder2 = 0;

        icmph->check_sum = csum_with_header((char *)psh, sizeof(pseudo_header6), (unsigned short *)send_raw_icmp_buf, icmp_tot_len);
    }

    if (send_raw_ip(raw_info, send_raw_icmp_buf, icmp_tot_len) != 0) {
        return -1;
    }

    return 0;
}

/*
 * Wrap `payload` in a UDP header and send it through send_raw_ip().
 *
 * Ports come from raw_info.send_info. The checksum covers an IPv4 or IPv6
 * pseudo header (chosen by raw_ip_version) followed by the UDP datagram.
 *
 * Returns 0 on success, -1 if the datagram would exceed 65535 bytes or the
 * local buffer, or if send_raw_ip() fails.
 */
int send_raw_udp(raw_info_t &raw_info, const char *payload, int payloadlen) {
    const packet_info_t &send_info = raw_info.send_info;

    char send_raw_udp_buf[buf_len];

    // The datagram has to fit the 16-bit UDP length field and the stack buffer
    // it is assembled in.
    const int hdr_len = int(sizeof(my_udphdr));
    const int buf_cap = int(sizeof(send_raw_udp_buf));
    const int max_total = (buf_cap < 65535) ? buf_cap : 65535;
    if (payloadlen < 0 || payloadlen > max_total - hdr_len) {
        mylog(log_debug, "invalid len\n");
        return -1;
    }
    const int udp_tot_len = hdr_len + payloadlen;

    my_udphdr *udph = (struct my_udphdr *)(send_raw_udp_buf);
    memset(udph, 0, sizeof(my_udphdr));  // also zeroes the checksum field before it is computed

    udph->source = htons(send_info.src_port);
    udph->dest = htons(send_info.dst_port);
    udph->len = htons(uint16_t(udp_tot_len));
    // Logged after the assignment so the second value is the length actually sent.
    mylog(log_trace, "udp_len:%d %d\n", udp_tot_len, ntohs(udph->len));

    memcpy(send_raw_udp_buf + sizeof(my_udphdr), payload, payloadlen);

    if (raw_ip_version == AF_INET) {
        pseudo_header v4;
        struct pseudo_header *psh = &v4;

        psh->source_address = send_info.new_src_ip.v4;
        psh->dest_address = send_info.new_dst_ip.v4;
        psh->placeholder = 0;
        psh->protocol = IPPROTO_UDP;
        psh->tcp_length = htons(udp_tot_len);

        udph->check = csum_with_header((char *)psh, sizeof(pseudo_header), (unsigned short *)send_raw_udp_buf, udp_tot_len);
    } else {
        assert(raw_ip_version == AF_INET6);
        pseudo_header6 v6;
        struct pseudo_header6 *psh = &v6;

        psh->src = send_info.new_src_ip.v6;
        psh->dst = send_info.new_dst_ip.v6;
        psh->next_header = IPPROTO_UDP;
        psh->tcp_length = htons(udp_tot_len);

        psh->placeholder1 = 0;
        psh->placeholder2 = 0;

        udph->check = csum_with_header((char *)psh, sizeof(pseudo_header6), (unsigned short *)send_raw_udp_buf, udp_tot_len);
    }

    if (send_raw_ip(raw_info, send_raw_udp_buf, udp_tot_len) != 0) {
        return -1;
    }
    return 0;
}

/*
 * Build a TCP segment around `payload` and send it through send_raw_ip().
 *
 * Ports, seq/ack numbers and the syn/psh/ack flags come from
 * raw_info.send_info; fin, rst and urg are always 0. SYN segments carry a
 * 40-byte header (MSS, SACK-permitted, timestamps, NOP, window scale), all
 * other segments a 32-byte header (NOP, NOP, timestamps). The advertised
 * window is randomised within receive_window_random_range above
 * receive_window_lower_bound.
 *
 * On success raw_info.send_info.data_len is set to payloadlen.
 * Returns 0 on success, -1 if the segment does not fit into the local buffer
 * or send_raw_ip() fails.
 */
int send_raw_tcp(raw_info_t &raw_info, const char *payload, int payloadlen) {  // TODO seq increase

    const packet_info_t &send_info = raw_info.send_info;

    char send_raw_tcp_buf[buf_len];

    struct my_tcphdr *tcph = (struct my_tcphdr *)(send_raw_tcp_buf);

    memset(tcph, 0, sizeof(my_tcphdr));

    // TCP Header
    tcph->source = htons(send_info.src_port);
    tcph->dest = htons(send_info.dst_port);

    tcph->seq = htonl(send_info.seq);
    tcph->ack_seq = htonl(send_info.ack_seq);

    tcph->fin = 0;
    tcph->syn = send_info.syn;
    tcph->rst = 0;
    tcph->psh = send_info.psh;
    tcph->ack = send_info.ack;

    const bool is_syn = (tcph->syn == 1);
    const int tcp_hdr_len = is_syn ? 40 : 32;  // doff 10 or 8, in bytes

    // Header, options and payload are assembled in the stack buffer; refuse
    // anything that would run past its end.
    if (payloadlen < 0 || payloadlen > int(sizeof(send_raw_tcp_buf)) - tcp_hdr_len) {
        mylog(log_debug, "send_raw_tcp: invalid payload len %d\n", payloadlen);
        return -1;
    }

    tcph->doff = tcp_hdr_len / 4;  // tcp header size in 32-bit words

    int i = sizeof(my_tcphdr);
    if (is_syn) {
        send_raw_tcp_buf[i++] = 0x02;  // mss
        send_raw_tcp_buf[i++] = 0x04;
        send_raw_tcp_buf[i++] = 0x05;  // 0x05b4 = 1460
        send_raw_tcp_buf[i++] = (char)0xb4;

        send_raw_tcp_buf[i++] = 0x04;  // sack ok
        send_raw_tcp_buf[i++] = 0x02;  // sack ok
    } else {
        send_raw_tcp_buf[i++] = 0x01;  // nop
        send_raw_tcp_buf[i++] = 0x01;  // nop
    }

    // Timestamp option, present in every segment: kind 8, length 10, then our
    // current time and the echoed peer timestamp. memcpy is used because the
    // offsets are not guaranteed to be 4-byte aligned.
    send_raw_tcp_buf[i++] = 0x08;  // ts
    send_raw_tcp_buf[i++] = 0x0a;

    u32_t ts = htonl((u32_t)get_current_time());
    memcpy(&send_raw_tcp_buf[i], &ts, sizeof(ts));
    i += 4;

    u32_t ts_ack = htonl(send_info.ts_ack);
    memcpy(&send_raw_tcp_buf[i], &ts_ack, sizeof(ts_ack));
    i += 4;

    if (is_syn) {
        send_raw_tcp_buf[i++] = 0x01;  // nop
        send_raw_tcp_buf[i++] = 0x03;  // window scale
        send_raw_tcp_buf[i++] = 0x03;
        send_raw_tcp_buf[i++] = wscale;
    }

    tcph->urg = 0;
    tcph->window = htons((uint16_t)(receive_window_lower_bound + get_true_random_number() % receive_window_random_range));

    tcph->check = 0;  // leave checksum 0 now, filled later by pseudo header
    tcph->urg_ptr = 0;

    char *tcp_data = send_raw_tcp_buf + tcp_hdr_len;

    memcpy(tcp_data, payload, payloadlen);
    int tcp_totlen = tcp_hdr_len + payloadlen;

    if (raw_ip_version == AF_INET) {
        pseudo_header v4;
        struct pseudo_header *psh = &v4;

        psh->source_address = send_info.new_src_ip.v4;
        psh->dest_address = send_info.new_dst_ip.v4;
        psh->placeholder = 0;
        psh->protocol = IPPROTO_TCP;
        psh->tcp_length = htons(tcp_totlen);

        tcph->check = csum_with_header((char *)psh, sizeof(pseudo_header), (unsigned short *)send_raw_tcp_buf, tcp_totlen);
    } else {
        assert(raw_ip_version == AF_INET6);

        pseudo_header6 v6;
        struct pseudo_header6 *psh = &v6;

        psh->src = send_info.new_src_ip.v6;
        psh->dst = send_info.new_dst_ip.v6;
        psh->next_header = IPPROTO_TCP;
        psh->tcp_length = htons(tcp_totlen);
        psh->placeholder1 = 0;
        psh->placeholder2 = 0;

        tcph->check = csum_with_header((char *)psh, sizeof(pseudo_header6), (unsigned short *)send_raw_tcp_buf, tcp_totlen);
    }

    if (send_raw_ip(raw_info, send_raw_tcp_buf, tcp_totlen) != 0) {
        return -1;
    }

    raw_info.send_info.data_len = payloadlen;
    return 0;
}
/*
int send_raw_tcp_deprecated(const packet_info_t &info,const char * payload,int payloadlen)
{
        static uint16_t ip_id=1;
        char raw_send_buf[buf_len];
        char raw_send_buf2[buf_len];

        //if((prog_mode==client_mode&& payloadlen!=9)  ||(prog_mode==server_mode&& payloadlen!=5 )  )
        mylog(log_trace,"send raw from to %d %d %d %d\n",info.src_ip,info.src_port,info.dst_ip,info.dst_port);

        char *data;

    memset(raw_send_buf,0,payloadlen+100);

    struct iphdr *iph = (struct iphdr *) raw_send_buf;

    //TCP header
    struct tcphdr *tcph = (struct tcphdr *) (raw_send_buf + sizeof (struct ip));

    struct sockaddr_in sin;
    struct pseudo_header psh;

    //some address resolution
    sin.sin_family = AF_INET;
    sin.sin_port = htons(info.dst_port);
    sin.sin_addr.s_addr = info.dst_ip;

    //Fill in the IP Header
    iph->ihl = 5;
    iph->version = 4;
    iph->tos = 0;

    iph->id = htonl (ip_id++); //Id of this packet
    iph->frag_off = htons(0x4000); //DF set,others are zero
    iph->ttl = (unsigned char)ttl_value;
    iph->protocol = IPPROTO_TCP;
    iph->check = 0; //Set to 0 before calculating checksum
    iph->saddr = info.src_ip;    //Spoof the source ip address
    iph->daddr = info.dst_ip;

    //TCP Header
    tcph->source = htons(info.src_port);
    tcph->dest = htons(info.dst_port);

    tcph->seq =htonl(info.seq);
    tcph->ack_seq = htonl(info.ack_seq);

    tcph->fin=0;
    tcph->syn=info.syn;
    tcph->rst=0;
    tcph->psh=info.psh;
    tcph->ack=info.ack;

    if(tcph->syn==1)
    {
        tcph->doff = 10;  //tcp header size
        int i=sizeof (struct iphdr)+20;
        raw_send_buf[i++]=0x02;//mss
        raw_send_buf[i++]=0x04;
        raw_send_buf[i++]=0x05;
        raw_send_buf[i++]=0xb4;

        //raw_send_buf[i++]=0x01;
        //raw_send_buf[i++]=0x01;
        raw_send_buf[i++]=0x04; //sack ok
        raw_send_buf[i++]=0x02; //sack ok


        raw_send_buf[i++]=0x08;   //i=6;
        raw_send_buf[i++]=0x0a;

        *(uint32_t*)(& raw_send_buf[i])=htonl((uint32_t)get_current_time());

        i+=4;

        *(uint32_t*)(& raw_send_buf[i])=htonl(info.ts_ack);
        i+=4;

        raw_send_buf[i++]=0x01;
        raw_send_buf[i++]=0x03;
        raw_send_buf[i++]=0x03;
        raw_send_buf[i++]=0x05;
    }
    else
    {
        tcph->doff=8;
        int i=sizeof (struct iphdr)+20;

        raw_send_buf[i++]=0x01;
        raw_send_buf[i++]=0x01;

        raw_send_buf[i++]=0x08;   //i=0;
        raw_send_buf[i++]=0x0a;

        *(uint32_t*)(& raw_send_buf[i])=htonl((uint32_t)get_current_time());

        i+=4;

        *(uint32_t*)(& raw_send_buf[i])=htonl(info.ts_ack);
        i+=4;


    }


    tcph->urg=0;
    //tcph->window = htons((uint16_t)(1024));
    tcph->window = htons((uint16_t)(10240+random()%100));


    tcph->check = 0; //leave checksum 0 now, filled later by pseudo header
    tcph->urg_ptr = 0;


    //Data part
    data = raw_send_buf + sizeof(struct iphdr) + tcph->doff*4;

    iph->tot_len = sizeof (struct iphdr) + tcph->doff*4 + payloadlen;

    memcpy(data , payload, payloadlen);

    psh.source_address = info.src_ip;
    psh.dest_address = sin.sin_addr.s_addr;
    psh.placeholder = 0;
    psh.protocol = IPPROTO_TCP;
    psh.tcp_length = htons(tcph->doff*4 + payloadlen );

    int psize = sizeof(struct pseudo_header) + tcph->doff*4 + payloadlen;

     memcpy(raw_send_buf2 , (char*) &psh , sizeof (struct pseudo_header));
     memcpy(raw_send_buf2 + sizeof(struct pseudo_header) , tcph , tcph->doff*4 + payloadlen);

     tcph->check = csum( (unsigned short*) raw_send_buf2, psize);

     //Ip checksum
     iph->check = csum ((unsigned short *) raw_send_buf, iph->tot_len);

     mylog(log_trace,"sent seq  ack_seq len<%u %u %d>\n",g_packet_info_send.seq,g_packet_info_send.ack_seq,payloadlen);

     int ret = sendto(raw_send_fd, raw_send_buf, iph->tot_len ,  0, (struct sockaddr *) &sin, sizeof (sin));

     if(g_packet_info_send.syn==0&&g_packet_info_send.ack==1&&payloadlen!=0)
     {
         if(seq_mode==0)
         {


         }
         else if(seq_mode==1)
         {
                 g_packet_info_send.seq+=payloadlen;
         }
         else if(seq_mode==2)
         {
                 if(random()% 5==3 )
                         g_packet_info_send.seq+=payloadlen;
         }
     }
     mylog(log_trace,"<ret:%d>\n",ret);
         if(ret<0)
     {
                mylog(log_fatal,"");
         perror("raw send error\n");
         //printf("send error\n");
     }
     return 0;
}
*/

/*
 * Receive and validate one ICMP / ICMPv6 echo message.
 *
 * The packet is accepted only if its id equals send_info.src_port, its code
 * is 0, its type matches the role (the client expects replies, type 0 / 129;
 * the server expects requests, type 8 / 128) and its checksum verifies. For
 * IPv6 the checksum additionally covers a pseudo header built from the
 * received addresses.
 *
 * recv_info.src_port, recv_info.dst_port and recv_info.my_icmp_seq are written
 * as soon as the id matches, i.e. before the code, type and checksum checks.
 *
 * On success payload / payloadlen describe the data after the ICMP header.
 * Returns 0 on success and -1 otherwise.
 */
int recv_raw_icmp(raw_info_t &raw_info, char *&payload, int &payloadlen) {
    const packet_info_t &send_info = raw_info.send_info;
    packet_info_t &recv_info = raw_info.recv_info;

    char *ip_payload;
    int ip_payloadlen;

    if (recv_raw_ip(raw_info, ip_payload, ip_payloadlen) != 0) {
        mylog(log_debug, "recv_raw_ip error\n");
        return -1;
    }
    mylog(log_trace, "ip_payloadlen=%d\n", ip_payloadlen);
    if (raw_ip_version == AF_INET) {
        if (recv_info.protocol != IPPROTO_ICMP) {
            // not an icmp packet
            return -1;
        }
    } else {
        assert(raw_ip_version == AF_INET6);
        if (recv_info.protocol != IPPROTO_ICMPV6) {
            // not an icmpv6 packet
            return -1;
        }
    }

    if (ip_payloadlen < int(sizeof(my_icmphdr))) {
        mylog(log_debug, "too short to hold icmp header\n");
        return -1;
    }

    my_icmphdr *icmph = (struct my_icmphdr *)(ip_payload);

    if (ntohs(icmph->id) != send_info.src_port) {
        mylog(log_debug, "icmp id mis-match,ignored\n");
        return -1;
    }

    recv_info.src_port = recv_info.dst_port = ntohs(icmph->id);
    recv_info.my_icmp_seq = ntohs(icmph->seq);

    if (icmph->code != 0)
        return -1;

    unsigned short check;
    if (raw_ip_version == AF_INET) {
        if (program_mode == client_mode) {
            if (icmph->type != 0)
                return -1;
        } else {
            if (icmph->type != 8)
                return -1;
        }
        check = csum((unsigned short *)ip_payload, ip_payloadlen);
    } else {
        assert(raw_ip_version == AF_INET6);
        if (program_mode == client_mode) {
            if (icmph->type != 129)
                return -1;
        } else {
            if (icmph->type != 128)
                return -1;
        }

        pseudo_header6 tmp_header;
        struct pseudo_header6 *psh = &tmp_header;

        psh->src = recv_info.new_src_ip.v6;
        psh->dst = recv_info.new_dst_ip.v6;
        psh->placeholder1 = 0;
        psh->placeholder2 = 0;
        psh->next_header = IPPROTO_ICMPV6;
        psh->tcp_length = htons(ip_payloadlen);

        check = csum_with_header((char *)psh, sizeof(pseudo_header6), (unsigned short *)ip_payload, ip_payloadlen);
    }

    // The checksum over a valid message, including its checksum field, is 0.
    if (check != 0) {
        mylog(log_debug, "icmp checksum fail %x\n", check);
        return -1;
    }

    payload = ip_payload + sizeof(my_icmphdr);
    payloadlen = ip_payloadlen - sizeof(my_icmphdr);
    mylog(log_trace, "get a packet len=%d\n", payloadlen);

    return 0;
}

/*
 * Receive one raw IP packet and unpack it as a UDP datagram.
 *
 * The datagram must be UDP, must be at least as long as a UDP header, its length
 * field must equal the IP payload length, its destination port must equal
 * filter_port, and its checksum (computed together with the IPv4 or IPv6
 * pseudo header) must verify to zero.
 *
 * On success recv_info.src_port/dst_port are filled in, `payload` points just
 * past the UDP header and `payloadlen` is the number of bytes that follow it.
 *
 * Returns 0 on success, -1 when the packet is rejected.
 */
int recv_raw_udp(raw_info_t &raw_info, char *&payload, int &payloadlen) {
    packet_info_t &recv_info = raw_info.recv_info;

    char *datagram;
    int datagram_len;
    if (recv_raw_ip(raw_info, datagram, datagram_len) != 0) {
        mylog(log_debug, "recv_raw_ip error\n");
        return -1;
    }

    // Packets of any other protocol are dropped without logging.
    if (recv_info.protocol != IPPROTO_UDP) {
        return -1;
    }

    const int header_len = int(sizeof(my_udphdr));
    if (datagram_len < header_len) {
        mylog(log_debug, "too short to hold udpheader\n");
        return -1;
    }

    my_udphdr *header = (struct my_udphdr *)datagram;

    // The length recorded in the header has to agree with what was actually received.
    if (int(ntohs(header->len)) != datagram_len) {
        mylog(log_debug, "udp length error %d %d \n", ntohs(header->len), datagram_len);
        return -1;
    }

    // Not addressed to the port we are filtering on.
    if (header->dest != ntohs(uint16_t(filter_port))) {
        return -1;
    }

    // Verify the checksum over pseudo header + UDP header + data; a valid packet sums to 0.
    uint16_t verdict;
    if (raw_ip_version == AF_INET) {
        pseudo_header ph4;
        ph4.source_address = recv_info.new_src_ip.v4;
        ph4.dest_address = recv_info.new_dst_ip.v4;
        ph4.placeholder = 0;
        ph4.protocol = IPPROTO_UDP;
        ph4.tcp_length = htons(datagram_len);

        verdict = csum_with_header((char *)&ph4, sizeof(pseudo_header), (unsigned short *)datagram, datagram_len);
    } else {
        assert(raw_ip_version == AF_INET6);

        pseudo_header6 ph6;
        ph6.src = recv_info.new_src_ip.v6;
        ph6.dst = recv_info.new_dst_ip.v6;
        ph6.placeholder1 = 0;
        ph6.placeholder2 = 0;
        ph6.next_header = IPPROTO_UDP;
        ph6.tcp_length = htons(datagram_len);

        verdict = csum_with_header((char *)&ph6, sizeof(pseudo_header6), (unsigned short *)datagram, datagram_len);
    }

    if (verdict != 0) {
        mylog(log_debug, "udp_chk:%x\n", verdict);
        mylog(log_debug, "udp header error\n");
        return -1;
    }

    recv_info.src_port = ntohs(header->source);
    recv_info.dst_port = ntohs(header->dest);

    payload = datagram + header_len;
    payloadlen = datagram_len - header_len;

    return 0;
}
/*
 * Walk the TCP option area [option_begin, option_end) and extract the
 * timestamp option (kind 8, length 10) into recv_info.ts / recv_info.ts_ack.
 *
 * recv_info.has_ts and recv_info.ts are reset to 0 before parsing; has_ts is
 * set to 1 once a timestamp option has been read.
 *
 * Returns 0 when the walk finishes (kind 0 stops it early) and -1 when an
 * option is malformed.
 */
int parse_tcp_option(char *option_begin, char *option_end, packet_info_t &recv_info) {
    recv_info.has_ts = 0;
    recv_info.ts = 0;

    char *cur = option_begin;
    while (cur < option_end) {
        switch (*cur) {
            case 0:
                // kind 0 terminates the option list
                return 0;

            case 1:
                // kind 1 is a single byte without a length field
                cur += 1;
                break;

            case 8: {
                // timestamp option: kind, length(=10), 4 bytes value, 4 bytes echo
                if (cur + 1 >= option_end) {
                    mylog(log_trace, "invaild option ptr+1==option_end,for ts\n");
                    return -1;
                }
                if (*(cur + 1) != 10) {
                    mylog(log_trace, "invaild ts len\n");
                    return -1;
                }
                if (cur + 10 > option_end) {
                    mylog(log_trace, "ptr+10>option_end for ts\n");
                    return -1;
                }

                recv_info.has_ts = 1;
                recv_info.ts = read_u32(cur + 2);
                recv_info.ts_ack = read_u32(cur + 6);

                // keep walking: later options are skipped but still validated
                cur += 10;
                break;
            }

            default: {
                // any other option: use its length byte to step over it
                if (cur + 1 >= option_end) {
                    mylog(log_trace, "invaild option ptr+1==option_end\n");
                    return -1;
                }
                const int opt_len = (unsigned char)*(cur + 1);
                if (opt_len <= 1) {
                    mylog(log_trace, "invaild option len %d\n", opt_len);
                    return -1;
                }
                // No check that the option fits into the remaining space; the
                // loop condition ends the walk once cur passes option_end.
                cur += opt_len;
                break;
            }
        }
    }

    return 0;
}
/*
 * Receive one raw IP packet and unpack it as a (fake) TCP segment.
 *
 * On success the TCP flags, ports, sequence/ack numbers and the timestamp
 * option are copied into raw_info.recv_info, `payload` points at the first
 * byte after the TCP header inside the buffer handed back by recv_raw_ip(),
 * `payloadlen` is the number of data bytes, and recv_info.data_len is set to
 * the same value.
 *
 * Returns 0 on success and -1 when the packet is unusable: receive failure,
 * protocol is not TCP, packet shorter than a TCP header, invalid data offset,
 * header longer than the packet, or destination port different from
 * filter_port. `payload` and `payloadlen` are written only on success, so
 * every rejection path has to return a negative value.
 *
 * A failing TCP checksum is logged but intentionally does not reject the
 * packet.
 *
 * Side effects on RST: raw_info.rst_received is incremented, the event is
 * logged (verbosity controlled by max_rst_to_show), and raw_info.disabled is
 * set once rst_received reaches max_rst_allowed + 1.
 */
int recv_raw_tcp(raw_info_t &raw_info, char *&payload, int &payloadlen) {
    packet_info_t &recv_info = raw_info.recv_info;

    char *ip_payload;
    int ip_payloadlen;

    if (recv_raw_ip(raw_info, ip_payload, ip_payloadlen) != 0) {
        mylog(log_debug, "recv_raw_ip error\n");
        return -1;
    }

    if (recv_info.protocol != IPPROTO_TCP) {
        // printf("not tcp protocol\n");
        return -1;
    }

    // The fixed part of the header must be present before any of its fields is read.
    if (ip_payloadlen < int(sizeof(my_tcphdr))) {
        mylog(log_debug, "too short to hold tcp header\n");
        return -1;
    }

    my_tcphdr *tcph = (struct my_tcphdr *)ip_payload;

    unsigned short tcphdrlen = tcph->doff * 4;

    // The data offset covers at least the fixed header and at most 60 bytes.
    if (!(tcphdrlen >= sizeof(my_tcphdr) && tcphdrlen <= 60)) {
        mylog(log_debug, "tcph error\n");
        return -1;
    }

    if (tcphdrlen > ip_payloadlen) {
        mylog(log_debug, "error,tcphdrlen >ip_payloadlen\n");
        return -1;
    }

    if (tcph->dest != ntohs(uint16_t(filter_port))) {
        // printf("%x %x",tcph->dest,);
        return -1;
    }

    // Checksum over pseudo header + TCP header + data; a valid segment sums to 0.
    uint16_t tcp_chk;
    int csum_len = ip_payloadlen;
    if (raw_ip_version == AF_INET) {
        pseudo_header tmp_header;
        struct pseudo_header *psh = &tmp_header;

        psh->source_address = recv_info.new_src_ip.v4;
        psh->dest_address = recv_info.new_dst_ip.v4;
        psh->placeholder = 0;
        psh->protocol = IPPROTO_TCP;
        psh->tcp_length = htons(ip_payloadlen);

        tcp_chk = csum_with_header((char *)psh, sizeof(pseudo_header), (unsigned short *)ip_payload, csum_len);
    } else {
        assert(raw_ip_version == AF_INET6);

        pseudo_header6 tmp_header;
        struct pseudo_header6 *psh = &tmp_header;

        psh->src = recv_info.new_src_ip.v6;
        psh->dst = recv_info.new_dst_ip.v6;
        psh->placeholder1 = 0;
        psh->placeholder2 = 0;
        psh->next_header = IPPROTO_TCP;
        psh->tcp_length = htons(ip_payloadlen);

        tcp_chk = csum_with_header((char *)psh, sizeof(pseudo_header6), (unsigned short *)ip_payload, csum_len);
    }

    if (tcp_chk != 0) {
        // Only reported; the packet is still processed.
        mylog(log_debug, "tcp_chk:%x, tcp checksum failed, ignored\n", tcp_chk);
        // return -1;
    }

    char *tcp_begin = ip_payload;  // ip packet's data part

    char *tcp_option = ip_payload + sizeof(my_tcphdr);
    char *option_end = ip_payload + tcphdrlen;

    // Fills recv_info.has_ts/ts/ts_ack; a malformed option area is not treated as fatal here.
    parse_tcp_option(tcp_option, option_end, recv_info);

    recv_info.ack = tcph->ack;
    recv_info.syn = tcph->syn;
    recv_info.rst = tcph->rst;
    recv_info.src_port = ntohs(tcph->source);
    recv_info.dst_port = ntohs(tcph->dest);

    recv_info.seq = ntohl(tcph->seq);

    // Count how many packets in a row carried the same ack number
    // (after_send_raw0() uses the counter in seq_mode 3/4).
    u32_t last_ack_seq = recv_info.ack_seq;
    recv_info.ack_seq = ntohl(tcph->ack_seq);
    if (recv_info.ack_seq == last_ack_seq) {
        recv_info.ack_seq_counter++;
    } else {
        recv_info.ack_seq_counter = 0;
    }

    recv_info.psh = tcph->psh;

    if (tcph->rst == 1) {
        raw_info.rst_received++;

        // max_rst_to_show > 0: warn for the first packets, then fall back to debug level.
        // max_rst_to_show == 0: always debug level. max_rst_to_show < 0: always warn.
        if (max_rst_to_show > 0) {
            if (raw_info.rst_received < max_rst_to_show) {
                mylog(log_warn, "[%s,%d]rst==1,cnt=%d\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received);
            } else if (raw_info.rst_received == max_rst_to_show) {
                mylog(log_warn, "[%s,%d]rst==1,cnt=%d >=max_rst_to_show, this log will be muted for current connection\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received);
            } else {
                mylog(log_debug, "[%s,%d]rst==1,cnt=%d\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received);
            }
        } else if (max_rst_to_show == 0) {
            mylog(log_debug, "[%s,%d]rst==1,cnt=%d\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received);
        } else {
            mylog(log_warn, "[%s,%d]rst==1,cnt=%d\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received);
        }

        // A negative max_rst_allowed turns this limit off.
        if (max_rst_allowed >= 0 && raw_info.rst_received == max_rst_allowed + 1) {
            mylog(log_warn, "[%s,%d]connection disabled because of rst_received=%d > max_rst_allow=%d\n", recv_info.new_src_ip.get_str1(), recv_info.src_port, (int)raw_info.rst_received, (int)max_rst_allowed);
            raw_info.disabled = 1;
        }
    }

    payloadlen = ip_payloadlen - tcphdrlen;

    payload = tcp_begin + tcphdrlen;

    // send_info is deliberately not touched here; after_recv_raw0() does that.
    raw_info.recv_info.data_len = payloadlen;
    return 0;
}
/*
int recv_raw_tcp_deprecated(packet_info_t &info,char * &payload,int &payloadlen)
{
        static char buf[buf_len];

        char raw_recv_buf[buf_len];
        char raw_recv_buf2[buf_len];
        char raw_recv_buf3[buf_len];

        iphdr *  iph;
        tcphdr * tcph;
        int size;
        struct sockaddr saddr;
        socklen_t saddr_size;
        saddr_size = sizeof(saddr);

        mylog(log_trace,"raw!\n");

        size = recvfrom(raw_recv_fd, buf, max_data_len, 0 ,&saddr , &saddr_size);

        if(buf[12]!=8||buf[13]!=0)
        {
                mylog(log_debug,"not an ipv4 packet!\n");
                return -1;
        }

        char *ip_begin=buf+14;

        iph = (struct iphdr *) (ip_begin);


    if (!(iph->ihl > 0 && iph->ihl <=60)) {
        mylog(log_debug,"iph ihl error");
        return -1;
    }

    if (iph->protocol != IPPROTO_TCP) {
        mylog(log_debug,"iph protocal != tcp\n");
        return -1;
    }


        int ip_len=ntohs(iph->tot_len);

    unsigned short iphdrlen =iph->ihl*4;
    tcph=(struct tcphdr*)(ip_begin+ iphdrlen);
    unsigned short tcphdrlen = tcph->doff*4;

    if (!(tcph->doff > 0 && tcph->doff <=60)) {
        mylog(log_debug,"tcph error");
        return 0;
    }


    if(tcph->dest!=ntohs(uint16_t(filter_port)))
    {
        //printf("%x %x",tcph->dest,);
        return -1;
    }
    /////ip
    uint32_t ip_chk=csum ((unsigned short *) ip_begin, iphdrlen);

    int psize = sizeof(struct pseudo_header) + ip_len-iphdrlen;
    /////ip end


    ///tcp
    struct pseudo_header psh;

    psh.source_address = iph->saddr;
    psh.dest_address = iph->daddr;
    psh.placeholder = 0;
    psh.protocol = IPPROTO_TCP;
    psh.tcp_length = htons(ip_len-iphdrlen);

    memcpy(raw_recv_buf2 , (char*) &psh , sizeof (struct pseudo_header));
    memcpy(raw_recv_buf2 + sizeof(struct pseudo_header) , ip_begin+ iphdrlen , ip_len-iphdrlen);

    uint16_t tcp_chk = csum( (unsigned short*) raw_recv_buf2, psize);


   if(ip_chk!=0)
    {
           mylog(log_debug,"ip header error %d\n",ip_chk);
        return -1;
    }
    if(tcp_chk!=0)
    {
        mylog(log_debug,"tcp_chk:%x\n",tcp_chk);
        mylog(log_debug,"tcp header error\n");
        return -1;

    }
    char *tcp_begin=raw_recv_buf2+sizeof(struct pseudo_header);  //ip packet's data part

    char *tcp_option=raw_recv_buf2+sizeof(struct pseudo_header)+sizeof(tcphdr);

    info.has_ts=0;

    if(tcph->doff==10)
    {
        if(tcp_option[6]==0x08 &&tcp_option[7]==0x0a)
        {
                info.has_ts=1;
                info.ts=ntohl(*(uint32_t*)(&tcp_option[8]));
                info.ts_ack=ntohl(*(uint32_t*)(&tcp_option[12]));
                //g_packet_info_send.ts_ack= ntohl(*(uint32_t*)(&tcp_option[8]));
        }
    }
    else if(tcph->doff==8)
    {
        if(tcp_option[3]==0x08 &&tcp_option[4]==0x0a)
        {
                info.has_ts=1;
                info.ts=ntohl(*(uint32_t*)(&tcp_option[0]));
                info.ts_ack=ntohl(*(uint32_t*)(&tcp_option[4]));
                //g_packet_info_send.ts_ack= ntohl(*(uint32_t*)(&tcp_option[0]));
        }
    }

    if(tcph->rst==1)
    {
        mylog(log_warn,"%%%%%%%%%%rst==1%%%%%%%%%%%%%\n");
    }


    info.ack=tcph->ack;
    info.syn=tcph->syn;
    info.rst=tcph->rst;
    info.src_port=ntohs(tcph->source);
    info.src_ip=iph->saddr;
    info.seq=ntohl(tcph->seq);
    info.ack_seq=ntohl(tcph->ack_seq);
    info.psh=tcph->psh;
    if(info.has_ts)
    {
        g_packet_info_send.ts_ack=info.ts;
    }
    ////tcp end


    payloadlen = ip_len-tcphdrlen-iphdrlen;

    payload=ip_begin+tcphdrlen+iphdrlen;

    if(payloadlen>0&&payload[0]=='h')
    {
        mylog(log_debug,"recvd <%u %u %d>\n",ntohl(tcph->seq ),ntohl(tcph->ack_seq), payloadlen);
    }

    if(payloadlen>0&&tcph->syn==0&&tcph->ack==1)
    {
        //if(seq_increse)
                g_packet_info_send.ack_seq=ntohl(tcph->seq)+(uint32_t)payloadlen;
    }


    //printf("%d\n",ip_len);

    mylog(log_trace,"<%u,%u,%u,%u,%d>\n",(unsigned int)iphdrlen,(unsigned int)tcphdrlen,(unsigned int)tcph->syn,(unsigned int)tcph->ack,payloadlen);


        return 0;
}*/
/*
 * Send `payload` through the raw transport selected by raw_mode.
 *
 * When random_drop is non-zero the packet is silently discarded (and 0 is
 * returned) whenever a random number modulo 10000 falls below random_drop.
 *
 * Returns the result of the mode specific sender, or -1 for an unknown
 * raw_mode.
 */
int send_raw0(raw_info_t &raw_info, const char *payload, int payloadlen) {
    // Artificial loss; the random source is only consulted when the feature is on.
    if (random_drop != 0 && get_true_random_number() % 10000 < (u32_t)random_drop) {
        return 0;
    }

    packet_info_t &out = raw_info.send_info;
    mylog(log_trace, "send_raw : from %s %d  to %s %d\n", out.new_src_ip.get_str1(), out.src_port, out.new_dst_ip.get_str2(), out.dst_port);

    if (raw_mode == mode_faketcp) {
        return send_raw_tcp(raw_info, payload, payloadlen);
    }
    if (raw_mode == mode_udp) {
        return send_raw_udp(raw_info, payload, payloadlen);
    }
    if (raw_mode == mode_icmp) {
        return send_raw_icmp(raw_info, payload, payloadlen);
    }
    return -1;
}
/*
 * Receive one packet through the raw transport selected by raw_mode.
 *
 * Returns the result of the mode specific receiver, or -1 for an unknown
 * raw_mode.
 */
int recv_raw0(raw_info_t &raw_info, char *&payload, int &payloadlen) {
    if (raw_mode == mode_faketcp) {
        return recv_raw_tcp(raw_info, payload, payloadlen);
    }
    if (raw_mode == mode_udp) {
        return recv_raw_udp(raw_info, payload, payloadlen);
    }
    if (raw_mode == mode_icmp) {
        return recv_raw_icmp(raw_info, payload, payloadlen);
    }
    return -1;
}

/*
 * Bookkeeping to run after a packet has been sent.
 *
 * icmp mode:    the client increments send_info.my_icmp_seq.
 * faketcp mode: for data-carrying packets outside the handshake (syn == 0,
 *               ack == 1, data_len != 0) send_info.seq is advanced according
 *               to seq_mode:
 *                 0    - never advanced
 *                 1    - advanced by data_len
 *                 2    - advanced by data_len when a random number % 5 == 3
 *                 3, 4 - advanced by data_len, then pulled back to the peer's
 *                        ack number when it would leave the assumed receive
 *                        window, after three repeated acks, or when it lags
 *                        behind the peer's ack number
 *
 * Always returns 0.
 */
int after_send_raw0(raw_info_t &raw_info) {
    packet_info_t &out = raw_info.send_info;
    packet_info_t &in = raw_info.recv_info;

    if (raw_mode == mode_icmp) {
        if (program_mode == client_mode) {
            out.my_icmp_seq++;
        }
        return 0;
    }

    if (raw_mode != mode_faketcp) {
        return 0;
    }

    // only modify send_info when the packet is not part of handshake
    const bool is_data_packet = out.syn == 0 && out.ack == 1 && out.data_len != 0;
    if (!is_data_packet) {
        return 0;
    }

    switch (seq_mode) {
        case 1:
            out.seq += out.data_len;
            break;

        case 2:
            if (get_true_random_number() % 5 == 3) {
                out.seq += out.data_len;
            }
            break;

        case 3:
        case 4: {
            out.seq += out.data_len;

            // mode 3 scales the window lower bound by wscale, mode 4 uses it unscaled
            const u32_t window_size = (seq_mode == 3)
                                          ? (u32_t)((u32_t)receive_window_lower_bound << (u32_t)wscale)
                                          : (u32_t)((u32_t)receive_window_lower_bound);

            if (larger_than_u32(out.seq + max_data_len, in.ack_seq + window_size)) {
                out.seq = in.ack_seq;
            }

            // simulate tcp fast re-transmit
            if (in.ack_seq_counter >= 3) {
                in.ack_seq_counter = 0;
                out.seq = in.ack_seq;
            }

            // for further use, currently no effect.
            if (larger_than_u32(in.ack_seq, out.seq)) {
                out.seq = in.ack_seq;
            }
            break;
        }

        default:
            // seq_mode 0 (and any other value): sequence number stays untouched
            break;
    }

    return 0;
}
/*
 * Bookkeeping to run after a packet has been received.
 *
 * faketcp mode: echoes the peer's timestamp into send_info.ts_ack and, for
 *               data-carrying packets outside the handshake, updates
 *               send_info.ack_seq:
 *                 seq_mode 0/1/2 - moved forward to seq + data_len when that
 *                                  is larger than the current value
 *                 seq_mode 3/4   - moved to seq + data_len only when the
 *                                  packet starts exactly at the current value
 * icmp mode:    the server adopts the peer's icmp sequence number when it is
 *               larger than its own.
 *
 * Always returns 0.
 */
int after_recv_raw0(raw_info_t &raw_info) {
    packet_info_t &out = raw_info.send_info;
    packet_info_t &in = raw_info.recv_info;

    if (raw_mode == mode_faketcp) {
        if (in.has_ts) {
            out.ts_ack = in.ts;
        }

        // only modify send_info when the packet is not part of handshake
        const bool is_data_packet = in.syn == 0 && in.ack == 1 && in.data_len != 0;
        if (is_data_packet) {
            const u32_t end_of_segment = in.seq + in.data_len;

            switch (seq_mode) {
                case 0:
                case 1:
                case 2:
                    if (larger_than_u32(end_of_segment, out.ack_seq)) {
                        out.ack_seq = end_of_segment;
                    }
                    break;

                case 3:
                case 4:
                    // currently we dont remember tcp segments, this is the simplest way
                    // TODO implement tcp segment remembering and SACK.
                    if (in.seq == out.ack_seq) {
                        out.ack_seq = end_of_segment;
                    }
                    break;

                default:
                    break;
            }
        }
    } else if (raw_mode == mode_icmp) {
        if (program_mode == server_mode && larger_than_u16(in.my_icmp_seq, out.my_icmp_seq)) {
            out.my_icmp_seq = in.my_icmp_seq;
        }
    }

    return 0;
}

/*
int send_raw(raw_info_t &raw_info,const char * payload,int payloadlen)
{
        packet_info_t &send_info=raw_info.send_info;
        packet_info_t &recv_info=raw_info.recv_info;
        int ret=send_raw0(raw_info,payload,payloadlen);
        if(ret<0) return ret;
        else
        {
                after_send_raw0(raw_info);
                return ret;
        }
}

int recv_raw(raw_info_t &raw_info,char *& payload,int & payloadlen)
{
        packet_info_t &send_info=raw_info.send_info;
        packet_info_t &recv_info=raw_info.recv_info;
        int ret=recv_raw0(raw_info,payload,payloadlen);
        if(ret<0) return ret;
        else
        {
                after_recv_raw0(raw_info);
                return ret;
        }
}*/

/*
int get_src_adress(u32_t &ip,u32_t remote_ip_uint32,int remote_port)  //a trick to get src adress for a dest adress,so that we can use the src address in raw socket as source ip
{
        struct sockaddr_in remote_addr_in={0};

        socklen_t slen = sizeof(sockaddr_in);
        //memset(&remote_addr_in, 0, sizeof(remote_addr_in));
        remote_addr_in.sin_family = AF_INET;
        remote_addr_in.sin_port = htons(remote_port);
        remote_addr_in.sin_addr.s_addr = remote_ip_uint32;


        int new_udp_fd=socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
        if(new_udp_fd<0)
        {
                mylog(log_warn,"create udp_fd error\n");
                return -1;
        }
        //set_buf_size(new_udp_fd);

        mylog(log_debug,"created new udp_fd %d\n",new_udp_fd);
        int ret = connect(new_udp_fd, (struct sockaddr *) &remote_addr_in, slen);
        if(ret!=0)
        {
                mylog(log_warn,"udp fd connect fail\n");
                close(new_udp_fd);
                return -1;
        }

        struct sockaddr_in my_addr={0};
        socklen_t len=sizeof(my_addr);

    if(getsockname(new_udp_fd, (struct sockaddr *) &my_addr, &len)!=0){close(new_udp_fd); return -1;}

    ip=my_addr.sin_addr.s_addr;

    close(new_udp_fd);

    return 0;
}*/

/*
 * Find out which local address would be used to reach `remote_addr`.
 *
 * A temporary UDP socket connected to `remote_addr` is obtained from
 * address_t::new_connected_udp_fd(); its local name, as reported by
 * getsockname(), is stored in `output_addr`. The socket is closed again
 * before returning.
 *
 * Returns 0 on success, -1 when the socket could not be created or
 * getsockname() failed.
 */
int get_src_adress2(address_t &output_addr, address_t remote_addr) {
    const int probe_fd = remote_addr.new_connected_udp_fd();
    if (probe_fd < 0) {
        mylog(log_warn, "create udp_fd error\n");
        return -1;
    }

    socklen_t name_len = sizeof(output_addr.inner);
    const int rc = getsockname(probe_fd, (struct sockaddr *)&output_addr.inner, &name_len);
    if (rc != 0) {
        close(probe_fd);
        return -1;
    }

    // The local address must be of the same family as the remote one.
    assert(output_addr.get_type() == remote_addr.get_type());

    close(probe_fd);
    return 0;
}
/*
int try_to_list_and_bind(int &fd,u32_t local_ip_uint32,int port)  //try to bind to a port,may fail.
{
         int old_bind_fd=fd;

         if(raw_mode==mode_faketcp)
         {
                 fd=socket(AF_INET,SOCK_STREAM,0);
         }
         else  if(raw_mode==mode_udp||raw_mode==mode_icmp)
         {
                 fd=socket(AF_INET,SOCK_DGRAM,0);
         }
     if(old_bind_fd!=-1)
     {
         close(old_bind_fd);
     }

         struct sockaddr_in temp_bind_addr={0};
     //bzero(&temp_bind_addr, sizeof(temp_bind_addr));

     temp_bind_addr.sin_family = AF_INET;
     temp_bind_addr.sin_port = htons(port);
     temp_bind_addr.sin_addr.s_addr = local_ip_uint32;

     if (bind(fd, (struct sockaddr*)&temp_bind_addr, sizeof(temp_bind_addr)) !=0)
     {
         mylog(log_debug,"bind fail\n");
         return -1;
     }
         if(raw_mode==mode_faketcp)
         {

                if (listen(fd, SOMAXCONN) != 0) {
                        mylog(log_warn,"listen fail\n");
                        return -1;
                }
         }
     return 0;
}*/
/*
 * Try to occupy `address` with a regular socket; may fail.
 *
 * Any descriptor already stored in `fd` is closed first. A new socket is then
 * created (SOCK_STREAM in faketcp mode, SOCK_DGRAM in udp/icmp mode), bound to
 * `address` and, in faketcp mode without use_tcp_dummy_socket, put into the
 * listening state.
 *
 * Returns 0 on success and -1 on failure. When bind() or listen() fails the
 * new descriptor is left in `fd`; it is closed by the next call of this
 * function.
 */
int try_to_list_and_bind2(int &fd, address_t address)  // try to bind to a port,may fail.
{
    if (fd != -1) {
        close(fd);
        // Forget the closed descriptor so it can never be bound or closed a
        // second time if no socket gets created below.
        fd = -1;
    }
    if (raw_mode == mode_faketcp) {
        fd = socket(address.get_type(), SOCK_STREAM, 0);
    } else if (raw_mode == mode_udp || raw_mode == mode_icmp) {
        fd = socket(address.get_type(), SOCK_DGRAM, 0);
    }

    if (fd == -1) {
        mylog(log_debug, "create fd fail\n");
        return -1;
    }

    if (::bind(fd, (struct sockaddr *)&address.inner, address.get_len()) != 0) {
        mylog(log_debug, "bind fail\n");
        return -1;
    }
    if (raw_mode == mode_faketcp && !use_tcp_dummy_socket) {
        if (listen(fd, SOMAXCONN) != 0) {
            mylog(log_warn, "listen fail\n");
            return -1;
        }
    }
    return 0;
}
/*
int client_bind_to_a_new_port(int &fd,u32_t local_ip_uint32)//find a free port and bind to it.
{
        int raw_send_port=10000+get_true_random_number()%(65535-10000);
        for(int i=0;i<1000;i++)//try 1000 times at max,this should be enough
        {
                if (try_to_list_and_bind(fd,local_ip_uint32,raw_send_port)==0)
                {
                        return raw_send_port;
                }
        }
        mylog(log_fatal,"bind port fail\n");
        myexit(-1);
        return -1;////for compiler check
}*/

int client_bind_to_a_new_port2(int &fd, const address_t &address)  // find a free port and bind to it.
{
    address_t tmp = address;
    for (int i = 0; i < 1000; i++)  // try 1000 times at max,this should be enough
    {
        int raw_send_port = 10000 + get_true_random_number() % (65535 - 10000);
        tmp.set_port(raw_send_port);
        if (try_to_list_and_bind2(fd, tmp) == 0) {
            return raw_send_port;
        }
    }
    mylog(log_fatal, "bind port fail\n");
    myexit(-1);
    return -1;  ////for compiler check
}


================================================
FILE: network.h
================================================
/*
 * network.h
 *
 *  Created on: Jul 29, 2017
 *      Author: wangyu
 */

#ifndef UDP2RAW_NETWORK_H_
#define UDP2RAW_NETWORK_H_

// Globals shared by the raw packet send/receive code.

extern int raw_recv_fd;
extern int raw_send_fd;
// When non-zero, try_to_list_and_bind2() does not call listen() on the socket it binds in faketcp mode.
extern int use_tcp_dummy_socket;
// Selects how after_send_raw0()/after_recv_raw0() advance seq/ack numbers in faketcp mode (values 0-4 are handled there).
extern int seq_mode;
extern int max_seq_mode;
// recv_raw_udp()/recv_raw_tcp() reject packets whose destination port differs from this value.
extern int filter_port;
// extern u32_t bind_address_uint32;
extern int disable_bpf_filter;

extern int lower_level;
extern int lower_level_manual;
extern char if_name[100];
extern char dev[100];
extern unsigned char dest_hw_addr[];

// Artificial packet loss: send_raw0() drops a packet when (random % 10000) < random_drop; 0 disables it.
extern int random_drop;

extern int ifindex;

extern char g_packet_buf[huge_buf_len];
extern int g_packet_buf_len;
extern int g_packet_buf_cnt;
// Declarations that exist only when building with UDP2RAW_MP defined.
#ifdef UDP2RAW_MP
extern queue_t my_queue;

extern ev_async async_watcher;
extern struct ev_loop *g_default_loop;

extern pthread_mutex_t queue_mutex;
extern int use_pcap_mutex;

extern int pcap_cnt;

extern int pcap_link_header_len;

extern int send_with_pcap;
extern int pcap_header_captured;
extern int pcap_header_buf[buf_len];

// Same field layout as my_icmphdr below: type, code, checksum, id, sequence number.
struct icmphdr {
    uint8_t type;
    uint8_t code;
    uint16_t check_sum;
    uint16_t id;
    uint16_t seq;
};
#endif

// Fixed part of an IPv4 header as it appears on the wire.
// The order of the two 4-bit fields depends on UDP2RAW_LITTLE_ENDIAN so that
// `version` and `ihl` map onto the correct halves of the first byte.
// Multi-byte fields hold whatever byte order the packet uses; no conversion is done here.
struct my_iphdr {
#ifdef UDP2RAW_LITTLE_ENDIAN
    unsigned char ihl : 4;
    unsigned char version : 4;
#else
    unsigned char version : 4;
    unsigned char ihl : 4;
#endif
    u_int8_t tos;
    u_int16_t tot_len;
    u_int16_t id;
    u_int16_t frag_off;
    u_int8_t ttl;
    u_int8_t protocol;
    u_int16_t check;
    u_int32_t saddr;
    u_int32_t daddr;
    /*The options start here. */
};

// UDP header. The anonymous union exposes the same 8 bytes under two naming
// schemes (uh_sport/uh_dport/uh_ulen/uh_sum and source/dest/len/check).
// recv_raw_udp() converts ports and length with ntohs() when reading them.
struct my_udphdr {
    /*__extension__*/ union {
        struct
        {
            u_int16_t uh_sport; /* source port */
            u_int16_t uh_dport; /* destination port */
            u_int16_t uh_ulen;  /* udp length */
            u_int16_t uh_sum;   /* udp checksum */
        };
        struct
        {
            u_int16_t source;
            u_int16_t dest;
            u_int16_t len;
            u_int16_t check;
        };
    };
};

// Fixed part of a TCP header. The anonymous union exposes the same bytes under
// two naming schemes: th_* with a single th_flags byte (tested with the TH_*
// masks defined below), and source/dest/seq/... with one bit-field per flag.
// Bit-field order depends on UDP2RAW_LITTLE_ENDIAN.
// `doff` is the header length in 32-bit words; recv_raw_tcp() multiplies it by 4.
//
// NOTE(review): in the th_* view the data-offset field is called `tc_off` in
// the little-endian branch but `th_off` in the big-endian branch. This looks
// like a typo; confirm nothing relies on `tc_off` before renaming it.
struct my_tcphdr {
    /*__extension__*/ union {
        struct
        {
            u_int16_t th_sport; /* source port */
            u_int16_t th_dport; /* destination port */
            u_int32_t th_seq;   /* sequence number */
            u_int32_t th_ack;   /* acknowledgement number */
#ifdef UDP2RAW_LITTLE_ENDIAN
            u_int8_t th_x2 : 4;  /* (unused) */
            u_int8_t tc_off : 4; /* data offset */
#else
            u_int8_t th_off : 4; /* data offset */
            u_int8_t th_x2 : 4;  /* (unused) */
#endif
            u_int8_t th_flags;
#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_PUSH 0x08
#define TH_ACK 0x10
#define TH_URG 0x20
            u_int16_t th_win; /* window */
            u_int16_t th_sum; /* checksum */
            u_int16_t th_urp; /* urgent pointer */
        };
        struct
        {
            u_int16_t source;
            u_int16_t dest;
            u_int32_t seq;
            u_int32_t ack_seq;
#ifdef UDP2RAW_LITTLE_ENDIAN
            u_int16_t res1 : 4;
            u_int16_t doff : 4;
            u_int16_t fin : 1;
            u_int16_t syn : 1;
            u_int16_t rst : 1;
            u_int16_t psh : 1;
            u_int16_t ack : 1;
            u_int16_t urg : 1;
            u_int16_t res2 : 2;
#else
            u_int16_t doff : 4;
            u_int16_t res1 : 4;
            u_int16_t res2 : 2;
            u_int16_t urg : 1;
            u_int16_t ack : 1;
            u_int16_t psh : 1;
            u_int16_t rst : 1;
            u_int16_t syn : 1;
            u_int16_t fin : 1;
#endif
            u_int16_t window;
            u_int16_t check;
            u_int16_t urg_ptr;
        };
    };
};

// Fixed IPv6 header. Version, traffic class and the top bits of the flow label
// do not fall on byte boundaries, so they are split into 4-bit fields whose
// order depends on UDP2RAW_LITTLE_ENDIAN; the remaining flow label bits are in
// flow_label_low.
struct my_ip6hdr {
#ifdef UDP2RAW_LITTLE_ENDIAN
    uint8_t traffic_class_high : 4;
    uint8_t version : 4;
    uint8_t flow_label_high : 4;
    uint8_t traffic_class_low : 4;
#else
    uint8_t version : 4;
    uint8_t traffic_class_high : 4;
    uint8_t traffic_class_low : 4;
    uint8_t flow_label_high : 4;
#endif
    u_int16_t flow_label_low;
    u_int16_t payload_len;
    uint8_t next_header;
    uint8_t hop_limit;

    struct in6_addr src;
    struct in6_addr dst;
};

// ICMP / ICMPv6 echo header as used by recv_raw_icmp(): `id` is compared
// against the expected port number and `seq` is stored as my_icmp_seq, both
// after ntohs().
struct my_icmphdr {
    uint8_t type;
    uint8_t code;
    uint16_t check_sum;
    uint16_t id;
    uint16_t seq;
};

// IPv4 pseudo header that is checksummed together with a TCP or UDP segment.
// Despite its name, `tcp_length` is also used for UDP; the receive paths store
// htons(length of the IP payload) in it.
struct pseudo_header {
    u_int32_t source_address;
    u_int32_t dest_address;
    u_int8_t placeholder;
    u_int8_t protocol;
    u_int16_t tcp_length;
};

// IPv6 pseudo header that is checksummed together with a TCP, UDP or ICMPv6
// payload. `placeholder1`/`placeholder2` are set to 0 by the receive paths.
// NOTE(review): `tcp_length` is 32 bits wide here, yet the receive paths fill
// it with htons(length); presumably harmless for the checksum result, but
// confirm against csum_with_header().
struct pseudo_header6 {
    struct in6_addr src;
    struct in6_addr dst;
    u_int32_t tcp_length;
    u_int16_t placeholder1;
    u_int8_t placeholder2;
    u_int8_t next_header;
};

// Per-direction packet state. One instance describes the packets we send
// (raw_info_t::send_info), another the last packet received
// (raw_info_t::recv_info). Which members are meaningful depends on the raw mode.
struct packet_info_t  // todo change this to union
{
    // IP protocol number; the receive paths compare it with IPPROTO_TCP / IPPROTO_UDP.
    uint8_t protocol;

    // u32_t src_ip;
    // u32_t dst_ip;
    my_ip_t new_src_ip;
    my_ip_t new_dst_ip;

    // Ports in host byte order. In icmp mode recv_raw_icmp() stores the icmp id in both.
    uint16_t src_port;
    uint16_t dst_port;

    // tcp_part:
    bool syn, ack, psh, rst;

    // Sequence and acknowledgement numbers in host byte order.
    u32_t seq, ack_seq;

    // On the receive side: incremented by recv_raw_tcp() when a packet repeats
    // the previous ack_seq, reset to 0 otherwise.
    u32_t ack_seq_counter;

    // Values of the TCP timestamp option; valid on the receive side when has_ts is set.
    u32_t ts, ts_ack;

    // ICMP sequence number in host byte order.
    uint16_t my_icmp_seq;

    // Set by parse_tcp_option() when a timestamp option was found.
    bool has_ts;

    // Number of data bytes after the TCP header; set by recv_raw_tcp() on the receive side.
    i32_t data_len;

#ifdef UDP2RAW_LINUX
    sockaddr_ll addr_ll;
#endif

    packet_info_t();
};

// State of one raw "connection": what we send, what we last received, and RST accounting.
struct raw_info_t {
    packet_info_t send_info;
    packet_info_t recv_info;
    // int last_send_len;
    // int last_recv_len;
    bool peek = 0;  // NOTE(review): presumably set while a packet is only being peeked at; confirm in peek_raw()
    // bool csum=1;
    u32_t reserved_send_seq;
    // uint32_t first_seq,first_ack_seq;
    int rst_received = 0;  // number of RST packets seen by recv_raw_tcp()
    bool disabled = 0;     // set by recv_raw_tcp() once rst_received reaches max_rst_allowed + 1

};  // g_raw_info;

// ---- setup -------------------------------------------------------------

int init_raw_socket();

void init_filter(int port);

void remove_filter();

#ifdef UDP2RAW_LINUX
int init_ifindex(const char *if_name, int fd, int &index);
#endif

#ifdef UDP2RAW_MP
int init_ifindex(const char *if_name, int &index);
#endif

int find_lower_level_info(u32_t ip, u32_t &dest_ip, string &if_name, string &hw);

// ---- address / port helpers ----------------------------------------------
// NOTE(review): the u32_t based variants below (get_src_adress,
// try_to_list_and_bind, client_bind_to_a_new_port) only appear as
// commented-out code in network.cpp; confirm whether these declarations are
// still needed. The *2 variants work on address_t.

int get_src_adress(u32_t &ip, u32_t remote_ip_uint32, int remote_port);  // a trick to get src adress for a dest adress,so that we can use the src address in raw socket as source ip
int get_src_adress2(address_t &output_addr, address_t remote_addr);

int try_to_list_and_bind(int &bind_fd, u32_t local_ip_uint32, int port);  // try to bind to a port,may fail.
int try_to_list_and_bind2(int &fd, address_t address);

int client_bind_to_a_new_port(int &bind_fd, u32_t local_ip_uint32);  // find a free port and bind to it.
int client_bind_to_a_new_port2(int &fd, const address_t &address);

// ---- raw packet I/O --------------------------------------------------------
// The recv_* functions return 0 on success and hand back a pointer/length pair
// describing the payload; the send_* functions take the payload to transmit.

int discard_raw_packet();
int pre_recv_raw_packet();

int send_raw_ip(raw_info_t &raw_info, const char *payload, int payloadlen);

int peek_raw(raw_info_t &peek_info);

int recv_raw_ip(raw_info_t &raw_info, char *&payload, int &payloadlen);

int send_raw_icmp(raw_info_t &raw_info, const char *payload, int payloadlen);

int send_raw_udp(raw_info_t &raw_info, const char *payload, int payloadlen);

int send_raw_tcp(raw_info_t &raw_info, const char *payload, int payloadlen);

int recv_raw_icmp(raw_info_t &raw_info, char *&payload, int &payloadlen);

int recv_raw_udp(raw_info_t &raw_info, char *&payload, int &payloadlen);

int recv_raw_tcp(raw_info_t &raw_info, char *&payload, int &payloadlen);

// int send_raw(raw_info_t &raw_info,const char * payload,int payloadlen);

// int recv_raw(raw_info_t &raw_info,char * &payload,int &payloadlen);

// Dispatch to the tcp/udp/icmp implementation selected by raw_mode; -1 for an unknown mode.
int send_raw0(raw_info_t &raw_info, const char *payload, int payloadlen);

int recv_raw0(raw_info_t &raw_info, char *&payload, int &payloadlen);

// Update sequence/ack/icmp-seq bookkeeping after a packet was sent / received.
int after_send_raw0(raw_info_t &raw_info);

int after_recv_raw0(raw_info_t &raw_info);

#endif /* NETWORK_H_ */


================================================
FILE: pcap_wrapper.cpp
================================================
#include <windows.h>
#include <pcap_wrapper.h>
#include <assert.h>
#include <stdio.h>
// Definitions of the function pointers declared in pcap_wrapper.h. They carry
// the names and signatures of the corresponding pcap API functions and are
// filled in at runtime by init_pcap() via GetProcAddress(); until then they
// are null.

// Capture loop control.
int (*pcap_loop)(pcap_t *, int, pcap_handler, u_char *);
int (*pcap_breakloop)(pcap_t *);

// Handle creation.
pcap_t *(*pcap_create)(const char *, char *);

// Options applied to a handle, and activation.
int (*pcap_set_snaplen)(pcap_t *, int) = 0;
int (*pcap_set_promisc)(pcap_t *, int) = 0;
int (*pcap_can_set_rfmon)(pcap_t *) = 0;
int (*pcap_set_rfmon)(pcap_t *, int) = 0;
int (*pcap_set_timeout)(pcap_t *, int) = 0;
int (*pcap_set_buffer_size)(pcap_t *, int) = 0;
int (*pcap_activate)(pcap_t *) = 0;

// Filtering and direction.
int (*pcap_setfilter)(pcap_t *, struct bpf_program *) = 0;
int (*pcap_setdirection)(pcap_t *, pcap_direction_t) = 0;

int (*pcap_datalink)(pcap_t *) = 0;

void (*pcap_freecode)(struct bpf_program *) = 0;

int (*pcap_compile)(pcap_t *, struct bpf_program *, const char *, int,
                    bpf_u_int32) = 0;

// Error reporting and packet injection.
char *(*pcap_geterr)(pcap_t *) = 0;
int (*pcap_sendpacket)(pcap_t *, const u_char *, int) = 0;

// Device discovery.
char *(*pcap_lookupdev)(char *) = 0;

int (*pcap_findalldevs)(pcap_if_t **, char *) = 0;

// Helper whose only purpose is its constructor: the global instance `do_it`
// calls init_pcap() during static initialization of this translation unit, so
// the pcap function pointers are resolved without an explicit call.
struct init_pcap_t {
    init_pcap_t() {
        init_pcap();
    }

} do_it;

static void init_npcap_dll_path() {
    BOOL(WINAPI * SetDllDirectory)
    (LPCTSTR);
    char sysdir_name[512];
    int len;

    SetDllDirectory = (BOOL(WINAPI *)(LPCTSTR))GetProcAddress(GetModuleHandle("kernel32.dll"), "SetDllDirectoryA");
    if (SetDllDirectory == NULL) {
        printf("Error in SetDllDirectory\n");
    } else {
        len = GetSystemDirectory(sysdir_name, 480);  //	be safe
        if (!len)
            printf("Error in GetSystemDirectory (%d)\n", (int)GetLastError());
        strcat(sysdir_name, "\\Npcap");
        if (SetDllDirectory(sysdir_name) == 0)
            printf("Error in SetDllDirectory(\"System32\\Npcap\")\n");
    }
}

#define EXPORT_FUN(XXX)                                     \
    do {                                                    \
        XXX = (__typeof__(XXX))GetProcAddress(wpcap, #XXX); \
    } while (0)
int init_pcap() {
    HMODULE wpcap = LoadLibrary("wpcap.dll");
    if (wpcap != 0) {
        printf("using system32/wpcap.dll\n");
    } else {
        init_npcap_dll_path();
        // SetDllDirectory("C:\\Windows\\System32\\Npcap\\");
        wpcap = LoadLibrary("wpcap.dll");
        if (wpcap != 0)
            printf("using system32/npcap/wpcap.dll\n");
    }
    if (wpcap == 0) {
        printf("cant not open wpcap.dll, make sure winpcap/npcap is installed\n");
        exit(-1);
    }
    assert(wpcap != 0);

    EXPORT_FUN(pcap_loop);
    EXPORT_FUN(pcap_breakloop);
    EXPORT_FUN(pcap_create);
    EXPORT_FUN(pcap_set_snaplen);
    EXPORT_FUN(pcap_set_promisc);
    EXPORT_FUN(pcap_set_timeout);
    EXPORT_FUN(pcap_activate);
    EXPORT_FUN(pcap_setfilter);
    EXPORT_FUN(pcap_setdirection);
    EXPORT_FUN(pcap_datalink);
    EXPORT_FUN(pcap_freecode);
    EXPORT_FUN(pcap_compile);
    EXPORT_FUN(pcap_geterr);
    EXPORT_FUN(pcap_sendpacket);
    EXPORT_FUN(pcap_lookupdev);
    EXPORT_FUN(pcap_findalldevs);
    /*
    pcap_loop = (__typeof__(pcap_loop))GetProcAddress(wpcap, "pcap_loop");
    pcap_create = (__typeof__(pcap_create))GetProcAddress(wpcap, "pcap_create");
    pcap_set_snaplen = (__typeof__(pcap_set_snaplen))GetProcAddress(wpcap, "pcap_set_snaplen");
    pcap_set_promisc = (__typeof__(pcap_set_promisc))GetProcAddress(wpcap, "pcap_set_promisc");
    pcap_set_timeout = (__typeof__(pcap_set_timeout))GetProcAddress(wpcap, "pcap_set_timeout");
    pcap_activate = (__typeof__(pcap_activate))GetProcAddress(wpcap, "pcap_activate");
    pcap_setfilter = (__typeof__(pcap_setfilter))GetProcAddress(wpcap, "pcap_setfilter");
    pcap_setdirection = (__typeof__(pcap_setdirection))GetProcAddress(wpcap, "pcap_setdirection");
    pcap_datalink = (__typeof__(pcap_datalink))GetProcAddress(wpcap, "pcap_datalink");
    pcap_freecode = (__typeof__(pcap_freecode))GetProcAddress(wpcap, "pcap_freecode");
    pcap_compile = (__typeof__(pcap_compile))GetProcAddress(wpcap, "pcap_compile");
    pcap_geterr = (__typeof__(pcap_geterr))GetProcAddress(wpcap, "pcap_geterr");
    pcap_sendpacket = (__typeof__(pcap_sendpacket))GetProcAddress(wpcap, "pcap_sendpacket");
    pcap_lookupdev = (__typeof__(pcap_lookupdev))GetProcAddress(wpcap, "pcap_lookupdev");
    pcap_findalldevs = (__typeof__(pcap_findalldevs))GetProcAddress(wpcap, "pcap_findalldevs");
    //pcap_loop = (__typeof__(pcap_loop))GetProcAddress(wpcap, "pcap_loop");
    //pcap_loop = (__typeof__(pcap_loop))GetProcAddress(wpcap, "pcap_loop");
    //pcap_loop = (__typeof__(pcap_loop))GetProcAddress(wpcap, "pcap_loop");
    */
    return 0;
}


================================================
FILE: pcap_wrapper.h
================================================
#pragma once

//#ifdef __cplusplus
// extern "C" {
//#endif

//#include <sys/time.h>
//#include <stdint.h>

// Stand-in for the pcap `bpf_program` type: a plain 4096-byte blob.
// NOTE(review): presumably sized to be at least as large as the real struct so
// callers can allocate it themselves - confirm against the pcap headers.
struct bpf_program {
    char a[4096];
};

// Stand-in for the pcap handle type. Every function pointer declared in this
// header takes or returns it by pointer only.
struct pcap_t {
    char a[4096];
};

// 32-bit unsigned type used by the pcap declarations below.
typedef unsigned int bpf_u_int32;

// Timestamp with two int fields (seconds, microseconds), used by pcap_pkthdr.
// NOTE(review): must match the layout the loaded wpcap.dll uses for its packet
// header timestamp - confirm for each supported target.
typedef struct my_timeval {
    int tv_sec;
    int tv_usec;
} my_timeval;

// Per-packet header handed to a pcap_handler callback.
struct pcap_pkthdr {
    struct my_timeval ts; /* time stamp */
    bpf_u_int32 caplen;   /* length of portion present */
    bpf_u_int32 len;      /* length this packet (off wire) */
};

// Capture direction argument of pcap_setdirection(): both directions (0),
// incoming only (1) or outgoing only (2).
typedef enum {
    PCAP_D_INOUT = 0,
    PCAP_D_IN,
    PCAP_D_OUT
} pcap_direction_t;

// One address entry of an interface; entries form a singly linked list via
// `next` (referenced from pcap_if::addresses).
struct pcap_addr {
    struct pcap_addr *next;
    struct sockaddr *addr;      /* address */
    struct sockaddr *netmask;   /* netmask for that address */
    struct sockaddr *broadaddr; /* broadcast address for that address */
    struct sockaddr *dstaddr;   /* P2P destination address for that address */
};

// One capture device; devices form a singly linked list via `next`. This is
// the element type written through the pcap_if_t** argument of
// pcap_findalldevs().
struct pcap_if {
    struct pcap_if *next;
    char *name;        /* name to hand to "pcap_open_live()" */
    char *description; /* textual description of interface, or NULL */
    struct pcap_addr *addresses;
    bpf_u_int32 flags; /* PCAP_IF_ interface flags */
};

// Short aliases for the list node types above.
typedef struct pcap_if pcap_if_t;
typedef struct pcap_addr pcap_addr_t;

typedef unsigned char u_char;

// Size of the error buffer passed to functions that take a `char *` errbuf.
#define PCAP_ERRBUF_SIZE 256

// Link-layer type codes as returned by pcap_datalink().
#define DLT_NULL 0    /* BSD loopback encapsulation */
#define DLT_EN10MB 1  /* Ethernet (10Mb) */
#define DLT_EN3MB 2   /* Experimental Ethernet (3Mb) */
#define DLT_AX25 3    /* Amateur Radio AX.25 */
#define DLT_PRONET 4  /* Proteon ProNET Token Ring */
#define DLT_CHAOS 5   /* Chaos */
#define DLT_IEEE802 6 /* 802.5 Token Ring */
#define DLT_ARCNET 7  /* ARCNET, with BSD-style header */
#define DLT_SLIP 8    /* Serial Line IP */
#define DLT_PPP 9     /* Point-to-point Protocol */
#define DLT_FDDI 10   /* FDDI */
#define DLT_LINUX_SLL 113

// Netmask value for pcap_compile() when the netmask is not known.
#define PCAP_NETMASK_UNKNOWN 0xffffffff

// Callback type invoked by pcap_loop(): user pointer, packet header, packet
// bytes.
typedef void (*pcap_handler)(u_char *, const struct pcap_pkthdr *,
                             const u_char *);

// Function pointers named after the pcap API. They are defined in
// pcap_wrapper.cpp and resolved from wpcap.dll at runtime by init_pcap(), so
// callers use them exactly like the real functions.
// NOTE(review): a pointer that init_pcap() does not resolve stays null -
// verify every pointer a caller uses is actually loaded there.

extern int (*pcap_loop)(pcap_t *, int, pcap_handler, u_char *);

extern int (*pcap_breakloop)(pcap_t *);

extern pcap_t *(*pcap_create)(const char *, char *);

extern int (*pcap_set_snaplen)(pcap_t *, int);
extern int (*pcap_set_promisc)(pcap_t *, int);
extern int (*pcap_can_set_rfmon)(pcap_t *);
extern int (*pcap_set_rfmon)(pcap_t *, int);
extern int (*pcap_set_timeout)(pcap_t *, int);
extern int (*pcap_set_buffer_size)(pcap_t *, int);
extern int (*pcap_activate)(pcap_t *);

extern int (*pcap_setfilter)(pcap_t *, struct bpf_program *);
extern int (*pcap_setdirection)(pcap_t *, pcap_direction_t);

extern int (*pcap_datalink)(pcap_t *);

extern void (*pcap_freecode)(struct bpf_program *);

extern int (*pcap_compile)(pcap_t *, struct bpf_program *, const char *, int,
                           bpf_u_int32);

extern char *(*pcap_geterr)(pcap_t *);
extern int (*pcap_sendpacket)(pcap_t *, const u_char *, int);

extern char *(*pcap_lookupdev)(char *);

extern int (*pcap_findalldevs)(pcap_if_t **, char *);

// No-op replacement for pcap_set_immediate_mode(): ignores both arguments and
// always reports success (0). Unlike the other pcap entry points it is not
// loaded from wpcap.dll.
inline int pcap_set_immediate_mode(pcap_t *, int) {
    return 0;
}

//#ifdef __cplusplus
//}
//#endif

// Loads wpcap.dll and resolves the function pointers declared above
// (implemented in pcap_wrapper.cpp). Returns 0 on success.
int init_pcap();


================================================
FILE: server.cpp
================================================
/*
 * server.cpp
 *
 *  Created on: Aug 29, 2018
 *      Author: root
 */

#ifndef UDP2RAW_MP

#include "common.h"
#include "network.h"
#include "connection.h"
#include "misc.h"
#include "log.h"
#include "lib/md5.h"
#include "encrypt.h"
#include "fd_manager.h"

// Per-connection timer callback on the server side (the global server timer is
// handled elsewhere).
//
// Expires inactive conversations of the connection and, once at least
// heartbeat_interval has passed since the last heartbeat was sent, sends a new
// one. The connection must be in the server_ready state; anything else is
// treated as a fatal error. Always returns 0.
int server_on_timer_multi(conn_info_t &conn_info) {
    // Textual "ip:port" of the peer, used as log/cleanup tag.
    packet_info_t &peer = conn_info.raw_info.send_info;
    address_t peer_addr;
    peer_addr.from_ip_port_new(raw_ip_version, &peer.new_dst_ip, peer.dst_port);

    char ip_port[max_addr_len];
    peer_addr.to_str(ip_port);

    mylog(log_trace, "server timer!\n");

    assert(conn_info.state.server_current_state == server_ready);

    if (conn_info.state.server_current_state != server_ready) {
        mylog(log_fatal, "this shouldnt happen!\n");
        myexit(-1);
        return 0;
    }

    // Expired connections themselves are cleared by conn_manager; only the
    // conversations inside this connection are cleaned up here.
    conn_info.blob->conv_manager.s.clear_inactive(ip_port);

    if (get_current_time() - conn_info.last_hb_sent_time >= heartbeat_interval) {
        // hb_mode 0 sends an empty heartbeat, other modes send hb_len bytes.
        send_safer(conn_info, 'h', hb_buf, hb_mode == 0 ? 0 : hb_len);
        conn_info.last_hb_sent_time = get_current_time();

        mylog(log_debug, "heart beat sent<%x,%x>\n", conn_info.my_id, conn_info.oppsite_id);
    }
    return 0;
}
// Handles one already-decoded message for a connection in the server_ready
// state.
//
//   conn_info - connection the message belongs to
//   ip_port   - printable peer address, used only for logging
//   type      - 'h' for heartbeat, 'd' for data; other types are ignored
//   data      - message payload; for 'd' it starts with a 32-bit conversation
//               id in network byte order
//   data_len  - payload length in bytes
//
// A heartbeat only refreshes last_hb_recv_time. A data message is forwarded to
// the connected UDP socket of its conversation; an unknown conversation id
// first gets a new UDP socket that is registered with fd_manager and epoll.
//
// Returns -1 when the UDP socket for a new conversation cannot be created or
// registered, 0 otherwise (including ignored messages and send() failures).
int server_on_raw_recv_ready(conn_info_t &conn_info, char *ip_port, char type, char *data, int data_len) {
    if (type == 'h' && data_len >= 0) {
        mylog(log_debug, "[%s][hb]received hb \n", ip_port);
        conn_info.last_hb_recv_time = get_current_time();
        return 0;
    }

    // Only data messages long enough to carry a conversation id are handled.
    if (type != 'd' || data_len < int(sizeof(u32_t))) {
        return 0;
    }

    // memcpy instead of a cast: data is not guaranteed to be aligned.
    my_id_t tmp_conv_id;
    memcpy(&tmp_conv_id, &data[0], sizeof(tmp_conv_id));
    tmp_conv_id = ntohl(tmp_conv_id);

    // In hb_mode 0 data traffic also counts as a sign of life.
    if (hb_mode == 0)
        conn_info.last_hb_recv_time = get_current_time();

    mylog(log_trace, "conv:%u\n", tmp_conv_id);
    if (!conn_info.blob->conv_manager.s.is_conv_used(tmp_conv_id)) {
        if (conn_info.blob->conv_manager.s.get_size() >= max_conv_num) {
            mylog(log_warn,
                  "[%s]ignored new conv %x connect bc max_conv_num exceed\n", ip_port,
                  tmp_conv_id);
            return 0;
        }

        int new_udp_fd = remote_addr.new_connected_udp_fd();
        if (new_udp_fd < 0) {
            mylog(log_warn, "[%s]new_connected_udp_fd() failed\n", ip_port);
            return -1;
        }

        struct epoll_event ev;

        fd64_t new_udp_fd64 = fd_manager.create(new_udp_fd);
        fd_manager.get_info(new_udp_fd64).p_conn_info = &conn_info;

        mylog(log_trace, "[%s]u64: %lld\n", ip_port, new_udp_fd64);
        ev.events = EPOLLIN;

        ev.data.u64 = new_udp_fd64;

        int ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, new_udp_fd, &ev);

        if (ret != 0) {
            mylog(log_warn, "[%s]add udp_fd error\n", ip_port);
            // Release through fd_manager rather than a bare close(): the fd
            // was already registered above, and closing only the descriptor
            // would leave a stale fd<->fd64 entry behind that collides once
            // the kernel hands out the same fd number again.
            // NOTE(review): relies on fd_manager.fd64_close() both closing the
            // fd and dropping its bookkeeping - confirm in fd_manager.cpp.
            fd_manager.fd64_close(new_udp_fd64);
            return -1;
        }

        conn_info.blob->conv_manager.s.insert_conv(tmp_conv_id, new_udp_fd64);

        mylog(log_info, "[%s]new conv conv_id=%x, assigned fd=%d\n", ip_port,
              tmp_conv_id, new_udp_fd);
    }

    fd64_t fd64 = conn_info.blob->conv_manager.s.find_data_by_conv(tmp_conv_id);

    conn_info.blob->conv_manager.s.update_active_time(tmp_conv_id);

    int fd = fd_manager.to_fd(fd64);

    mylog(log_trace, "[%s]received a data from fake tcp,len:%d\n", ip_port, data_len);
    // Strip the conversation id and forward the rest to the UDP socket.
    int ret = send(fd, data + sizeof(u32_t),
                   data_len - (sizeof(u32_t)), 0);

    mylog(log_trace, "[%s]%d byte sent  ,fd :%d\n ", ip_port, ret, fd);
    if (ret < 0) {
        // Best effort: a failed forward is only logged.
        mylog(log_warn, "send returned %d\n", ret);
    }
    return 0;
}

// Completes the handshake for one connection: either promotes it to
// server_ready or, when the peer's const id is already known, hands the
// existing ready connection over to the new address (connection recovery).
//
//   conn_info            - connection that just finished the handshake
//   ip_port              - printable peer address, used only for logging
//   tmp_oppsite_const_id - const id announced by the peer
//
// Always returns 0; inconsistent internal state terminates via myexit().
int server_on_raw_recv_pre_ready(conn_info_t &conn_info, char *ip_port, u32_t tmp_oppsite_const_id) {
    mylog(log_info, "[%s]received handshake oppsite_id:%x  my_id:%x\n", ip_port, conn_info.oppsite_id, conn_info.my_id);

    mylog(log_info, "[%s]oppsite const_id:%x \n", ip_port, tmp_oppsite_const_id);

    const bool const_id_known =
        conn_manager.const_id_mp.find(tmp_oppsite_const_id) != conn_manager.const_id_mp.end();

    if (const_id_known) {
        // ---- recovery: the peer reconnected (possibly from a new address) ----
        conn_info_t &ori_conn_info = *conn_manager.const_id_mp[tmp_oppsite_const_id];

        if (ori_conn_info.state.server_current_state != server_ready) {
            mylog(log_fatal, "[%s]this should never happen\n", ip_port);
            myexit(-1);
            return 0;
        }

        if (conn_info.last_state_time < ori_conn_info.last_state_time) {
            mylog(log_info, "[%s]conn_info.last_state_time<ori_conn_info.last_state_time. ignored new handshake\n", ip_port);
            conn_info.state.server_current_state = server_idle;
            conn_info.oppsite_const_id = 0;
            return 0;
        }

        // Both the old and the new address must already be tracked.
        address_t old_addr;
        old_addr.from_ip_port_new(raw_ip_version, &ori_conn_info.raw_info.recv_info.new_src_ip, ori_conn_info.raw_info.recv_info.src_port);
        if (!conn_manager.exist(old_addr))  // TODO remove this
        {
            mylog(log_fatal, "[%s]this shouldnt happen\n", ip_port);
            myexit(-1);
        }

        address_t new_addr;
        new_addr.from_ip_port_new(raw_ip_version, &conn_info.raw_info.recv_info.new_src_ip, conn_info.raw_info.recv_info.src_port);
        if (!conn_manager.exist(new_addr))  // TODO remove this
        {
            mylog(log_fatal, "[%s]this shouldnt happen2\n", ip_port);
            myexit(-1);
        }

        // Exchange the two map slots so that the established connection is
        // now reachable under the new address.
        conn_info_t *&old_slot = conn_manager.find_insert_p(old_addr);
        conn_info_t *&new_slot = conn_manager.find_insert_p(new_addr);
        conn_info_t *held = new_slot;
        new_slot = old_slot;
        old_slot = held;

        mylog(log_info, "[%s]grabbed a connection\n", ip_port);

        ori_conn_info.recover(conn_info);

        // hb_mode 0 sends an empty heartbeat, other modes send hb_len bytes.
        send_safer(ori_conn_info, 'h', hb_buf, hb_mode == 0 ? 0 : hb_len);

        ori_conn_info.last_hb_recv_time = get_current_time();

        // The handshake-only connection object is parked as idle.
        conn_info.state.server_current_state = server_idle;
        conn_info.oppsite_const_id = 0;
        return 0;
    }

    // ---- first time this const id is seen: promote to ready ----
    if (conn_manager.ready_num >= max_ready_conn_num) {
        mylog(log_info, "[%s]max_ready_conn_num,cant turn to ready\n", ip_port);
        conn_info.state.server_current_state = server_idle;
        return 0;
    }

    conn_info.prepare();
    conn_info.state.server_current_state = server_ready;
    conn_info.oppsite_const_id = tmp_oppsite_const_id;
    conn_manager.ready_num++;
    conn_manager.const_id_mp[tmp_oppsite_const_id] = &conn_info;

    // last_state_time is intentionally left untouched here.

    conn_info.last_hb_recv_time = get_current_time();
    conn_info.last_hb_sent_time = conn_info.last_hb_recv_time;

    send_safer(conn_info, 'h', hb_buf, hb_mode == 0 ? 0 : hb_len);

    mylog(log_info, "[%s]changed state to server_ready\n", ip_port);
    conn_info.blob->anti_replay.re_init();

    // Every ready connection gets its own timer; its fd64 is stored in the
    // connection and mapped back to the connection through fd_manager.
    int timer_fd;
    set_timer_server(epollfd, timer_fd, conn_info.timer_fd64);

    fd_manager.get_info(conn_info.timer_fd64).p_conn_info = &conn_info;
    return 0;
}
// Processes a handshake packet for a connection in the server_handshake1
// state.
//
// The payload carries three 32-bit ids in network byte order: the peer's id,
// the id the peer believes to be ours, and the peer's const id.
//   - "our" id == 0: initial handshake (again) -> answer with our handshake.
//   - "our" id matches conn_info.my_id: finish via
//     server_on_raw_recv_pre_ready().
//   - anything else is logged and ignored.
//
// Returns -1 when the payload is too short to hold the three ids, otherwise 0.
int server_on_raw_recv_handshake1(conn_info_t &conn_info, char *ip_port, char *data, int data_len) {
    raw_info_t &raw_info = conn_info.raw_info;
    packet_info_t &send_info = raw_info.send_info;
    packet_info_t &recv_info = raw_info.recv_info;

    if (data_len < int(3 * sizeof(my_id_t))) {
        mylog(log_debug, "[%s] data_len=%d too short to be a handshake\n", ip_port, data_len);
        return -1;
    }

    // Reads the id stored at the given byte offset (memcpy: data may be
    // unaligned) and converts it to host byte order.
    auto read_id = [&](size_t offset) -> my_id_t {
        my_id_t value;
        memcpy(&value, &data[offset], sizeof(value));
        value = ntohl(value);
        return value;
    };

    // Derives the reply's protocol fields from the packet just received.
    auto mirror_received_fields = [&]() {
        if (raw_mode == mode_faketcp) {
            send_info.seq = recv_info.ack_seq;
            send_info.ack_seq = recv_info.seq + raw_info.recv_info.data_len;
            send_info.ts_ack = recv_info.ts;
        }
        if (raw_mode == mode_icmp) {
            send_info.my_icmp_seq = recv_info.my_icmp_seq;
        }
    };

    const my_id_t tmp_oppsite_id = read_id(0);
    const my_id_t tmp_my_id = read_id(sizeof(my_id_t));

    if (tmp_my_id == 0) {
        // Initial handshake received (again): reply with our own ids.
        mirror_received_fields();
        send_handshake(raw_info, conn_info.my_id, tmp_oppsite_id, const_id);

        mylog(log_info, "[%s]changed state to server_handshake1,my_id is %x\n", ip_port, conn_info.my_id);
        return 0;
    }

    if (tmp_my_id != conn_info.my_id) {
        mylog(log_debug, "[%s]invalid my_id %x,my_id is %x\n", ip_port, tmp_my_id, conn_info.my_id);
        return 0;
    }

    // The peer echoed our id: remember its id and finish the handshake.
    conn_info.oppsite_id = tmp_oppsite_id;

    const my_id_t tmp_oppsite_const_id = read_id(sizeof(my_id_t) * 2);

    mirror_received_fields();

    server_on_raw_recv_pre_ready(conn_info, ip_port, tmp_oppsite_const_id);
    return 0;
}
// Placeholder: ignores all arguments and returns 0.
int server_on_recv_safer_multi(conn_info_t &conn_info, char type, char *data, int data_len) {
    return 0;
}
// Entry point for every raw packet the server receives.
//
// The packet is first peeked to learn its source address, then dispatched:
//   1. faketcp SYN: answered with SYN+ACK unless the address already belongs
//      to a ready connection (then the packet is discarded).
//   2. unknown address: must be a valid initial handshake; a new connection in
//      state server_handshake1 is created and the handshake is processed.
//   3. known address: handled according to the connection state
//      (server_handshake1 / server_ready / server_idle).
//
// Returns 0 when the packet was handled or deliberately ignored and -1 when it
// was rejected (peek/receive failure, malformed handshake, oversized data).
int server_on_raw_recv_multi()  // called when server received an raw packet
{
    // NOTE(review): dummy_buf is not referenced anywhere in this function.
    char dummy_buf[buf_len];
    raw_info_t peek_raw_info;
    peek_raw_info.peek = 1;
    packet_info_t &peek_info = peek_raw_info.recv_info;
    mylog(log_trace, "got a packet\n");
    if (pre_recv_raw_packet() < 0) return -1;
    if (peek_raw(peek_raw_info) < 0) {
        // Could not parse the packet: drop it so it does not stay queued.
        discard_raw_packet();
        // recv(raw_recv_fd, 0,0, 0  );//
        // struct sockaddr saddr;
        // socklen_t saddr_size=sizeof(saddr);
        /// recvfrom(raw_recv_fd, 0,0, 0 ,&saddr , &saddr_size);//
        mylog(log_trace, "peek_raw failed\n");
        return -1;
    } else {
        mylog(log_trace, "peek_raw success\n");
    }
    // u32_t ip=peek_info.src_ip;uint16_t port=peek_info.src_port;

    // Filled in by the recv_* call of whichever branch handles the packet.
    int data_len;
    char *data;

    // Source address of the packet: key into conn_manager and log tag.
    address_t addr;
    addr.from_ip_port_new(raw_ip_version, &peek_info.new_src_ip, peek_info.src_port);

    char ip_port[max_addr_len];
    addr.to_str(ip_port);
    // sprintf(ip_port,"%s:%d",my_ntoa(ip),port);
    mylog(log_trace, "[%s]peek_raw\n", ip_port);

    // ---- case 1: faketcp SYN ----
    if (raw_mode == mode_faketcp && peek_info.syn == 1) {
        if (!conn_manager.exist(addr) || conn_manager.find_insert(addr).state.server_current_state != server_ready) {  // reply any syn ,before state become ready

            // A throwaway raw_info is used: a SYN does not create a connection.
            raw_info_t tmp_raw_info;
            if (recv_raw0(tmp_raw_info, data, data_len) < 0) {
                return 0;
            }
            if (data_len >= max_data_len + 1) {
                mylog(log_debug, "data_len=%d >= max_data_len+1,ignored", data_len);
                return -1;
            }
            // With a dummy tcp socket no SYN+ACK is crafted here.
            if (use_tcp_dummy_socket != 0)
                return 0;
            raw_info_t &raw_info = tmp_raw_info;
            packet_info_t &send_info = raw_info.send_info;
            packet_info_t &recv_info = raw_info.recv_info;

            // The reply goes back where the packet came from: swap the
            // source and destination endpoints.
            send_info.new_src_ip = recv_info.new_dst_ip;
            send_info.src_port = recv_info.dst_port;

            send_info.dst_port = recv_info.src_port;
            send_info.new_dst_ip = recv_info.new_src_ip;

            if (lower_level) {
                handle_lower_level(raw_info);
            }

            // Only a pure SYN (no payload, no ACK flag) is answered.
            if (data_len == 0 && raw_info.recv_info.syn == 1 && raw_info.recv_info.ack == 0) {
                send_info.ack_seq = recv_info.seq + 1;

                send_info.psh = 0;
                send_info.syn = 1;
                send_info.ack = 1;
                send_info.ts_ack = recv_info.ts;

                mylog(log_info, "[%s]received syn,sent syn ack back\n", ip_port);
                send_raw0(raw_info, 0, 0);
                return 0;
            }
        } else {
            // SYN for an already-ready connection: ignore it.
            discard_raw_packet();
            // recv(raw_recv_fd, 0,0,0);
        }
        return 0;
    }
    // ---- case 2: packet from an address without a connection ----
    if (!conn_manager.exist(addr)) {
        if (conn_manager.mp.size() >= max_handshake_conn_num) {
            mylog(log_info, "[%s]reached max_handshake_conn_num,ignored new handshake\n", ip_port);
            discard_raw_packet();
            // recv(raw_recv_fd, 0,0, 0  );//
            return 0;
        }

        // Received into a temporary first; a connection entry is only created
        // after the packet passed the handshake checks below.
        raw_info_t tmp_raw_info;

        if (raw_mode == mode_icmp) {
            tmp_raw_info.send_info.dst_port = tmp_raw_info.send_info.src_port = addr.get_port();
        }
        if (recv_bare(tmp_raw_info, data, data_len) < 0) {
            return 0;
        }
        if (data_len < int(3 * sizeof(my_id_t))) {
            mylog(log_debug, "[%s]too short to be a handshake\n", ip_port);
            return -1;
        }

        // An initial handshake carries 0 in the second id field.
        // id_t zero=ntohl(* ((u32_t *)&data[sizeof(id_t)]));
        my_id_t zero;
        memcpy(&zero, &data[sizeof(my_id_t)], sizeof(zero));
        zero = ntohl(zero);

        if (zero != 0) {
            mylog(log_debug, "[%s]not a invalid initial handshake\n", ip_port);
            return -1;
        }

        mylog(log_info, "[%s]got packet from a new ip\n", ip_port);

        conn_info_t &conn_info = conn_manager.find_insert(addr);
        conn_info.raw_info = tmp_raw_info;
        raw_info_t &raw_info = conn_info.raw_info;

        packet_info_t &send_info = conn_info.raw_info.send_info;
        packet_info_t &recv_info = conn_info.raw_info.recv_info;

        // conn_info.ip_port.ip=ip;
        // conn_info.ip_port.port=port;

        // Replies go back to the sender: swap source and destination.
        send_info.new_src_ip = recv_info.new_dst_ip;
        send_info.src_port = recv_info.dst_port;

        send_info.dst_port = recv_info.src_port;
        send_info.new_dst_ip = recv_info.new_src_ip;

        if (lower_level) {
            handle_lower_level(raw_info);
        }

        // id_t tmp_oppsite_id=  ntohl(* ((u32_t *)&data[0]));
        // mylog(log_info,"[%s]handshake1 received %x\n",ip_port,tmp_oppsite_id);

        conn_info.my_id = get_true_random_number_nz();

        mylog(log_info, "[%s]created new conn,state: server_handshake1,my_id is %x\n", ip_port, conn_info.my_id);

        conn_info.state.server_current_state = server_handshake1;
        conn_info.last_state_time = get_current_time();

        server_on_raw_recv_handshake1(conn_info, ip_port, data, data_len);
        return 0;
    }

    // ---- case 3: packet for an existing connection ----
    conn_info_t &conn_info = conn_manager.find_insert(addr);  // insert if not exist
    packet_info_t &send_info = conn_info.raw_info.send_info;
    packet_info_t &recv_info = conn_info.raw_info.recv_info;
    raw_info_t &raw_info = conn_info.raw_info;

    if (conn_info.state.server_current_state == server_handshake1) {
        if (recv_bare(raw_info, data, data_len) != 0) {
            return -1;
        }
        return server_on_raw_recv_handshake1(conn_info, ip_port, data, data_len);
    }
    if (conn_info.state.server_current_state == server_ready) {
        // One raw packet may carry several messages; type_vec[i] belongs to
        // data_vec[i].
        vector<char> type_vec;
        vector<string> data_vec;
        recv_safer_multi(conn_info, type_vec, data_vec);
        if (data_vec.empty()) {
            mylog(log_debug, "recv_safer failed!\n");
            return -1;
        }

        for (int i = 0; i < (int)type_vec.size(); i++) {
            char type = type_vec[i];
            char *data = (char *)data_vec[i].c_str();  // be careful, do not append data to it
            int data_len = data_vec[i].length();
            server_on_raw_recv_ready(conn_info, ip_port, type, data, data_len);
        }
        return 0;
    }

    if (conn_info.state.server_current_state == server_idle) {
        // Idle connections ignore all traffic.
        discard_raw_packet();
        // recv(raw_recv_fd, 0,0, 0  );//
        return 0;
    }
    mylog(log_fatal, "we should never run to here\n");
    myexit(-1);
    return -1;
}

// Called when one of the per-conversation UDP sockets becomes readable.
//
// Reads a single datagram from the socket behind fd64 and sends it to the peer
// through the raw connection, tagged with the conversation id that owns the
// socket.
//
//   conn_info - connection owning the conversation; must be server_ready
//   fd64      - fd_manager handle of the readable UDP socket
//
// Returns -1 when the datagram is oversized or recv() fails, otherwise 0.
int server_on_udp_recv(conn_info_t &conn_info, fd64_t fd64) {
    char packet[buf_len];

    if (!(conn_info.state.server_current_state == server_ready))  // TODO remove this for peformance
    {
        mylog(log_fatal, "p_conn_info->state.server_current_state!=server_ready!!!this shouldnt happen\n");
        myexit(-1);
    }

    assert(conn_info.blob->conv_manager.s.is_data_used(fd64));

    u32_t conv_id = conn_info.blob->conv_manager.s.find_conv_by_data(fd64);

    int udp_fd = fd_manager.to_fd(fd64);

    // Ask for one byte more than allowed so that an oversized datagram can be
    // told apart from a maximum-sized one.
    int received = recv(udp_fd, packet, max_data_len + 1, 0);

    mylog(log_trace, "received a packet from udp_fd,len:%d\n", received);

    if (received == max_data_len + 1) {
        mylog(log_warn, "huge packet, data_len > %d,dropped\n", max_data_len);
        return -1;
    }

    if (received < 0) {
        mylog(log_debug, "udp fd,recv_len<0 continue,%s\n", strerror(errno));
        return -1;
    }

    if (received >= mtu_warn) {
        mylog(log_warn, "huge packet,data len=%d (>=%d).strongly suggested to set a smaller mtu at upper level,to get rid of this warn\n ", received, mtu_warn);
    }

    // The server does not refresh the conversation's active time from the UDP
    // side; that only happens on the raw side.

    if (conn_info.state.server_current_state == server_ready) {
        send_data_safer(conn_info, packet, received, conv_id);
        mylog(log_trace, "send_data_safer ,sent !!\n");
    }

    return 0;
}

// epoll-based main loop of the server.
//
// Setup phase:
//   1. derive bind_addr from local_addr when a non-zero listen address was given;
//   2. in lower-level manual mode, resolve the interface index;
//   3. create and bind (and for faketcp, listen on) a socket on the listen address;
//   4. install the filter for the listen port and create the epoll instance;
//   5. register the raw receive fd, the global timer and, if configured, the command fifo.
//
// Dispatch phase (endless loop): every ready event is identified by its epoll user data:
//   - timer_fd       -> conn_manager.clear_inactive()
//   - raw_recv_fd    -> server_on_raw_recv_multi()
//   - fifo_fd        -> read and log a command line (no commands are implemented here)
//   - value > 2^32-1 -> an fd64 handle: either a connection's timer or one of its UDP sockets
//
// Fatal conditions are reported through mylog(log_fatal, ...) followed by myexit().
// The loop itself never breaks, so the trailing `return 0` is not reached.
int server_event_loop() {
    char buf[buf_len];

    int ret;

    if (raw_ip_version == AF_INET) {
        if (local_addr.inner.ipv4.sin_addr.s_addr != 0) {
            bind_addr_used = 1;
            bind_addr.v4 = local_addr.inner.ipv4.sin_addr.s_addr;
        }
    } else {
        assert(raw_ip_version == AF_INET6);
        char zero_arr[16] = {0};
        if (memcmp(&local_addr.inner.ipv6.sin6_addr, zero_arr, 16) != 0) {
            bind_addr_used = 1;
            bind_addr.v6 = local_addr.inner.ipv6.sin6_addr;
        }
    }
    // Only the server has a bind address; the client sets it to zero.

    if (lower_level) {
        if (lower_level_manual) {
            init_ifindex(if_name, raw_send_fd, ifindex);
            mylog(log_info, "we are running at lower-level (manual) mode\n");
        } else {
            mylog(log_info, "we are running at lower-level (auto) mode\n");
        }
    }

    if (raw_mode == mode_faketcp) {
        bind_fd = socket(local_addr.get_type(), SOCK_STREAM, 0);
    } else if (raw_mode == mode_udp || raw_mode == mode_icmp) {
        // Bind an address to avoid collision; icmp has no port, so a udp port is bound instead.
        bind_fd = socket(local_addr.get_type(), SOCK_DGRAM, 0);
    }

    if (bind(bind_fd, (struct sockaddr *)&local_addr.inner, local_addr.get_len()) != 0) {
        mylog(log_fatal, "bind fail\n");
        myexit(-1);
    }

    if (raw_mode == mode_faketcp) {
        if (listen(bind_fd, SOMAXCONN) != 0) {
            mylog(log_fatal, "listen fail\n");
            myexit(-1);
        }
    }

    init_filter(local_addr.get_port());  // bpf filter

    epollfd = epoll_create1(0);
    const int max_events = 4096;

    struct epoll_event ev, events[max_events];
    if (epollfd < 0) {
        mylog(log_fatal, "epoll return %d\n", epollfd);
        myexit(-1);
    }

    ev.events = EPOLLIN;
    ev.data.u64 = raw_recv_fd;

    ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, raw_recv_fd, &ev);
    if (ret != 0) {
        mylog(log_fatal, "add raw_fd error\n");
        myexit(-1);
    }
    int timer_fd;

    set_timer(epollfd, timer_fd);

    u64_t begin_time = 0;
    u64_t end_time = 0;

    mylog(log_info, "now listening at %s\n", local_addr.get_str());

    int fifo_fd = -1;

    if (fifo_file[0] != 0) {
        fifo_fd = create_fifo(fifo_file);
        ev.events = EPOLLIN;
        ev.data.u64 = fifo_fd;

        ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, fifo_fd, &ev);
        if (ret != 0) {
            mylog(log_fatal, "add fifo_fd to epoll error %s\n", strerror(errno));
            myexit(-1);
        }
        mylog(log_info, "fifo_file=%s\n", fifo_file);
    }

    while (1) {
        if (about_to_exit) myexit(0);

        int nfds = epoll_wait(epollfd, events, max_events, 180 * 1000);
        if (nfds < 0) {  // zero (timeout) is allowed
            if (errno == EINTR) {
                // nfds stays negative, so the dispatch loop below simply runs zero times.
                mylog(log_info, "epoll interrupted by signal,continue\n");
            } else {
                mylog(log_fatal, "epoll_wait return %d,%s\n", nfds, strerror(errno));
                myexit(-1);
            }
        }
        int idx;
        for (idx = 0; idx < nfds; ++idx) {
            epoll_trigger_counter++;
            if ((events[idx].data.u64) == (u64_t)timer_fd) {
                if (debug_flag) begin_time = get_current_time();
                conn_manager.clear_inactive();
                // Drain the 8-byte expiration counter so the timer fd stops being readable.
                u64_t dummy;
                int unused = read(timer_fd, &dummy, 8);
                if (debug_flag) {
                    end_time = get_current_time();
                    mylog(log_debug, "timer_fd,%llu,%llu,%llu\n", begin_time, end_time, end_time - begin_time);
                }

                mylog(log_trace, "epoll_trigger_counter:  %d \n", epoll_trigger_counter);
                epoll_trigger_counter = 0;

            } else if (events[idx].data.u64 == (u64_t)raw_recv_fd) {
                if (debug_flag) begin_time = get_current_time();
                server_on_raw_recv_multi();
                if (debug_flag) {
                    end_time = get_current_time();
                    mylog(log_debug, "raw_recv_fd,%llu,%llu,%llu  \n", begin_time, end_time, end_time - begin_time);
                }
            } else if (events[idx].data.u64 == (u64_t)fifo_fd) {
                // Leave room for the terminating NUL written below; reading sizeof(buf)
                // bytes would make buf[len] index one past the end of the array.
                int len = read(fifo_fd, buf, sizeof(buf) - 1);
                if (len < 0) {
                    mylog(log_warn, "fifo read failed len=%d,errno=%s\n", len, strerror(errno));
                    continue;
                }
                buf[len] = 0;
                // NOTE(review): len is not decremented, so this strips at most one trailing '\n'.
                while (len >= 1 && buf[len - 1] == '\n')
                    buf[len - 1] = 0;
                mylog(log_info, "got data from fifo,len=%d,s=[%s]\n", len, buf);
                mylog(log_info, "unknown command\n");
            } else if (events[idx].data.u64 > u32_t(-1)) {
                // Values above the 32-bit range are fd64 handles managed by fd_manager.
                fd64_t fd64 = events[idx].data.u64;
                if (!fd_manager.exist(fd64)) {
                    // The handle was closed while handling an earlier event of this batch.
                    // Skip just this event; returning here would terminate the server loop.
                    mylog(log_trace, "fd64 no longer exist\n");
                    continue;
                }
                assert(fd_manager.exist_info(fd64));
                conn_info_t *p_conn_info = fd_manager.get_info(fd64).p_conn_info;
                conn_info_t &conn_info = *p_conn_info;
                if (fd64 == conn_info.timer_fd64) {
                    // Per-connection timer.
                    if (debug_flag) begin_time = get_current_time();
                    int fd = fd_manager.to_fd(fd64);
                    u64_t dummy;
                    int unused = read(fd, &dummy, 8);
                    assert(conn_info.state.server_current_state == server_ready);  // TODO remove this for performance
                    server_on_timer_multi(conn_info);
                    if (debug_flag) {
                        end_time = get_current_time();
                        mylog(log_debug, "(events[idx].data.u64 >>32u) == 2u ,%llu,%llu,%llu  \n", begin_time, end_time, end_time - begin_time);
                    }
                } else {
                    // Any other fd64 of the connection is one of its UDP sockets.
                    if (debug_flag) begin_time = get_current_time();
                    server_on_udp_recv(conn_info, fd64);
                    if (debug_flag) {
                        end_time = get_current_time();
                        mylog(log_debug, "(events[idx].data.u64 >>32u) == 1u,%llu,%llu,%llu  \n", begin_time, end_time, end_time - begin_time);
                    }
                }
            } else {
                mylog(log_fatal, "unknown fd,this should never happen\n");
                myexit(-1);
            }
        }
    }
    return 0;
}

#endif


================================================
FILE: third-party/luci-app-udp2raw/moved_to_new_repo
================================================
https://github.com/sensec/luci-app-udp2raw


================================================
FILE: third-party/udp2raw-cmake-makefile/CMakeLists.txt
================================================
# Stand-alone CMake build producing the statically linked `udp2raw_cmake` executable.
#
# Source paths are relative to the directory containing this file.
# NOTE(review): presumably this file is meant to be copied to the repository root
# before use -- confirm.
cmake_minimum_required(VERSION 3.7)
project(udp2raw_tunnel)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# The AES and MD5 helpers exist in the repository as .cpp files, so they are listed
# with that extension and need no per-file LANGUAGE override.
# NOTE(review): the repository root also contains sources that are not listed here
# (e.g. client.cpp, connection.cpp, fd_manager.cpp, misc.cpp); verify this list
# against the top-level CMakeLists.txt before relying on this build.
set(SOURCE_FILES
        lib/aes_faster_c/aes.cpp
        lib/aes_faster_c/wrapper.cpp
        lib/md5.cpp
        common.cpp
        encrypt.cpp
        log.cpp
        main.cpp
        network.cpp
        )

add_executable(udp2raw_cmake ${SOURCE_FILES})

# Warning flags are attached to the target instead of overwriting CMAKE_CXX_FLAGS,
# so flags supplied by the user or a toolchain file are preserved.
target_compile_options(udp2raw_cmake PRIVATE
        -Wall
        -Wextra
        -Wno-unused-variable
        -Wno-unused-parameter
        -Wno-missing-field-initializers
        )

# -static only matters at link time. The LINK_FLAGS property is used because
# target_link_options() is not available in CMake 3.7.
set_target_properties(udp2raw_cmake PROPERTIES LINK_FLAGS "-static")

target_link_libraries(udp2raw_cmake PRIVATE rt pthread)


================================================
FILE: third-party/udp2raw-openwrt-makefile/moved_to_new_repo
================================================
https://github.com/sensec/openwrt-udp2raw