Repository: vipshop/vire Branch: master Commit: fdb7db80f875 Files: 174 Total size: 1.6 MB Directory structure: gitextract_y_mdrj8b/ ├── .gitignore ├── ChangeLog ├── LICENSE ├── Makefile.am ├── NOTICE ├── README.md ├── conf/ │ └── vire.conf ├── configure.ac ├── dep/ │ ├── .gitignore │ ├── Makefile.am │ ├── ae/ │ │ ├── Makefile.am │ │ ├── ae.c │ │ ├── ae.h │ │ ├── ae_epoll.c │ │ ├── ae_evport.c │ │ ├── ae_kqueue.c │ │ └── ae_select.c │ ├── darray/ │ │ ├── Makefile.am │ │ ├── darray.c │ │ └── darray.h │ ├── dhashkit/ │ │ ├── Makefile.am │ │ ├── dcrc16.c │ │ ├── dcrc32.c │ │ ├── dfnv.c │ │ ├── dhashkit.h │ │ ├── dhsieh.c │ │ ├── djenkins.c │ │ ├── dketama.c │ │ ├── dmd5.c │ │ ├── dmodula.c │ │ ├── dmurmur.c │ │ ├── done_at_a_time.c │ │ ├── drandom.c │ │ └── dsha1.c │ ├── dlist/ │ │ ├── Makefile.am │ │ ├── dlist.c │ │ ├── dlist.h │ │ ├── dlockqueue.c │ │ ├── dlockqueue.h │ │ ├── dmtqueue.c │ │ └── dmtqueue.h │ ├── dmalloc/ │ │ ├── Makefile.am │ │ ├── dmalloc.c │ │ └── dmalloc.h │ ├── himemcached-0.1.0/ │ │ ├── Makefile.am │ │ ├── himcdep/ │ │ │ ├── sds.c │ │ │ └── sds.h │ │ ├── himcread.c │ │ ├── himcread.h │ │ ├── himemcached.c │ │ └── himemcached.h │ ├── hiredis-0.13.3/ │ │ └── .gitignore │ ├── jemalloc-4.2.0/ │ │ └── .gitignore │ ├── jemalloc-4.2.0.tar.bz2 │ ├── sds/ │ │ ├── Makefile.am │ │ ├── sds.c │ │ ├── sds.h │ │ └── sdsalloc.h │ └── util/ │ ├── Makefile.am │ ├── dlog.c │ ├── dlog.h │ ├── dspecialconfig.h │ ├── dutil.c │ └── dutil.h ├── m4/ │ └── .gitignore ├── notes/ │ ├── c-styleguide.txt │ ├── debug.txt │ └── socket.txt ├── scripts/ │ └── .gitignore ├── src/ │ ├── Makefile.am │ ├── vr.c │ ├── vr_aof.c │ ├── vr_aof.h │ ├── vr_backend.c │ ├── vr_backend.h │ ├── vr_bitops.c │ ├── vr_bitops.h │ ├── vr_block.c │ ├── vr_block.h │ ├── vr_client.c │ ├── vr_client.h │ ├── vr_command.c │ ├── vr_command.h │ ├── vr_conf.c │ ├── vr_conf.h │ ├── vr_connection.c │ ├── vr_connection.h │ ├── vr_core.c │ ├── vr_core.h │ ├── vr_db.c │ ├── vr_db.h │ ├── vr_dict.c │ ├── 
vr_dict.h │ ├── vr_eventloop.c │ ├── vr_eventloop.h │ ├── vr_hyperloglog.c │ ├── vr_hyperloglog.h │ ├── vr_intset.c │ ├── vr_intset.h │ ├── vr_listen.c │ ├── vr_listen.h │ ├── vr_lzf.h │ ├── vr_lzfP.h │ ├── vr_lzf_c.c │ ├── vr_lzf_d.c │ ├── vr_master.c │ ├── vr_master.h │ ├── vr_multi.c │ ├── vr_multi.h │ ├── vr_notify.c │ ├── vr_notify.h │ ├── vr_object.c │ ├── vr_object.h │ ├── vr_pubsub.c │ ├── vr_pubsub.h │ ├── vr_quicklist.c │ ├── vr_quicklist.h │ ├── vr_rbtree.c │ ├── vr_rbtree.h │ ├── vr_rdb.c │ ├── vr_rdb.h │ ├── vr_replication.c │ ├── vr_replication.h │ ├── vr_scripting.c │ ├── vr_scripting.h │ ├── vr_server.c │ ├── vr_server.h │ ├── vr_signal.c │ ├── vr_signal.h │ ├── vr_slowlog.c │ ├── vr_slowlog.h │ ├── vr_stats.c │ ├── vr_stats.h │ ├── vr_t_hash.c │ ├── vr_t_hash.h │ ├── vr_t_list.c │ ├── vr_t_list.h │ ├── vr_t_set.c │ ├── vr_t_set.h │ ├── vr_t_string.c │ ├── vr_t_string.h │ ├── vr_t_zset.c │ ├── vr_t_zset.h │ ├── vr_thread.c │ ├── vr_thread.h │ ├── vr_util.c │ ├── vr_util.h │ ├── vr_worker.c │ ├── vr_worker.h │ ├── vr_ziplist.c │ ├── vr_ziplist.h │ ├── vr_zipmap.c │ └── vr_zipmap.h ├── tests/ │ ├── .gitignore │ ├── Makefile.am │ ├── vrabtest.c │ ├── vrabtest.h │ ├── vrt_backend.c │ ├── vrt_backend.h │ ├── vrt_benchmark.c │ ├── vrt_check_data.c │ ├── vrt_check_data.h │ ├── vrt_dispatch_data.c │ ├── vrt_dispatch_data.h │ ├── vrt_produce_data.c │ ├── vrt_produce_data.h │ ├── vrt_public.c │ ├── vrt_public.h │ ├── vrt_simple.c │ ├── vrt_simple.h │ ├── vrt_util.c │ ├── vrt_util.h │ └── vrtest.c └── tools/ └── .gitignore ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # pyc *.pyc # Compiled Object files *.lo *.o # Compiled Dynamic libraries *.so # Compiled Static libraries *.la *.a # Compiled misc *.dep *.gcda *.gcno *.gcov # Packages *.tar.gz *.tar.bz2 # Logs *.log # vire *.swp *.~ 
*.project *.cproject # Core and executable core* vire # extracted jemalloc !/dep/jemalloc-* # Autotools .deps .libs /aclocal.m4 /autom4te.cache /stamp-h1 /autoscan.log /libtool /config/config.guess /config/config.sub /config/depcomp /config/install-sh /config/ltmain.sh /config/missing /config /config.h /config.h.in /config.h.in~ /config.log /config.status /configure.scan /configure Makefile Makefile.in ================================================ FILE: ChangeLog ================================================ 2016-10-25 deep011 * vire: version 1.0.0 release vire (pronounced "vip-redis") is a multithread redis(based on redis-3.2.0) maintains in vipshop. multi-threads support. command type CONNECTION supported: ping,quit,echo,select,auth,admin. command type SERVER supported: info,flushall,flushdb,time,dbsize,command,config,client,slowlog. command type KEY supported: del,exists,ttl,pttl,expire,expireat,pexpire,pexpireat,persist,randomkey,type,keys,scan,object. command type STRING supported: get,set,setnx,setex,psetex,incr,decr,incrby,decrby,append,strlen,getset,incrbyfloat,setbit,getbit,setrange,getrange,bitcount,bitpos,mget,mset. command type HASH supported: hset,hget,hlen,hdel,hexists,hkeys,hvals,hgetall,hincrby,hincrbyfloat,hmget,hmset,hsetnx,hstrlen,hscan. command type LIST supported: rpush,lpush,lrange,rpop,lpop,llen,lrem,ltrim,lindex,lset. command type SET supported: sadd,smembers,scard,srem,spop,sismember,sscan,sunion,sunionstore,sdiff,sdiffstore,sinter,sinterstore. command type SORTEDSET supported: zadd,zincrby,zrange,zrevrange,zrem,zcard,zcount,zrangebyscore,zrevrangebyscore,zrank,zrevrank,zscore,zremrangebyscore,zremrangebyrank,zremrangebylex,zscan. command type HYPERLOGLOG supported: pfadd,pfcount. 
config option added(used for config file and 'config get/set' command): port,databases,internal-dbs-per-databases,requirepass,adminpass,commands-need-adminpass,maxclients,maxmemory,maxmemory-policy,maxmemory-samples,max-time-complexity-limit,slowlog-log-slower-than,slowlog-max-len. viretest added that is for unit test. vireabtest added that is for compare command execution and data consistency with redis-3.2.0. vire-benchmark added that is modified from redis-benchmark but multi-threads supported and pressure test vire. ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS ================================================ FILE: Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure config.h.in config.h.in~ stamp-h.in ACLOCAL_AMFLAGS = -I m4 SUBDIRS = dep src tests EXTRA_DIST = README.md NOTICE LICENSE ChangeLog conf scripts notes ================================================ FILE: NOTICE ================================================ ================================================ FILE: README.md ================================================ # vire **vire** (pronounced "vip-redis") is a multithreaded Redis (based on redis-3.2.0) maintained by vipshop. ### QQ交流群:276406429 ## Dependencies Please install automake, libtool, autoconf and bzip2 first. ## Build To build vire from source with _debug logs enabled_ and _assertions enabled_: $ git clone https://github.com/vipshop/vire.git $ cd vire $ autoreconf -fvi $ ./configure --enable-debug=full $ make $ src/vire -h A quick checklist: + Use a newer version of gcc (older versions of gcc have problems) + Use CFLAGS="-O1" ./configure && make + Use CFLAGS="-O3 -fno-strict-aliasing" ./configure && make + `autoreconf -fvi && ./configure` needs `automake` and `libtool` to be installed ## Run $ src/vire -c conf/vire.conf -o log -T 6 -d ## Features + Multithreaded. + Fast. 
+ Works with Linux, *BSD, OS X and SmartOS (Solaris) ## Help Usage: vire [-?hVdt] [-v verbosity level] [-o output file] [-c conf file] [-p pid file] [-T worker threads number] Options: -h, --help : this help -V, --version : show version and exit -t, --test-conf : test configuration for syntax errors and exit -d, --daemonize : run as a daemon -v, --verbose=N : set logging level (default: 5, min: 0, max: 11) -o, --output=S : set logging file (default: stderr) -c, --conf-file=S : set configuration file (default: conf/vire.conf) -p, --pid-file=S : set pid file (default: off) -T, --thread_num=N : set the worker threads number (default: 6) ## Support redis command so far #### Connection + ping + quit + echo + select + auth + admin #### Server + info + flushall + flushdb + time + dbsize + command + config + client + slowlog #### Key + del + exists + ttl + pttl + expire + expireat + pexpire + pexpireat + persist + randomkey + type + keys + scan + object #### String + get + set + setnx + setex + psetex + incr + decr + incrby + decrby + append + strlen + getset + incrbyfloat + setbit + getbit + setrange + getrange + bitcount + bitpos + mget + mset #### Hash + hset + hget + hlen + hdel + hexists + hkeys + hvals + hgetall + hincrby + hincrbyfloat + hmget + hmset + hsetnx + hstrlen + hscan #### List + rpush + lpush + lrange + rpop + lpop + llen + lrem + ltrim + lindex + lset #### Set + sadd + smembers + scard + srem + spop + sismember + sscan + sunion + sunionstore + sdiff + sdiffstore + sinter + sinterstore #### SortedSet + zadd + zincrby + zrange + zrevrange + zrem + zcard + zcount + zrangebyscore + zrevrangebyscore + zrank + zrevrank + zscore + zremrangebyscore + zremrangebyrank + zremrangebylex + zscan #### HyperLogLog + pfadd + pfcount ## License Copyright © 2016 VIPSHOP Inc. 
Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 ================================================ FILE: conf/vire.conf ================================================ ################################## NETWORK ##################################### # By default, if no "bind" configuration directive is specified, Vire listens # for connections from all the network interfaces available on the server. # It is possible to listen to just one or multiple selected interfaces using # the "bind" configuration directive, followed by one or more IP addresses. # # Examples: # # bind 192.168.1.100 10.0.0.1 # bind 127.0.0.1 ::1 # # ~~~ WARNING ~~~ If the computer running Vire is directly exposed to the # internet, binding to all the interfaces is dangerous and will expose the # instance to everybody on the internet. So by default we uncomment the # following bind directive, which will force Vire to listen only on # the IPv4 loopback interface address (this means Vire will be able to # accept connections only from clients running on the same computer it # is running on). # # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES # JUST COMMENT THE FOLLOWING LINE. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # bind 127.0.0.1 # Accept connections on the specified port, default is 55555. # If port 0 is specified Vire will not listen on a TCP socket. port 55555 ################################# GENERAL ##################################### # Set the number of databases. The default database is DB 0, you can select # a different one on a per-connection basis using SELECT <dbid> where # dbid is a number between 0 and 'databases'-1 databases 6 # Set the number of internal dbs for every database. This is used so that # multiple threads avoid too much lock contention. 
internal-dbs-per-databases 6 ################################## SECURITY ################################### # Require clients to issue AUTH <PASSWORD> before processing any other # commands. This might be useful in environments in which you do not trust # others with access to the host running redis-server. # # This should stay commented out for backward compatibility and because most # people do not need auth (e.g. they run their own servers). # # Warning: since Redis is pretty fast an outside user can try up to # 150k passwords per second against a good box. This means that you should # use a very strong password otherwise it will be very easy to break. # # requirepass foobared # Require clients to issue ADMIN <PASSWORD> before processing any # commands that need admin rights. This might be useful to prevent users from # doing some dangerous actions to the host running Vire. # # This should stay commented out for backward compatibility and because most # people do not need auth (e.g. they run their own servers). # # Warning: since Redis is pretty fast an outside user can try up to # 150k passwords per second against a good box. This means that you should # use a very strong password otherwise it will be very easy to break. # # adminpass iamadmin # Make some commands need adminpass to execute. Those commands are only allowed # to be executed by an administrator. This might be useful to prevent users from # doing some dangerous actions to the host running Vire. # # commands-need-adminpass flushall flushdb keys config ################################### CLIENTS #################################### # Set the max number of connected clients at the same time. By default # this limit is set to 10000 clients, however if the Redis server is not # able to configure the process file limit to allow for the specified limit # the max number of allowed clients is set to the current file limit # minus 32 (as Redis reserves a few file descriptors for internal uses). 
# # Once the limit is reached Redis will close all the new connections sending # an error 'max number of clients reached'. # # maxclients 10000 ############################## MEMORY MANAGEMENT ################################ # Don't use more memory than the specified amount of bytes. # When the memory limit is reached Vire will try to remove keys # according to the eviction policy selected (see maxmemory-policy). # # If Vire can't remove keys according to the policy, or if the policy is # set to 'noeviction', Vire will start to reply with errors to commands # that would use more memory, like SET, LPUSH, and so on, and will continue # to reply to read-only commands like GET. # # This option is usually useful when using Vire as an LRU cache, or to set # a hard memory limit for an instance (using the 'noeviction' policy). # # WARNING: If you have slaves attached to an instance with maxmemory on, # the size of the output buffers needed to feed the slaves is subtracted # from the used memory count, so that network problems / resyncs will # not trigger a loop where keys are evicted, and in turn the output # buffer of slaves is full with DELs of keys evicted triggering the deletion # of more keys, and so forth until the database is completely emptied. # # In short... if you have slaves attached it is suggested that you set a lower # limit for maxmemory so that there is some free RAM on the system for slave # output buffers (but this is not needed if the policy is 'noeviction'). # # maxmemory <bytes> # MAXMEMORY POLICY: how Vire will select what to remove when maxmemory # is reached. 
You can select among six behaviors: # # volatile-lru -> remove the key with an expire set using an LRU algorithm, not supported now # allkeys-lru -> remove any key according to the LRU algorithm, not supported now # volatile-random -> remove a random key with an expire set # allkeys-random -> remove a random key, any key # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # # Note: with any of the above policies, Vire will return an error on write # operations, when there are no suitable keys for eviction. # # At the date of writing these commands are: set setnx setex append # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby # getset mset msetnx exec sort # # The default is: # # maxmemory-policy noeviction # LRU and minimal TTL algorithms are not precise algorithms but approximated # algorithms (in order to save memory), so you can tune it for speed or # accuracy. By default Vire will check five keys and pick the one that was # used less recently, you can change the sample size using the following # configuration directive. # # The default of 5 produces good enough results. 10 Approximates very closely # true LRU but costs a bit more CPU. 3 is very fast but not very accurate. # # maxmemory-samples 5 # Max time complexity limit for commands whose time complexity is O(n). # # If n is bigger than max-time-complexity-limit, an error is returned to the client. # The default of 0 means unlimited. # # At the date of writing the affected commands are: keys # # max-time-complexity-limit 0 ################################## SLOW LOG ################################### # The Vire Slow Log is a system to log queries that exceeded a specified # execution time. 
The execution time does not include the I/O operations # like talking with the client, sending the reply and so forth, # but just the time needed to actually execute the command (this is the only # stage of command execution where the thread is blocked and can not serve # other requests in the meantime). # # You can configure the slow log with two parameters: one tells Vire # what is the execution time, in microseconds, to exceed in order for the # command to get logged, and the other parameter is the length of the # slow log. When a new command is logged the oldest one is removed from the # queue of logged commands. # The following time is expressed in microseconds, so 1000000 is equivalent # to one second. Note that a negative number disables the slow log, while # a value of zero forces the logging of every command. slowlog-log-slower-than 10000 # There is no limit to this length. Just be aware that it will consume memory. # You can reclaim memory used by the slow log with SLOWLOG RESET. 
slowlog-max-len 128 ================================================ FILE: configure.ac ================================================ # Define the package version numbers and the bug reporting address m4_define([VR_MAJOR], 1) m4_define([VR_MINOR], 0) m4_define([VR_PATCH], 0) m4_define([VR_BUGS], [diguo58@gmail.com]) # Initialize autoconf AC_PREREQ([2.63]) AC_INIT([vire], [VR_MAJOR.VR_MINOR.VR_PATCH], [VR_BUGS]) AC_CONFIG_SRCDIR([src/vr.c]) AC_CONFIG_AUX_DIR([config]) AC_CONFIG_HEADERS([config.h:config.h.in]) AC_CONFIG_MACRO_DIR([m4]) # Initialize automake AM_INIT_AUTOMAKE([1.9 foreign]) # Define macro variables for the package version numbers AC_DEFINE(VR_VERSION_MAJOR, VR_MAJOR, [Define the major version number]) AC_DEFINE(VR_VERSION_MINOR, VR_MINOR, [Define the minor version number]) AC_DEFINE(VR_VERSION_PATCH, VR_PATCH, [Define the patch version number]) AC_DEFINE(VR_VERSION_STRING, "VR_MAJOR.VR_MINOR.VR_PATCH", [Define the version string]) # Checks for language AC_LANG([C]) # Checks for programs AC_PROG_AWK AC_PROG_CC AC_PROG_CPP AC_PROG_CXX AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_RANLIB AC_PROG_LIBTOOL # Checks for typedefs, structures, and compiler characteristics AC_C_INLINE AC_TYPE_INT8_T AC_TYPE_INT16_T AC_TYPE_INT32_T AC_TYPE_INT64_T AC_TYPE_INTMAX_T AC_TYPE_INTPTR_T AC_TYPE_UINT8_T AC_TYPE_UINT16_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T AC_TYPE_UINTMAX_T AC_TYPE_UINTPTR_T AC_TYPE_OFF_T AC_TYPE_PID_T AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_C_BIGENDIAN( [], [AC_DEFINE(HAVE_LITTLE_ENDIAN, 1, [Define to 1 if machine is little endian])], [AC_MSG_ERROR([endianess of this machine is unknown])], [AC_MSG_ERROR([universial endianess not supported])] ) # Checks for header files AC_HEADER_STDBOOL AC_CHECK_HEADERS([fcntl.h float.h limits.h stddef.h stdlib.h string.h unistd.h]) AC_CHECK_HEADERS([inttypes.h stdint.h]) AC_CHECK_HEADERS([sys/ioctl.h sys/time.h sys/uio.h]) AC_CHECK_HEADERS([sys/socket.h sys/un.h netinet/in.h arpa/inet.h netdb.h]) 
AC_CHECK_HEADERS([execinfo.h], [AC_DEFINE(HAVE_BACKTRACE, [1], [Define to 1 if backtrace is supported])], []) AC_CHECK_HEADERS([sys/epoll.h], [], []) AC_CHECK_HEADERS([sys/event.h], [], []) # Checks for libraries AC_CHECK_LIB([m], [pow]) AC_CHECK_LIB([pthread], [pthread_create]) # Checks for library functions AC_FUNC_FORK AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([dup2 gethostname gettimeofday strerror]) AC_CHECK_FUNCS([socket]) AC_CHECK_FUNCS([memchr memmove memset]) AC_CHECK_FUNCS([strchr strndup strtoul]) AC_CACHE_CHECK([if epoll works], [ac_cv_epoll_works], AC_TRY_RUN([ #include #include #include int main(int argc, char **argv) { int fd; fd = epoll_create(256); if (fd < 0) { perror("epoll_create:"); exit(1); } exit(0); } ], [ac_cv_epoll_works=yes], [ac_cv_epoll_works=no])) AS_IF([test "x$ac_cv_epoll_works" = "xyes"], [AC_DEFINE([HAVE_EPOLL], [1], [Define to 1 if epoll is supported])], []) AC_CACHE_CHECK([if kqueue works], [ac_cv_kqueue_works], AC_TRY_RUN([ #include #include #include #include #include int main(int argc, char **argv) { int fd; fd = kqueue(); if (fd < 0) { perror("kqueue:"); exit(1); } exit(0); } ], [ac_cv_kqueue_works=yes], [ac_cv_kqueue_works=no])) AS_IF([test "x$ac_cv_kqueue_works" = "xyes"], [AC_DEFINE([HAVE_KQUEUE], [1], [Define to 1 if kqueue is supported])], []) AC_CACHE_CHECK([if event ports works], [ac_cv_evports_works], AC_TRY_RUN([ #include #include #include int main(int argc, char **argv) { int fd; fd = port_create(); if (fd < 0) { perror("port_create:"); exit(1); } exit(0); } ], [ac_cv_evports_works=yes], [ac_cv_evports_works=no])) AS_IF([test "x$ac_cv_evports_works" = "xyes"], [AC_DEFINE([HAVE_EVENT_PORTS], [1], [Define to 1 if event ports is supported])], []) AS_IF([test "x$ac_cv_epoll_works" = "xno" && test "x$ac_cv_kqueue_works" = "xno" && test "x$ac_cv_evports_works" = "xno"], [AC_MSG_ERROR([either epoll or kqueue or event ports support is required])], []) AM_CONDITIONAL([OS_LINUX], [test "x$ac_cv_epoll_works" = "xyes"]) 
AM_CONDITIONAL([OS_BSD], [test "x$ac_cv_kqueue_works" = "xyes"]) AM_CONDITIONAL([OS_SOLARIS], [test "x$ac_cv_evports_works" = "xyes"]) AM_CONDITIONAL([OS_FREEBSD], [test "$(uname -v | cut -c 1-10)" == "FreeBSD 10"]) AM_CONDITIONAL([OS_DARWIN], [test "$(uname -v | cut -c 1-6)" == "Darwin"]) # Package options AC_MSG_CHECKING([whether to enable debug logs and asserts]) AC_ARG_ENABLE([debug], [AS_HELP_STRING( [--enable-debug=@<:@full|yes|log|no@:>@], [enable debug logs and asserts @<:@default=no@:>@]) ], [], [enable_debug=no]) AS_CASE([x$enable_debug], [xfull], [AC_DEFINE([HAVE_ASSERT_PANIC], [1], [Define to 1 if panic on an assert is enabled]) AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) ], [xyes], [AC_DEFINE([HAVE_ASSERT_LOG], [1], [Define to 1 if log on an assert is enabled]) AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) ], [xlog], [AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled])], [xno], [], [AC_MSG_FAILURE([invalid value ${enable_debug} for --enable-debug])]) AC_MSG_RESULT($enable_debug) AC_MSG_CHECKING([whether to disable spinlock]) AC_ARG_ENABLE([spinlock], [AS_HELP_STRING( [--disable-spinlock], [disable spinlock]) ], [disable_spinlock=yes], [disable_spinlock=no]) AS_IF([test "x$disable_spinlock" = xyes], [], [AC_DEFINE([HAVE_SPINLOCK], [1], [Define to 1 if spinlock is not disabled])]) AC_MSG_RESULT($disable_spinlock) AC_MSG_CHECKING([whether to use jemalloc]) AC_ARG_WITH([jemalloc], AS_HELP_STRING([--with-jemalloc@<:@=yes|no@:>@], [use jemalloc(default use jemalloc)]), [ if test "$withval" = "no"; then enable_jemalloc=no else enable_jemalloc=yes fi ], [enable_jemalloc=yes]) AS_IF([test "x$enable_jemalloc" = xyes], [AC_DEFINE([HAVE_JEMALLOC], [1], [Define to 1 if jemalloc is used])], []) AC_MSG_RESULT($enable_jemalloc) # Untar the jemalloc-4.2.0.tar.bz2 in dep/ before config.status is rerun # Run configure in dep/jemalloc-4.2.0 AC_CONFIG_COMMANDS_PRE([rm -rf dep/jemalloc-4.2.0]) 
# Remaining jemalloc steps: recreate the directory, unpack the bundled
# tarball into dep/, and run jemalloc's own configure with the je_ prefix.
AC_CONFIG_COMMANDS_PRE([mkdir dep/jemalloc-4.2.0])
AC_CONFIG_COMMANDS_PRE([tar xvjf dep/jemalloc-4.2.0.tar.bz2 -C dep])
AC_CONFIG_COMMANDS_PRE([cd dep/jemalloc-4.2.0])
AC_CONFIG_COMMANDS_PRE([./configure --with-jemalloc-prefix=je_])
AC_CONFIG_COMMANDS_PRE([cd ../..])

# Untar the hiredis-0.13.3.tar.gz in dep/ before config.status is rerun
# NOTE(review): nothing is run between the two cd commands below, so they
# currently cancel out. Confirm whether a build step is missing.
AC_CONFIG_COMMANDS_PRE([rm -rf dep/hiredis-0.13.3])
AC_CONFIG_COMMANDS_PRE([mkdir dep/hiredis-0.13.3])
AC_CONFIG_COMMANDS_PRE([tar zxvf dep/hiredis-0.13.3.tar.gz -C dep])
AC_CONFIG_COMMANDS_PRE([cd dep/hiredis-0.13.3])
AC_CONFIG_COMMANDS_PRE([cd ../..])

# Define Makefiles
AC_CONFIG_FILES([Makefile
                 dep/Makefile
                 dep/util/Makefile
                 dep/dhashkit/Makefile
                 dep/dmalloc/Makefile
                 dep/sds/Makefile
                 dep/ae/Makefile
                 dep/dlist/Makefile
                 dep/darray/Makefile
                 dep/himemcached-0.1.0/Makefile
                 src/Makefile
                 tests/Makefile])

# Generate the "configure" script
AC_OUTPUT

================================================
FILE: dep/.gitignore
================================================
!*.tar.gz

================================================
FILE: dep/Makefile.am
================================================
# Build order of the bundled dependencies. The two tarballs are shipped in
# the distribution because the configure steps unpack them.
SUBDIRS = jemalloc-4.2.0 hiredis-0.13.3 himemcached-0.1.0 util dhashkit dmalloc ae sds dlist darray

EXTRA_DIST = jemalloc-4.2.0.tar.bz2 hiredis-0.13.3.tar.gz

================================================
FILE: dep/ae/Makefile.am
================================================
MAINTAINERCLEANFILES = Makefile.in

AM_CPPFLAGS =
# _GNU_SOURCE is added on every platform except Solaris (event ports).
if !OS_SOLARIS
AM_CPPFLAGS += -D_GNU_SOURCE
endif
AM_CPPFLAGS += -I $(top_srcdir)/dep/util
AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include
AM_CPPFLAGS += -I $(top_srcdir)/dep/dmalloc

AM_CFLAGS = -Wall -Wshadow
AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value

noinst_LIBRARIES = libae.a

noinst_HEADERS = ae.h

# Only ae.c is listed as a source. ae.c pulls in one ae_*.c backend with
# #include, depending on which HAVE_* macro is defined.
libae_a_SOURCES = \
	ae.c ae.h

================================================
FILE: dep/ae/ae.c
================================================
/* A simple event-driven programming library.
Originally I wrote this code * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated * it in form of a library for easy reuse. * * Copyright (c) 2006-2010, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_CONFIG_H # include #endif /* Include the best multiplexing layer supported by this system. * The following should be ordered by performances, descending. 
*/ #ifdef HAVE_EVENT_PORTS #include "ae_evport.c" #else #ifdef HAVE_EPOLL #include "ae_epoll.c" #else #ifdef HAVE_KQUEUE #include "ae_kqueue.c" #else #include "ae_select.c" #endif #endif #endif aeEventLoop *aeCreateEventLoop(int setsize) { aeEventLoop *eventLoop; int i; if ((eventLoop = dalloc(sizeof(*eventLoop))) == NULL) goto err; eventLoop->events = dalloc(sizeof(aeFileEvent)*setsize); eventLoop->fired = dalloc(sizeof(aeFiredEvent)*setsize); if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err; eventLoop->setsize = setsize; eventLoop->lastTime = time(NULL); eventLoop->timeEventHead = NULL; eventLoop->timeEventNextId = 0; eventLoop->stop = 0; eventLoop->maxfd = -1; eventLoop->beforesleep = NULL; eventLoop->bsdata = NULL; if (aeApiCreate(eventLoop) == -1) goto err; /* Events with mask == AE_NONE are not set. So let's initialize the * vector with it. */ for (i = 0; i < setsize; i++) eventLoop->events[i].mask = AE_NONE; return eventLoop; err: if (eventLoop) { dfree(eventLoop->events); dfree(eventLoop->fired); dfree(eventLoop); } return NULL; } /* Return the current set size. */ int aeGetSetSize(aeEventLoop *eventLoop) { return eventLoop->setsize; } /* Resize the maximum set size of the event loop. * If the requested set size is smaller than the current set size, but * there is already a file descriptor in use that is >= the requested * set size minus one, AE_ERR is returned and the operation is not * performed at all. * * Otherwise AE_OK is returned and the operation is successful. 
*/ int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { int i; if (setsize == eventLoop->setsize) return AE_OK; if (eventLoop->maxfd >= setsize) return AE_ERR; if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR; eventLoop->events = drealloc(eventLoop->events,sizeof(aeFileEvent)*setsize); eventLoop->fired = drealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize); eventLoop->setsize = setsize; /* Make sure that if we created new slots, they are initialized with * an AE_NONE mask. */ for (i = eventLoop->maxfd+1; i < setsize; i++) eventLoop->events[i].mask = AE_NONE; return AE_OK; } void aeDeleteEventLoop(aeEventLoop *eventLoop) { aeApiFree(eventLoop); dfree(eventLoop->events); dfree(eventLoop->fired); dfree(eventLoop); } void aeStop(aeEventLoop *eventLoop) { eventLoop->stop = 1; } int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, aeFileProc *proc, void *clientData) { if (fd >= eventLoop->setsize) { if (aeResizeSetSize(eventLoop,fd+1000) != AE_OK) { return AE_ERR; } } aeFileEvent *fe = &eventLoop->events[fd]; if (aeApiAddEvent(eventLoop, fd, mask) == -1) return AE_ERR; fe->mask |= mask; if (mask & AE_READABLE) fe->rfileProc = proc; if (mask & AE_WRITABLE) fe->wfileProc = proc; fe->clientData = clientData; if (fd > eventLoop->maxfd) eventLoop->maxfd = fd; return AE_OK; } void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) { if (fd >= eventLoop->setsize) return; aeFileEvent *fe = &eventLoop->events[fd]; if (fe->mask == AE_NONE) return; aeApiDelEvent(eventLoop, fd, mask); fe->mask = fe->mask & (~mask); if (fd == eventLoop->maxfd && fe->mask == AE_NONE) { /* Update the max fd */ int j; for (j = eventLoop->maxfd-1; j >= 0; j--) if (eventLoop->events[j].mask != AE_NONE) break; eventLoop->maxfd = j; } } int aeGetFileEvents(aeEventLoop *eventLoop, int fd) { if (fd >= eventLoop->setsize) return 0; aeFileEvent *fe = &eventLoop->events[fd]; return fe->mask; } static void aeGetTime(long *seconds, long *milliseconds) { struct timeval tv; 
gettimeofday(&tv, NULL); *seconds = tv.tv_sec; *milliseconds = tv.tv_usec/1000; } static void aeAddMillisecondsToNow(long long milliseconds, long *sec, long *ms) { long cur_sec, cur_ms, when_sec, when_ms; aeGetTime(&cur_sec, &cur_ms); when_sec = cur_sec + milliseconds/1000; when_ms = cur_ms + milliseconds%1000; if (when_ms >= 1000) { when_sec ++; when_ms -= 1000; } *sec = when_sec; *ms = when_ms; } long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds, aeTimeProc *proc, void *clientData, aeEventFinalizerProc *finalizerProc) { long long id = eventLoop->timeEventNextId++; aeTimeEvent *te; te = dalloc(sizeof(*te)); if (te == NULL) return AE_ERR; te->id = id; aeAddMillisecondsToNow(milliseconds,&te->when_sec,&te->when_ms); te->timeProc = proc; te->finalizerProc = finalizerProc; te->clientData = clientData; te->next = eventLoop->timeEventHead; eventLoop->timeEventHead = te; return id; } int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id) { aeTimeEvent *te = eventLoop->timeEventHead; while(te) { if (te->id == id) { te->id = AE_DELETED_EVENT_ID; return AE_OK; } te = te->next; } return AE_ERR; /* NO event with the specified ID found */ } /* Search the first timer to fire. * This operation is useful to know how many time the select can be * put in sleep without to delay any event. * If there are no timers NULL is returned. * * Note that's O(N) since time events are unsorted. * Possible optimizations (not needed by Redis so far, but...): * 1) Insert the event in order, so that the nearest is just the head. * Much better but still insertion or deletion of timers is O(N). * 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)). 
*/ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop) { aeTimeEvent *te = eventLoop->timeEventHead; aeTimeEvent *nearest = NULL; while(te) { if (!nearest || te->when_sec < nearest->when_sec || (te->when_sec == nearest->when_sec && te->when_ms < nearest->when_ms)) nearest = te; te = te->next; } return nearest; } /* Process time events */ static int processTimeEvents(aeEventLoop *eventLoop) { int processed = 0; aeTimeEvent *te, *prev; long long maxId; time_t now = time(NULL); /* If the system clock is moved to the future, and then set back to the * right value, time events may be delayed in a random way. Often this * means that scheduled operations will not be performed soon enough. * * Here we try to detect system clock skews, and force all the time * events to be processed ASAP when this happens: the idea is that * processing events earlier is less dangerous than delaying them * indefinitely, and practice suggests it is. */ if (now < eventLoop->lastTime) { te = eventLoop->timeEventHead; while(te) { te->when_sec = 0; te = te->next; } } eventLoop->lastTime = now; prev = NULL; te = eventLoop->timeEventHead; maxId = eventLoop->timeEventNextId-1; while(te) { long now_sec, now_ms; long long id; /* Remove events scheduled for deletion. */ if (te->id == AE_DELETED_EVENT_ID) { aeTimeEvent *next = te->next; if (prev == NULL) eventLoop->timeEventHead = te->next; else prev->next = te->next; if (te->finalizerProc) te->finalizerProc(eventLoop, te->clientData); dfree(te); te = next; continue; } /* Make sure we don't process time events created by time events in * this iteration. Note that this check is currently useless: we always * add new timers on the head, however if we change the implementation * detail, this check may be useful again: we keep it here for future * defense. 
*/ if (te->id > maxId) { te = te->next; continue; } aeGetTime(&now_sec, &now_ms); if (now_sec > te->when_sec || (now_sec == te->when_sec && now_ms >= te->when_ms)) { int retval; id = te->id; retval = te->timeProc(eventLoop, id, te->clientData); processed++; if (retval != AE_NOMORE) { aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms); } else { te->id = AE_DELETED_EVENT_ID; } } prev = te; te = te->next; } return processed; } /* Process every pending time event, then every pending file event * (that may be registered by time event callbacks just processed). * Without special flags the function sleeps until some file event * fires, or when the next time event occurs (if any). * * If flags is 0, the function does nothing and returns. * if flags has AE_ALL_EVENTS set, all the kind of events are processed. * if flags has AE_FILE_EVENTS set, file events are processed. * if flags has AE_TIME_EVENTS set, time events are processed. * if flags has AE_DONT_WAIT set the function returns ASAP until all * the events that's possible to process without to wait are processed. * * The function returns the number of events processed. */ int aeProcessEvents(aeEventLoop *eventLoop, int flags) { int processed = 0, numevents; /* Nothing to do? return ASAP */ if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0; /* Note that we want call select() even if there are no * file events to process as long as we want to process time * events, in order to sleep until the next time event is ready * to fire. */ if (eventLoop->maxfd != -1 || ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) { int j; aeTimeEvent *shortest = NULL; struct timeval tv, *tvp; if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT)) shortest = aeSearchNearestTimer(eventLoop); if (shortest) { long now_sec, now_ms; aeGetTime(&now_sec, &now_ms); tvp = &tv; /* How many milliseconds we need to wait for the next * time event to fire? 
*/ long long ms = (shortest->when_sec - now_sec)*1000 + shortest->when_ms - now_ms; if (ms > 0) { tvp->tv_sec = ms/1000; tvp->tv_usec = (ms % 1000)*1000; } else { tvp->tv_sec = 0; tvp->tv_usec = 0; } } else { /* If we have to check for events but need to return * ASAP because of AE_DONT_WAIT we need to set the timeout * to zero */ if (flags & AE_DONT_WAIT) { tv.tv_sec = tv.tv_usec = 0; tvp = &tv; } else { /* Otherwise we can block */ tvp = NULL; /* wait forever */ } } numevents = aeApiPoll(eventLoop, tvp); for (j = 0; j < numevents; j++) { aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd]; int mask = eventLoop->fired[j].mask; int fd = eventLoop->fired[j].fd; int rfired = 0; /* note the fe->mask & mask & ... code: maybe an already processed * event removed an element that fired and we still didn't * processed, so we check if the event is still valid. */ if (fe->mask & mask & AE_READABLE) { rfired = 1; fe->rfileProc(eventLoop,fd,fe->clientData,mask); } if (fe->mask & mask & AE_WRITABLE) { if (!rfired || fe->wfileProc != fe->rfileProc) fe->wfileProc(eventLoop,fd,fe->clientData,mask); } processed++; } } /* Check time events */ if (flags & AE_TIME_EVENTS) processed += processTimeEvents(eventLoop); return processed; /* return the number of processed file/time events */ } /* Wait for milliseconds until the given file descriptor becomes * writable/readable/exception */ int aeWait(int fd, int mask, long long milliseconds) { struct pollfd pfd; int retmask = 0, retval; memset(&pfd, 0, sizeof(pfd)); pfd.fd = fd; if (mask & AE_READABLE) pfd.events |= POLLIN; if (mask & AE_WRITABLE) pfd.events |= POLLOUT; if ((retval = poll(&pfd, 1, milliseconds))== 1) { if (pfd.revents & POLLIN) retmask |= AE_READABLE; if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE; if (pfd.revents & POLLERR) retmask |= AE_WRITABLE; if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE; return retmask; } else { return retval; } } void aeMain(aeEventLoop *eventLoop) { eventLoop->stop = 0; while 
(!eventLoop->stop) { if (eventLoop->beforesleep != NULL) eventLoop->beforesleep(eventLoop, eventLoop->bsdata); aeProcessEvents(eventLoop, AE_ALL_EVENTS); } } char *aeGetApiName(void) { return aeApiName(); } void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep, void *private_data) { eventLoop->beforesleep = beforesleep; eventLoop->bsdata = private_data; } ================================================ FILE: dep/ae/ae.h ================================================ /* A simple event-driven programming library. Originally I wrote this code * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated * it in form of a library for easy reuse. * * Copyright (c) 2006-2012, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __AE_H__
#define __AE_H__

/* NOTE(review): the header name is missing after this #include in this copy
 * of the file; the declarations below use time_t -- confirm the header. */
#include

/* Status codes returned by the ae API. */
#define AE_OK 0
#define AE_ERR -1

/* File event masks. AE_NONE marks a descriptor slot with nothing registered. */
#define AE_NONE 0
#define AE_READABLE 1
#define AE_WRITABLE 2

/* Flags accepted by aeProcessEvents(). */
#define AE_FILE_EVENTS 1
#define AE_TIME_EVENTS 2
#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS)
#define AE_DONT_WAIT 4

/* Returned by a time handler to request that its timer is not re-armed. */
#define AE_NOMORE -1
/* Id written into a time event to flag it for removal. */
#define AE_DELETED_EVENT_ID -1

/* Macros */
#define AE_NOTUSED(V) ((void) V)

struct aeEventLoop;

/* Types and data structures */

/* Handler for a readable/writable descriptor; `mask` is the fired mask. */
typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
/* Handler for a timer. The return value is either AE_NOMORE or the number of
 * milliseconds after which the timer fires again. */
typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
/* Called with the timer's clientData right before a deleted timer is freed. */
typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
/* Hook run by aeMain() before each call to aeProcessEvents(). */
typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop, void *private_data);

/* File event structure */
typedef struct aeFileEvent {
    int mask; /* one of AE_(READABLE|WRITABLE) */
    aeFileProc *rfileProc;  /* handler called for AE_READABLE */
    aeFileProc *wfileProc;  /* handler called for AE_WRITABLE */
    void *clientData;       /* passed back to both handlers */
} aeFileEvent;

/* Time event structure */
typedef struct aeTimeEvent {
    long long id; /* time event identifier. */
    long when_sec; /* seconds */
    long when_ms; /* milliseconds */
    aeTimeProc *timeProc;
    aeEventFinalizerProc *finalizerProc;
    void *clientData;
    struct aeTimeEvent *next; /* next timer in the singly linked list */
} aeTimeEvent;

/* A fired event */
typedef struct aeFiredEvent {
    int fd;
    int mask;
} aeFiredEvent;

/* State of an event based program */
typedef struct aeEventLoop {
    int maxfd;   /* highest file descriptor currently registered */
    int setsize; /* max number of file descriptors tracked */
    long long timeEventNextId; /* id handed to the next created time event */
    time_t lastTime;     /* Used to detect system clock skew */
    aeFileEvent *events; /* Registered events */
    aeFiredEvent *fired; /* Fired events */
    aeTimeEvent *timeEventHead; /* head of the time event list */
    int stop; /* set by aeStop(); makes aeMain() return */
    void *apidata; /* This is used for polling API specific data */
    aeBeforeSleepProc *beforesleep;
    void *bsdata; /* This is used for beforesleep private data */
} aeEventLoop;

/* Prototypes */
aeEventLoop *aeCreateEventLoop(int setsize);
void aeDeleteEventLoop(aeEventLoop *eventLoop);
void aeStop(aeEventLoop *eventLoop);
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData);
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
        aeTimeProc *proc, void *clientData,
        aeEventFinalizerProc *finalizerProc);
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id);
int aeProcessEvents(aeEventLoop *eventLoop, int flags);
int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep, void *private_data);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);

#endif

================================================
FILE: dep/ae/ae_epoll.c
================================================
/* Linux epoll(2) based ae.c module
 *
 * Copyright (c) 2009-2012, Salvatore
Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include typedef struct aeApiState { int epfd; struct epoll_event *events; } aeApiState; static int aeApiCreate(aeEventLoop *eventLoop) { aeApiState *state = dalloc(sizeof(aeApiState)); if (!state) return -1; state->events = dalloc(sizeof(struct epoll_event)*eventLoop->setsize); if (!state->events) { dfree(state); return -1; } state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */ if (state->epfd == -1) { dfree(state->events); dfree(state); return -1; } eventLoop->apidata = state; return 0; } static int aeApiResize(aeEventLoop *eventLoop, int setsize) { aeApiState *state = eventLoop->apidata; state->events = drealloc(state->events, sizeof(struct epoll_event)*setsize); return 0; } static void aeApiFree(aeEventLoop *eventLoop) { aeApiState *state = eventLoop->apidata; close(state->epfd); dfree(state->events); dfree(state); } static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; struct epoll_event ee = {0}; /* avoid valgrind warning */ /* If the fd was already monitored for some event, we need a MOD * operation. Otherwise we need an ADD operation. */ int op = eventLoop->events[fd].mask == AE_NONE ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; ee.events = 0; mask |= eventLoop->events[fd].mask; /* Merge old events */ if (mask & AE_READABLE) ee.events |= EPOLLIN; if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; ee.data.fd = fd; if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1; return 0; } static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) { aeApiState *state = eventLoop->apidata; struct epoll_event ee = {0}; /* avoid valgrind warning */ int mask = eventLoop->events[fd].mask & (~delmask); ee.events = 0; if (mask & AE_READABLE) ee.events |= EPOLLIN; if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; ee.data.fd = fd; if (mask != AE_NONE) { epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee); } else { /* Note, Kernel < 2.6.9 requires a non null event pointer even for * EPOLL_CTL_DEL. 
*/ epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee); } } static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { aeApiState *state = eventLoop->apidata; int retval, numevents = 0; retval = epoll_wait(state->epfd,state->events,eventLoop->setsize, tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1); if (retval > 0) { int j; numevents = retval; for (j = 0; j < numevents; j++) { int mask = 0; struct epoll_event *e = state->events+j; if (e->events & EPOLLIN) mask |= AE_READABLE; if (e->events & EPOLLOUT) mask |= AE_WRITABLE; if (e->events & EPOLLERR) mask |= AE_WRITABLE; if (e->events & EPOLLHUP) mask |= AE_WRITABLE; eventLoop->fired[j].fd = e->data.fd; eventLoop->fired[j].mask = mask; } } return numevents; } static char *aeApiName(void) { return "epoll"; } ================================================ FILE: dep/ae/ae_evport.c ================================================ /* ae.c module for illumos event ports. * * Copyright (c) 2012, Joyent, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include static int evport_debug = 0; /* * This file implements the ae API using event ports, present on Solaris-based * systems since Solaris 10. Using the event port interface, we associate file * descriptors with the port. Each association also includes the set of poll(2) * events that the consumer is interested in (e.g., POLLIN and POLLOUT). * * There's one tricky piece to this implementation: when we return events via * aeApiPoll, the corresponding file descriptors become dissociated from the * port. This is necessary because poll events are level-triggered, so if the * fd didn't become dissociated, it would immediately fire another event since * the underlying state hasn't changed yet. We must re-associate the file * descriptor, but only after we know that our caller has actually read from it. * The ae API does not tell us exactly when that happens, but we do know that * it must happen by the time aeApiPoll is called again. Our solution is to * keep track of the last fds returned by aeApiPoll and re-associate them next * time aeApiPoll is invoked. * * To summarize, in this module, each fd association is EITHER (a) represented * only via the in-kernel association OR (b) represented by pending_fds and * pending_masks. (b) is only true for the last fds we returned from aeApiPoll, * and only until we enter aeApiPoll again (at which point we restore the * in-kernel association). 
*/

/* Upper bound on the events fetched by one port_getn() call, and therefore
 * on the number of pending (dissociated) descriptors tracked below. */
#define MAX_EVENT_BATCHSZ 512

typedef struct aeApiState {
    int     portfd;                             /* event port */
    int     npending;                           /* # of pending fds */
    int     pending_fds[MAX_EVENT_BATCHSZ];     /* pending fds */
    int     pending_masks[MAX_EVENT_BATCHSZ];   /* pending fds' masks */
} aeApiState;

/* Allocate the backend state, create the event port and mark every pending
 * slot as unused. Returns 0 on success, -1 on allocation or port_create()
 * failure. */
static int aeApiCreate(aeEventLoop *eventLoop) {
    int i;
    aeApiState *state = dalloc(sizeof(aeApiState));
    if (!state) return -1;

    state->portfd = port_create();
    if (state->portfd == -1) {
        dfree(state);
        return -1;
    }

    state->npending = 0;

    for (i = 0; i < MAX_EVENT_BATCHSZ; i++) {
        state->pending_fds[i] = -1;
        state->pending_masks[i] = AE_NONE;
    }

    eventLoop->apidata = state;
    return 0;
}

/* The pending arrays have a fixed size, so a set-size change is a no-op. */
static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
    /* Nothing to resize here. */
    return 0;
}

/* Close the event port and release the backend state. */
static void aeApiFree(aeEventLoop *eventLoop) {
    aeApiState *state = eventLoop->apidata;

    close(state->portfd);
    dfree(state);
}

/* Return the index of `fd` among the currently pending descriptors, or -1
 * when it is not pending. */
static int aeApiLookupPending(aeApiState *state, int fd) {
    int i;

    for (i = 0; i < state->npending; i++) {
        if (state->pending_fds[i] == fd)
            return (i);
    }

    return (-1);
}

/*
 * Helper function to invoke port_associate for the given fd and mask.
 * `where` names the caller and only appears in diagnostics. The ae mask is
 * stored as the association's user pointer so aeApiPoll can recover it.
 * Returns port_associate()'s result.
 */
static int aeApiAssociate(const char *where, int portfd, int fd, int mask) {
    int events = 0;
    int rv, err;

    if (mask & AE_READABLE)
        events |= POLLIN;
    if (mask & AE_WRITABLE)
        events |= POLLOUT;

    if (evport_debug)
        fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events);

    rv = port_associate(portfd, PORT_SOURCE_FD, fd, events,
        (void *)(uintptr_t)mask);
    /* Save errno before the fprintf calls below can overwrite it. */
    err = errno;

    if (evport_debug)
        fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err));

    if (rv == -1) {
        fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err));

        if (err == EAGAIN)
            fprintf(stderr, "aeApiAssociate: event port limit exceeded.");
    }

    return rv;
}

/* Start watching `fd` for `mask` in addition to its already registered
 * events. Returns 0 on success, or the failing aeApiAssociate() result. */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    int fullmask, pfd;

    if (evport_debug)
        fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask);

    /*
     * Since port_associate's "events" argument replaces any existing events, we
     * must be sure to include whatever events are already associated when
     * we call port_associate() again.
     */
    fullmask = mask | eventLoop->events[fd].mask;
    pfd = aeApiLookupPending(state, fd);

    if (pfd != -1) {
        /*
         * This fd was recently returned from aeApiPoll.  It should be safe to
         * assume that the consumer has processed that poll event, but we play
         * it safer by simply updating pending_mask.  The fd will be
         * re-associated as usual when aeApiPoll is called again.
         */
        if (evport_debug)
            fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd);
        state->pending_masks[pfd] |= fullmask;
        return 0;
    }

    return (aeApiAssociate("aeApiAddEvent", state->portfd, fd, fullmask));
}

/* Stop watching `fd` for `mask`. Failures to update the kernel association
 * abort the process (see the comments in the body). */
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    int fullmask, pfd;

    if (evport_debug)
        fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask);

    pfd = aeApiLookupPending(state, fd);

    if (pfd != -1) {
        if (evport_debug)
            fprintf(stderr, "deleting event from pending fd %d\n", fd);

        /*
         * This fd was just returned from aeApiPoll, so it's not currently
         * associated with the port.  All we need to do is update
         * pending_mask appropriately.
         */
        state->pending_masks[pfd] &= ~mask;

        if (state->pending_masks[pfd] == AE_NONE)
            state->pending_fds[pfd] = -1;

        return;
    }

    /*
     * The fd is currently associated with the port.  Like with the add case
     * above, we must look at the full mask for the file descriptor before
     * updating that association.  We don't have a good way of knowing what the
     * events are without looking into the eventLoop state directly.  We rely on
     * the fact that our caller has already updated the mask in the eventLoop.
     */
    /* NOTE(review): aeDeleteFileEvent in ae.c calls aeApiDelEvent before it
     * clears the bits in events[fd].mask -- confirm the assumption above
     * still holds for this caller. */

    fullmask = eventLoop->events[fd].mask;
    if (fullmask == AE_NONE) {
        /*
         * We're removing *all* events, so use port_dissociate to remove the
         * association completely.  Failure here indicates a bug.
         */
        if (evport_debug)
            fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd);

        if (port_dissociate(state->portfd, PORT_SOURCE_FD, fd) != 0) {
            perror("aeApiDelEvent: port_dissociate");
            abort(); /* will not return */
        }
    } else if (aeApiAssociate("aeApiDelEvent", state->portfd, fd,
        fullmask) != 0) {
        /*
         * ENOMEM is a potentially transient condition, but the kernel won't
         * generally return it unless things are really bad.  EAGAIN indicates
         * we've reached an resource limit, for which it doesn't make sense to
         * retry (counter-intuitively).  All other errors indicate a bug.  In any
         * of these cases, the best we can do is to abort.
         */
        abort(); /* will not return */
    }
}

/* Re-associate the descriptors returned by the previous call, then wait for
 * at most `tvp` (forever when NULL) and copy the fired descriptors into
 * eventLoop->fired. Returns the number of fired entries; 0 on timeout or
 * interruption. Any other port_getn() failure aborts the process. */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    struct timespec timeout, *tsp;
    int mask, i;
    uint_t nevents;
    port_event_t event[MAX_EVENT_BATCHSZ];

    /*
     * If we've returned fd events before, we must re-associate them with the
     * port now, before calling port_get().  See the block comment at the top of
     * this file for an explanation of why.
     */
    for (i = 0; i < state->npending; i++) {
        if (state->pending_fds[i] == -1)
            /* This fd has since been deleted. */
            continue;

        if (aeApiAssociate("aeApiPoll", state->portfd,
            state->pending_fds[i], state->pending_masks[i]) != 0) {
            /* See aeApiDelEvent for why this case is fatal. */
            abort();
        }

        state->pending_masks[i] = AE_NONE;
        state->pending_fds[i] = -1;
    }

    state->npending = 0;

    if (tvp != NULL) {
        timeout.tv_sec = tvp->tv_sec;
        timeout.tv_nsec = tvp->tv_usec * 1000;
        tsp = &timeout;
    } else {
        tsp = NULL;
    }

    /*
     * port_getn can return with errno == ETIME having returned some events (!).
     * So if we get ETIME, we check nevents, too.
     */
    nevents = 1;
    if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents,
        tsp) == -1 && (errno != ETIME || nevents == 0)) {
        if (errno == ETIME || errno == EINTR)
            return 0;

        /* Any other error indicates a bug. */
        perror("aeApiPoll: port_get");
        abort();
    }

    state->npending = nevents;

    /* NOTE(review): up to MAX_EVENT_BATCHSZ entries are written to
     * eventLoop->fired, which the event loop sizes by setsize -- confirm
     * setsize is never smaller than the batch size. */
    for (i = 0; i < nevents; i++) {
            mask = 0;
            if (event[i].portev_events & POLLIN)
                mask |= AE_READABLE;
            if (event[i].portev_events & POLLOUT)
                mask |= AE_WRITABLE;

            eventLoop->fired[i].fd = event[i].portev_object;
            eventLoop->fired[i].mask = mask;

            if (evport_debug)
                fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n",
                    (int)event[i].portev_object, mask);

            /* Remember the descriptor and the mask it was associated with
             * so the next call can restore the association. */
            state->pending_fds[i] = event[i].portev_object;
            state->pending_masks[i] = (uintptr_t)event[i].portev_user;
    }

    return nevents;
}

/* Name reported through aeGetApiName(). */
static char *aeApiName(void) {
    return "evport";
}

================================================
FILE: dep/ae/ae_kqueue.c
================================================
/* Kqueue(2)-based ae.c module
 *
 * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include typedef struct aeApiState { int kqfd; struct kevent *events; } aeApiState; static int aeApiCreate(aeEventLoop *eventLoop) { aeApiState *state = dalloc(sizeof(aeApiState)); if (!state) return -1; state->events = dalloc(sizeof(struct kevent)*eventLoop->setsize); if (!state->events) { dfree(state); return -1; } state->kqfd = kqueue(); if (state->kqfd == -1) { dfree(state->events); dfree(state); return -1; } eventLoop->apidata = state; return 0; } static int aeApiResize(aeEventLoop *eventLoop, int setsize) { aeApiState *state = eventLoop->apidata; state->events = drealloc(state->events, sizeof(struct kevent)*setsize); return 0; } static void aeApiFree(aeEventLoop *eventLoop) { aeApiState *state = eventLoop->apidata; close(state->kqfd); dfree(state->events); dfree(state); } static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; struct kevent ke; if (mask & AE_READABLE) { EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(state->kqfd, 
&ke, 1, NULL, 0, NULL) == -1) return -1; } if (mask & AE_WRITABLE) { EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; } return 0; } static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; struct kevent ke; if (mask & AE_READABLE) { EV_SET(&ke, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); kevent(state->kqfd, &ke, 1, NULL, 0, NULL); } if (mask & AE_WRITABLE) { EV_SET(&ke, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); kevent(state->kqfd, &ke, 1, NULL, 0, NULL); } } static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { aeApiState *state = eventLoop->apidata; int retval, numevents = 0; if (tvp != NULL) { struct timespec timeout; timeout.tv_sec = tvp->tv_sec; timeout.tv_nsec = tvp->tv_usec * 1000; retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize, &timeout); } else { retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize, NULL); } if (retval > 0) { int j; numevents = retval; for(j = 0; j < numevents; j++) { int mask = 0; struct kevent *e = state->events+j; if (e->filter == EVFILT_READ) mask |= AE_READABLE; if (e->filter == EVFILT_WRITE) mask |= AE_WRITABLE; eventLoop->fired[j].fd = e->ident; eventLoop->fired[j].mask = mask; } } return numevents; } static char *aeApiName(void) { return "kqueue"; } ================================================ FILE: dep/ae/ae_select.c ================================================ /* Select()-based ae.c module. * * Copyright (c) 2009-2012, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include typedef struct aeApiState { fd_set rfds, wfds; /* We need to have a copy of the fd sets as it's not safe to reuse * FD sets after select(). */ fd_set _rfds, _wfds; } aeApiState; static int aeApiCreate(aeEventLoop *eventLoop) { aeApiState *state = dalloc(sizeof(aeApiState)); if (!state) return -1; FD_ZERO(&state->rfds); FD_ZERO(&state->wfds); eventLoop->apidata = state; return 0; } static int aeApiResize(aeEventLoop *eventLoop, int setsize) { /* Just ensure we have enough room in the fd_set type. 
*/ if (setsize >= FD_SETSIZE) return -1; return 0; } static void aeApiFree(aeEventLoop *eventLoop) { dfree(eventLoop->apidata); } static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; if (mask & AE_READABLE) FD_SET(fd,&state->rfds); if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds); return 0; } static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; if (mask & AE_READABLE) FD_CLR(fd,&state->rfds); if (mask & AE_WRITABLE) FD_CLR(fd,&state->wfds); } static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { aeApiState *state = eventLoop->apidata; int retval, j, numevents = 0; memcpy(&state->_rfds,&state->rfds,sizeof(fd_set)); memcpy(&state->_wfds,&state->wfds,sizeof(fd_set)); retval = select(eventLoop->maxfd+1, &state->_rfds,&state->_wfds,NULL,tvp); if (retval > 0) { for (j = 0; j <= eventLoop->maxfd; j++) { int mask = 0; aeFileEvent *fe = &eventLoop->events[j]; if (fe->mask == AE_NONE) continue; if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds)) mask |= AE_READABLE; if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds)) mask |= AE_WRITABLE; eventLoop->fired[numevents].fd = j; eventLoop->fired[numevents].mask = mask; numevents++; } } return numevents; } static char *aeApiName(void) { return "select"; } ================================================ FILE: dep/darray/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = if !OS_SOLARIS AM_CPPFLAGS += -D_GNU_SOURCE endif AM_CPPFLAGS += -I $(top_srcdir)/dep/util AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include AM_CPPFLAGS += -I $(top_srcdir)/dep/dmalloc AM_CFLAGS = -Wall -Wshadow AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value noinst_LIBRARIES = libdarray.a noinst_HEADERS = darray.h libdarray_a_SOURCES = \ darray.c darray.h ================================================ FILE: dep/darray/darray.c 
================================================ #include #include #include darray * darray_create(unsigned long long n, size_t size) { darray *a; a = dalloc(sizeof(*a)); if (a == NULL) { return NULL; } a->elem = dalloc(n * size); if (a->elem == NULL) { dfree(a); return NULL; } a->nelem = 0; a->size = size; a->nalloc = n; return a; } void darray_destroy(darray *a) { darray_deinit(a); dfree(a); } int darray_init(darray *a, unsigned long long n, size_t size) { a->elem = dalloc(n * size); if (a->elem == NULL) { return -1; } a->nelem = 0; a->size = size; a->nalloc = n; return 0; } void darray_deinit(darray *a) { if (a->elem != NULL) { dfree(a->elem); } } unsigned long long darray_idx(darray *a, void *elem) { char *p, *q; unsigned long long off, idx; p = a->elem; q = elem; off = (unsigned long long)(q - p); idx = off / (unsigned long long)a->size; return idx; } void * darray_push(darray *a) { void *elem, *new; size_t size; if (a->nelem == a->nalloc) { /* the array is full; allocate new array */ size = a->size * a->nalloc; new = drealloc(a->elem, 2 * size); if (new == NULL) { return NULL; } a->elem = new; a->nalloc *= 2; } elem = (char *)a->elem + a->size * a->nelem; a->nelem++; return elem; } void * darray_pop(darray *a) { void *elem; a->nelem--; elem = (char *)a->elem + a->size * a->nelem; return elem; } void * darray_get(darray *a, unsigned long long idx) { void *elem; elem = (char *)a->elem + (a->size * idx); return elem; } void * darray_top(darray *a) { return darray_get(a, a->nelem - 1); } void darray_swap(darray *a, darray *b) { darray tmp; tmp = *a; *a = *b; *b = tmp; } /* * Sort nelem elements of the array in ascending order based on the * compare comparator. */ void darray_sort(darray *a, darray_compare_t compare) { qsort(a->elem, a->nelem, a->size, compare); } /* * Calls the func once for each element in the array as long as func returns * success. On failure short-circuits and returns the error status. 
*/ int darray_each(darray *a, darray_each_t func, void *data) { unsigned long long i, nelem; for (i = 0, nelem = darray_n(a); i < nelem; i++) { void *elem = darray_get(a, i); int ret; ret = func(elem, data); if (ret != 0) { return -1; } } return 0; } ================================================ FILE: dep/darray/darray.h ================================================ #ifndef _DARRAY_H_ #define _DARRAY_H_ typedef int (*darray_compare_t)(const void *, const void *); typedef int (*darray_each_t)(void *, void *); typedef struct darray { unsigned long long nelem; /* # element */ void *elem; /* element */ size_t size; /* element size */ unsigned long long nalloc; /* # allocated element */ } darray; #define null_darray { 0, NULL, 0, 0 } static inline void darray_null(darray *a) { a->nelem = 0; a->elem = NULL; a->size = 0; a->nalloc = 0; } static inline void darray_set(darray *a, void *elem, size_t size, unsigned long long nalloc) { a->nelem = 0; a->elem = elem; a->size = size; a->nalloc = nalloc; } static inline unsigned long long darray_n(const darray *a) { return a->nelem; } darray *darray_create(unsigned long long n, size_t size); void darray_destroy(darray *a); int darray_init(darray *a, unsigned long long n, size_t size); void darray_deinit(darray *a); unsigned long long darray_idx(darray *a, void *elem); void *darray_push(darray *a); void *darray_pop(darray *a); void *darray_get(darray *a, unsigned long long idx); void *darray_top(darray *a); void darray_swap(darray *a, darray *b); void darray_sort(darray *a, darray_compare_t compare); int darray_each(darray *a, darray_each_t func, void *data); #endif ================================================ FILE: dep/dhashkit/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CFLAGS = -Wall -Wshadow AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value noinst_LIBRARIES = libdhashkit.a noinst_HEADERS = dhashkit.h libdhashkit_a_SOURCES = \ dhashkit.h \ dcrc16.c \ dcrc32.c \ 
dfnv.c \
	dhsieh.c \
	djenkins.c \
	dketama.c \
	dmd5.c \
	dmodula.c \
	dmurmur.c \
	done_at_a_time.c \
	drandom.c \
	dsha1.c

================================================
FILE: dep/dhashkit/dcrc16.c
================================================
#include

/* Byte-indexed lookup table used by hash_crc16(). */
static const uint16_t crc16tab[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};

/*
 * Table-driven hash over the first key_length bytes of key.
 *
 * NOTE(review): the accumulator is 32 bits wide and is never masked to 16
 * bits, so bits shifted above bit 15 take part in later steps and the
 * returned value can exceed 0xffff. Presumably this is kept for compatibility
 * with existing key distribution — confirm before changing it.
 */
uint32_t
hash_crc16(const char *key, size_t key_length)
{
    uint64_t x;
    uint32_t crc = 0;

    for (x=0; x < key_length; x++) {
        /* The & 0x00ff keeps the table index in range even if char is signed. */
        crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *key++) & 0x00ff];
    }

    return crc;
}

================================================
FILE: dep/dhashkit/dcrc32.c
================================================
#include

/* Byte-indexed lookup table shared by hash_crc32() and hash_crc32a(). */
static const uint32_t crc32tab[256] = {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
};

/*
 * CRC-32 implementation compatible with libmemcached library. Unfortunately
 * this implementation does not return CRC-32 as per spec.
*/ uint32_t hash_crc32(const char *key, size_t key_length) { uint64_t x; uint32_t crc = UINT32_MAX; for (x = 0; x < key_length; x++) { crc = (crc >> 8) ^ crc32tab[(crc ^ (uint64_t)key[x]) & 0xff]; } return ((~crc) >> 16) & 0x7fff; } uint32_t hash_crc32a(const char *key, size_t key_length) { const uint8_t *p = key; uint32_t crc; crc = ~0U; while (key_length--) { crc = crc32tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); } return crc ^ ~0U; } ================================================ FILE: dep/dhashkit/dfnv.c ================================================ #include static uint64_t FNV_64_INIT = UINT64_C(0xcbf29ce484222325); static uint64_t FNV_64_PRIME = UINT64_C(0x100000001b3); static uint32_t FNV_32_INIT = 2166136261UL; static uint32_t FNV_32_PRIME = 16777619; uint32_t hash_fnv1_64(const char *key, size_t key_length) { uint64_t hash = FNV_64_INIT; size_t x; for (x = 0; x < key_length; x++) { hash *= FNV_64_PRIME; hash ^= (uint64_t)key[x]; } return (uint32_t)hash; } uint32_t hash_fnv1a_64(const char *key, size_t key_length) { uint32_t hash = (uint32_t) FNV_64_INIT; size_t x; for (x = 0; x < key_length; x++) { uint32_t val = (uint32_t)key[x]; hash ^= val; hash *= (uint32_t) FNV_64_PRIME; } return hash; } uint32_t hash_fnv1_32(const char *key, size_t key_length) { uint32_t hash = FNV_32_INIT; size_t x; for (x = 0; x < key_length; x++) { uint32_t val = (uint32_t)key[x]; hash *= FNV_32_PRIME; hash ^= val; } return hash; } uint32_t hash_fnv1a_32(const char *key, size_t key_length) { uint32_t hash = FNV_32_INIT; size_t x; for (x= 0; x < key_length; x++) { uint32_t val = (uint32_t)key[x]; hash ^= val; hash *= FNV_32_PRIME; } return hash; } ================================================ FILE: dep/dhashkit/dhashkit.h ================================================ #ifndef _DHASHKIT_H_ #define _DHASHKIT_H_ #include #include #include struct continuum { uint32_t index; /* server index */ uint32_t value; /* hash value */ }; #define HASH_CODEC(ACTION) \ ACTION( 
HASH_ONE_AT_A_TIME, one_at_a_time ) \
    ACTION( HASH_MD5, md5 ) \
    ACTION( HASH_CRC16, crc16 ) \
    ACTION( HASH_CRC32, crc32 ) \
    ACTION( HASH_CRC32A, crc32a ) \
    ACTION( HASH_FNV1_64, fnv1_64 ) \
    ACTION( HASH_FNV1A_64, fnv1a_64 ) \
    ACTION( HASH_FNV1_32, fnv1_32 ) \
    ACTION( HASH_FNV1A_32, fnv1a_32 ) \
    ACTION( HASH_HSIEH, hsieh ) \
    ACTION( HASH_MURMUR, murmur ) \
    ACTION( HASH_JENKINS, jenkins ) \

#define DIST_CODEC(ACTION) \
    ACTION( DIST_KETAMA, ketama ) \
    ACTION( DIST_MODULA, modula ) \
    ACTION( DIST_RANDOM, random ) \

/* Expand HASH_CODEC into the enumerators of hash_type_t. */
#define DEFINE_ACTION(_hash, _name) _hash,
typedef enum hash_type {
    HASH_CODEC( DEFINE_ACTION )
    HASH_SENTINEL
} hash_type_t;
#undef DEFINE_ACTION

/* Expand DIST_CODEC into the enumerators of dist_type_t. */
#define DEFINE_ACTION(_dist, _name) _dist,
typedef enum dist_type {
    DIST_CODEC( DEFINE_ACTION )
    DIST_SENTINEL
} dist_type_t;
#undef DEFINE_ACTION

uint32_t hash_one_at_a_time(const char *key, size_t key_length);
void md5_signature(const unsigned char *key, unsigned long length, unsigned char *result);
uint32_t hash_md5(const char *key, size_t key_length);
uint32_t hash_crc16(const char *key, size_t key_length);
uint32_t hash_crc32(const char *key, size_t key_length);
uint32_t hash_crc32a(const char *key, size_t key_length);
uint32_t hash_fnv1_64(const char *key, size_t key_length);
uint32_t hash_fnv1a_64(const char *key, size_t key_length);
uint32_t hash_fnv1_32(const char *key, size_t key_length);
uint32_t hash_fnv1a_32(const char *key, size_t key_length);
uint32_t hash_hsieh(const char *key, size_t key_length);
uint32_t hash_jenkins(const char *key, size_t length);
uint32_t hash_murmur(const char *key, size_t length);

uint32_t ketama_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash);
uint32_t modula_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash);
uint32_t random_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash);

/* SHA-1 in C, by Steve Reid. 100% Public Domain. */

typedef struct {
    uint32_t state[5];
    uint32_t count[2];
    unsigned char buffer[64];
} SHA1_CTX;

void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);

#endif

================================================
FILE: dep/dhashkit/dhsieh.c
================================================
#include

/*
 * get16bits(d): read two bytes at d as a little-endian 16-bit value. On
 * GCC/i386 this is a direct (possibly unaligned) load; everywhere else it is
 * assembled from the two bytes.
 */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__))
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
                      +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif

/*
 * Hash key_length bytes of key, consuming four bytes per main-loop round and
 * the 1-3 leftover bytes in the switch below.
 *
 * Returns 0 for a NULL key or a zero length (key_length is unsigned, so the
 * "<= 0" test can only be true for 0).
 */
uint32_t
hash_hsieh(const char *key, size_t key_length)
{
    uint32_t hash = 0, tmp;
    int rem;

    if (key_length <= 0 || key == NULL) {
        return 0;
    }

    rem = key_length & 3;
    key_length >>= 2;

    /* Main loop */
    for (;key_length > 0; key_length--) {
        hash += get16bits (key);
        tmp = (get16bits (key+2) << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        key += 2*sizeof (uint16_t);
        hash += hash >> 11;
    }

    /* Handle end cases */
    switch (rem) {
    case 3:
        hash += get16bits (key);
        hash ^= hash << 16;
        hash ^= (uint32_t)key[sizeof (uint16_t)] << 18;
        hash += hash >> 11;
        break;

    case 2:
        hash += get16bits (key);
        hash ^= hash << 11;
        hash += hash >> 17;
        break;

    case 1:
        hash += (unsigned char)(*key);
        hash ^= hash << 10;
        hash += hash >> 1;
        /* no break: falls into default, which only breaks */

    default:
        break;
    }

    /* Force "avalanching" of final 127 bits */
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 4;
    hash += hash >> 17;
    hash ^= hash << 25;
    hash += hash >> 6;

    return hash;
}

================================================
FILE: dep/dhashkit/djenkins.c
================================================
#include

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
/* Rotate the 32-bit value x left by k bits. */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/* mix(): mixing step applied to (a,b,c) after every full 12-byte block. */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/* final(): last mixing of (a,b,c) before c is returned as the hash. */
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

#define JENKINS_INITVAL 13

/*
 * jenkins_hash() -- hash a variable-length key into a 32-bit value
 *  k       : the key (the unaligned variable-length array of bytes)
 *  length  : the length of the key, counting by bytes
 *  initval : can be any 4-byte value
 * Returns a 32-bit value.  Every bit of the key affects every bit of
 * the return value.  Two keys differing by one or two bits will have
 * totally different hash values.
 *
 * The best hash table sizes are powers of 2.  There is no need to do
 * mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 * use a bitmask.  For example, if you need only 10 bits, do
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 *
 * In this file the function is named hash_jenkins() and the initial value is
 * the fixed JENKINS_INITVAL. Unless WORDS_BIGENDIAN is defined, the key is
 * read with 32-bit, 16-bit or byte loads depending on the alignment of its
 * address.
 */
uint32_t
hash_jenkins(const char *key, size_t length)
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL;

  u.ptr = key;
#ifndef WORDS_BIGENDIAN
  if ((u.i & 0x3) == 0)
  {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;              /* zero length strings require no mixing */
    default: return c;
    }

  }
  else if ((u.i & 0x1) == 0)
  {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    default: return c;
    }

  }
  else
  {                        /* need to read the key one byte at a time */
#endif /* little endian */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
    case 11: c+=((uint32_t)k[10])<<16;
    case 10: c+=((uint32_t)k[9])<<8;
    case 9 : c+=k[8];
    case 8 : b+=((uint32_t)k[7])<<24;
    case 7 : b+=((uint32_t)k[6])<<16;
    case 6 : b+=((uint32_t)k[5])<<8;
    case 5 : b+=k[4];
    case 4 : a+=((uint32_t)k[3])<<24;
    case 3 : a+=((uint32_t)k[2])<<16;
    case 2 : a+=((uint32_t)k[1])<<8;
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    default : return c;
    }
#ifndef WORDS_BIGENDIAN
  }
#endif

  final(a,b,c);
  return c;
}

================================================
FILE: dep/dhashkit/dketama.c
================================================
#include
#include
#include
#include

#define KETAMA_CONTINUUM_ADDITION   10  /* # extra slots to build into continuum */
#define KETAMA_POINTS_PER_SERVER    160 /* 40 points per hash */
#define KETAMA_MAX_HOSTLEN          86

/*
 * MD5 the key and return four bytes of the 16-byte digest, starting at byte
 * alignment * 4, assembled as a little-endian 32-bit value. `alignment`
 * must be 0..3 for the reads to stay inside the digest.
 */
static uint32_t
ketama_hash(const char *key, size_t key_length, uint32_t alignment)
{
    unsigned char results[16];

    md5_signature((const unsigned char*)key, (unsigned long)key_length, results);

    return ((uint32_t) (results[3 + alignment * 4] & 0xFF) << 24)
        | ((uint32_t) (results[2 + alignment * 4] & 0xFF) << 16)
        | ((uint32_t) (results[1 + alignment * 4] & 0xFF) << 8)
        | (results[0 + alignment * 4] & 0xFF);
}

/* qsort()-style comparator ordering continuum points by ascending value. */
static int
ketama_item_cmp(const void *t1, const void *t2)
{
    const struct continuum *ct1 = t1, *ct2 = t2;

    if (ct1->value == ct2->value) {
        return 0;
    } else if (ct1->value > ct2->value) {
        return 1;
    } else {
        return -1;
    }
}

/*
 * Binary-search the continuum for the first point whose value is >= hash and
 * return that point's server index; when no such point exists, wrap around
 * to the first point.
 *
 * NOTE(review): the search presumes the continuum is sorted by ascending
 * value (the order ketama_item_cmp produces) — confirm with the caller that
 * builds it.
 */
uint32_t
ketama_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash)
{
    struct continuum *begin, *end, *left, *right, *middle;

    ASSERT(continuum != NULL);
    ASSERT(ncontinuum != 0);

    begin = left = continuum;
    end = right = continuum + ncontinuum;

    while (left < right) {
        middle = left + (right - left) / 2;
        if (middle->value < hash) {
          left = middle + 1;
        } else {
          right = middle;
        }
    }

    /* hash is larger than every point: wrap to the start of the ring. */
    if (right == end) {
        right = begin;
    }

    return right->index;
}

================================================
FILE: dep/dhashkit/dmd5.c
================================================
#include
#include

/*
 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm (RFC 1321).
 *
 * Homepage: http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
 *
 * Author: Alexander Peslyak, better known as Solar Designer
 */

typedef unsigned int MD5_u32plus;

typedef struct {
    MD5_u32plus lo, hi;
    MD5_u32plus a, b, c, d;
    unsigned char buffer[64];
    MD5_u32plus block[16];
} MD5_CTX;

/*
 * The basic MD5 functions.
 *
 * F and G are optimized compared to their RFC 1321 definitions for
 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
 * implementation.
 */
#define F(x, y, z)  ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)  ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z)  ((x) ^ (y) ^ (z))
#define I(x, y, z)  ((y) ^ ((x) | ~(z)))

/*
 * The MD5 transformation for all four rounds.
 */
#define STEP(f, a, b, c, d, x, t, s) \
    (a) += f((b), (c), (d)) + (x) + (t); \
    (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
    (a) += (b);

/*
 * SET reads 4 input bytes in little-endian byte order and stores them
 * in a properly aligned word in host byte order.
 *
 * The check for little-endian architectures that tolerate unaligned
 * memory accesses is just an optimization. Nothing will break if it
 * doesn't work.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) \
    (*(MD5_u32plus *)&ptr[(n) * 4])
#define GET(n) \
    SET(n)
#else
#define SET(n) \
    (ctx->block[(n)] = \
    (MD5_u32plus)ptr[(n) * 4] | \
    ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
    ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
    ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
    (ctx->block[(n)])
#endif

/*
 * This processes one or more 64-byte data blocks, but does NOT update
 * the bit counters.  There are no alignment requirements.
 *
 * 'size' is consumed 64 bytes per iteration by a do/while loop that ends
 * when it reaches zero, so it must be a non-zero multiple of 64.
 * Returns a pointer just past the last block that was processed.
 */
static void *
body(MD5_CTX *ctx, void *data, unsigned long size)
{
    unsigned char *ptr;
    MD5_u32plus a, b, c, d;
    MD5_u32plus saved_a, saved_b, saved_c, saved_d;

    ptr = data;

    a = ctx->a;
    b = ctx->b;
    c = ctx->c;
    d = ctx->d;

    do {
        saved_a = a;
        saved_b = b;
        saved_c = c;
        saved_d = d;

        /* Round 1 */
        STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
        STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
        STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
        STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
        STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
        STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
        STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
        STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
        STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
        STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
        STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
        STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
        STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
        STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
        STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
        STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

        /* Round 2 */
        STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
        STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
        STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
        STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
        STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
        STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
        STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
        STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
        STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
        STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
        STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
        STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
        STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
        STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
        STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
        STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

        /* Round 3 */
        STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
        STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
        STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
        STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
        STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
        STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
        STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
        STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
        STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
        STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
        STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
        STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
        STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
        STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
        STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
        STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)

        /* Round 4 */
        STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
        STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
        STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
        STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
        STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
        STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
        STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
        STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
        STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
        STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
        STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
        STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
        STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
        STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
        STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
        STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

        a += saved_a;
        b += saved_b;
        c += saved_c;
        d += saved_d;

        ptr += 64;
    } while (size -= 64);

    ctx->a = a;
    ctx->b = b;
    ctx->c = c;
    ctx->d = d;

    return ptr;
}

/* Reset 'ctx' to the MD5 initial chaining values and a zero byte counter. */
void
MD5_Init(MD5_CTX *ctx)
{
    ctx->a = 0x67452301;
    ctx->b = 0xefcdab89;
    ctx->c = 0x98badcfe;
    ctx->d = 0x10325476;

    ctx->lo = 0;
    ctx->hi = 0;
}

/*
 * Feed 'size' bytes from 'data' into the digest. Complete 64-byte blocks
 * are hashed immediately through body(); any remainder is kept in
 * ctx->buffer for the next call or for MD5_Final().
 */
void
MD5_Update(MD5_CTX *ctx, void *data, unsigned long size)
{
    MD5_u32plus saved_lo;
    unsigned long used, free;

    /* lo keeps the low 29 bits of the byte count; overflow carries into hi. */
    saved_lo = ctx->lo;
    if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) {
        ctx->hi++;
    }
    ctx->hi += size >> 29;

    /* Bytes already waiting in ctx->buffer from an earlier call. */
    used = saved_lo & 0x3f;

    if (used) {
        free = 64 - used;

        if (size < free) {
            memcpy(&ctx->buffer[used], data, size);
            return;
        }

        memcpy(&ctx->buffer[used], data, free);
        data = (unsigned char *)data + free;
        size -= free;
        body(ctx, ctx->buffer, 64);
    }

    if (size >= 64) {
        data = body(ctx, data, size & ~(unsigned long)0x3f);
        size &= 0x3f;
    }

    memcpy(ctx->buffer, data, size);
}

/*
 * Append the MD5 padding and length, write the 16-byte digest to 'result'
 * (each state word stored least-significant byte first) and zero 'ctx'.
 */
void
MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
    unsigned long used, free;

    used = ctx->lo & 0x3f;

    ctx->buffer[used++] = 0x80;

    free = 64 - used;

    /* Not enough room left for the 8-byte length: flush an extra block. */
    if (free < 8) {
        memset(&ctx->buffer[used], 0, free);
        body(ctx, ctx->buffer, 64);
        used = 0;
        free = 64;
    }

    memset(&ctx->buffer[used], 0, free - 8);

    /* Convert the byte count into a bit count. */
    ctx->lo <<= 3;
    ctx->buffer[56] = ctx->lo;
    ctx->buffer[57] = ctx->lo >> 8;
    ctx->buffer[58] = ctx->lo >> 16;
    ctx->buffer[59] = ctx->lo >> 24;
    ctx->buffer[60] = ctx->hi;
    ctx->buffer[61] = ctx->hi >> 8;
    ctx->buffer[62] = ctx->hi >> 16;
    ctx->buffer[63] = ctx->hi >> 24;

    body(ctx, ctx->buffer, 64);

    result[0] = ctx->a;
    result[1] = ctx->a >> 8;
    result[2] = ctx->a >> 16;
    result[3] = ctx->a >> 24;
    result[4] = ctx->b;
    result[5] = ctx->b >> 8;
    result[6] = ctx->b >> 16;
    result[7] = ctx->b >> 24;
    result[8] = ctx->c;
    result[9] = ctx->c >> 8;
    result[10] = ctx->c >> 16;
    result[11] = ctx->c >> 24;
    result[12] = ctx->d;
    result[13] = ctx->d >> 8;
    result[14] = ctx->d >> 16;
    result[15] = ctx->d >> 24;

    memset(ctx, 0, sizeof(*ctx));
}

/*
 * Just a simple method for getting the signature
 * result must be == 16
 *
 * Runs MD5_Init / MD5_Update / MD5_Final over 'length' bytes of 'key'.
 * NOTE(review): 'key' is const but MD5_Update() takes a non-const
 * 'void *', so this call drops the qualifier.
 */
void
md5_signature(const unsigned char *key, unsigned long length, unsigned char *result)
{
    MD5_CTX my_md5;

    MD5_Init(&my_md5);
    (void)MD5_Update(&my_md5, key, length);
    MD5_Final(result, &my_md5);
}

/*
 * 32-bit hash of 'key': the first four bytes of its MD5 digest assembled
 * least-significant byte first.
 */
uint32_t
hash_md5(const char *key, size_t key_length)
{
    unsigned char results[16];

    md5_signature((const unsigned char*)key, (unsigned long)key_length, results);

    return ((uint32_t) (results[3] & 0xFF) << 24) |
           ((uint32_t) (results[2] & 0xFF) << 16) |
           ((uint32_t) (results[1] & 0xFF) << 8) |
           (results[0] & 0xFF);
}

================================================
FILE: dep/dhashkit/dmodula.c
================================================
#include
#include
#include

#define MODULA_CONTINUUM_ADDITION   10  /* # extra slots to build into continuum */
#define MODULA_POINTS_PER_SERVER    1

/*
 * Select the continuum entry at position (hash % ncontinuum) and return
 * its 'index' field.
 */
uint32_t
modula_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash)
{
    struct continuum *c;

    ASSERT(continuum != NULL);
    ASSERT(ncontinuum != 0);

    c = continuum + hash % ncontinuum;

    return c->index;
}

================================================
FILE: dep/dhashkit/dmurmur.c
================================================
/*
 * "Murmur" hash provided by Austin, tanjent@gmail.com
 * http://murmurhash.googlepages.com/
 *
 * Note - This code makes a few assumptions about how your machine behaves -
 *
 * 1. We can read a 4-byte value from any address without crashing
 * 2. sizeof(int) == 4
 *
 * And it has a few limitations -
 * 1. It will not work incrementally.
 * 2. It will not produce the same results on little-endian and big-endian
 *  machines.
 *
 *  Updated to murmur2 hash - BP
 */

#include

/*
 * Murmur2 hash of 'length' bytes at 'key'. The seed is derived from the
 * length itself (0xdeadbeef * length).
 */
uint32_t
hash_murmur(const char *key, size_t length)
{
    /*
     * 'm' and 'r' are mixing constants generated offline.  They're not
     * really 'magic', they just happen to work well.
     */

    const unsigned int m = 0x5bd1e995;
    const uint32_t seed = (0xdeadbeef * (uint32_t)length);
    const int r = 24;


    /* Initialize the hash to a 'random' value */

    uint32_t h = seed ^ (uint32_t)length;

    /* Mix 4 bytes at a time into the hash */

    const unsigned char * data = (const unsigned char *)key;

    while (length >= 4) {
        /* NOTE(review): word-sized read straight from the byte buffer;
         * relies on assumption 1 in the file header (unaligned reads). */
        unsigned int k = *(unsigned int *)data;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        length -= 4;
    }

    /* Handle the last few bytes of the input array */

    switch(length) {
    case 3:
        h ^= ((uint32_t)data[2]) << 16;
        /* fall through */

    case 2:
        h ^= ((uint32_t)data[1]) << 8;
        /* fall through */

    case 1:
        h ^= data[0];
        h *= m;
        /* fall through */

    default:
        break;
    };

    /*
     * Do a few final mixes of the hash to ensure the last few bytes are
     * well-incorporated.
     */

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

================================================
FILE: dep/dhashkit/done_at_a_time.c
================================================
/*
 * HashKit
 * Copyright (C) 2009 Brian Aker
 * All rights reserved.
 *
 * Use and distribution licensed under the BSD license.  See
 * the COPYING file in the parent directory for full text.
 */

/*
 * This hash is Jenkin's "One at A time Hash".
* http://en.wikipedia.org/wiki/Jenkins_hash_function */ #include uint32_t hash_one_at_a_time(const char *key, size_t key_length) { const char *ptr = key; uint32_t value = 0; while (key_length--) { uint32_t val = (uint32_t) *ptr++; value += val; value += (value << 10); value ^= (value >> 6); } value += (value << 3); value ^= (value >> 11); value += (value << 15); return value; } ================================================ FILE: dep/dhashkit/drandom.c ================================================ #include #include #include #define RANDOM_CONTINUUM_ADDITION 10 /* # extra slots to build into continuum */ #define RANDOM_POINTS_PER_SERVER 1 uint32_t random_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash) { struct continuum *c; ASSERT(continuum != NULL); ASSERT(ncontinuum != 0); c = continuum + random() % ncontinuum; return c->index; } ================================================ FILE: dep/dhashkit/dsha1.c ================================================ /* from valgrind tests */ /* ================ sha1.c ================ */ /* SHA-1 in C By Steve Reid 100% Public Domain Test Vectors (from FIPS PUB 180-1) "abc" A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 A million repetitions of "a" 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F */ /* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ /* #define SHA1HANDSOFF * Copies data before messing with it. */ #define SHA1HANDSOFF #include #include #include #include #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) /* blk0() and blk() perform the initial expand. 
 */
/* I got the idea of expanding during the round function from SSLeay */
/* On little-endian builds (VR_LITTLE_ENDIAN) blk0() byte-swaps each input
 * word in place; otherwise the word is used as stored. */
#ifdef VR_LITTLE_ENDIAN
#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
    |(rol(block->l[i],8)&0x00FF00FF))
#else
#define blk0(i) block->l[i]
#endif
#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
    ^block->l[(i+2)&15]^block->l[i&15],1))

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);


/* Hash a single 512-bit block. This is the core of the algorithm.
 *
 * 'state' is the five-word running digest and is updated in place;
 * 'buffer' is the 64-byte block to absorb. With SHA1HANDSOFF defined (as it
 * is in this file) the block is copied to a local first so 'buffer' itself
 * is never modified. */
void
SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
    uint32_t a, b, c, d, e;
    typedef union {
        unsigned char c[64];
        uint32_t l[16];
    } CHAR64LONG16;
#ifdef SHA1HANDSOFF
    CHAR64LONG16 block[1];  /* use array to appear as a pointer */
    memcpy(block, buffer, 64);
#else
    /* The following had better never be used because it causes the
     * pointer-to-const buffer to be cast into a pointer to non-const.
     * And the result is written through.  I threw a "const" in, hoping
     * this will cause a diagnostic.
     */
    CHAR64LONG16* block = (const CHAR64LONG16*)buffer;
#endif
    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    /* Wipe variables */
    a = b = c = d = e = 0;
#ifdef SHA1HANDSOFF
    memset(block, '\0', sizeof(block));
#endif
}


/* SHA1Init - Initialize new context
 *
 * Loads the five SHA-1 initialization constants into context->state and
 * clears both words of the bit counter. */
void
SHA1Init(SHA1_CTX* context)
{
    /* SHA1 initialization constants */
    context->state[0] = 0x67452301;
    context->state[1] = 0xEFCDAB89;
    context->state[2] = 0x98BADCFE;
    context->state[3] = 0x10325476;
    context->state[4] = 0xC3D2E1F0;
    context->count[0] = context->count[1] = 0;
}


/*
 Run your data through this.
 *
 * Absorbs 'len' bytes from 'data'. context->count tracks the message
 * length in bits (count[0] low word, count[1] high word); complete 64-byte
 * blocks go through SHA1Transform() and the remainder is kept in
 * context->buffer. */
void
SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
{
    uint32_t i, j;

    j = context->count[0];
    if ((context->count[0] += len << 3) < j)
        context->count[1]++;
    context->count[1] += (len>>29);
    /* j becomes the number of bytes already pending in context->buffer. */
    j = (j >> 3) & 63;
    if ((j + len) > 63) {
        memcpy(&context->buffer[j], data, (i = 64-j));
        SHA1Transform(context->state, context->buffer);
        for ( ; i + 63 < len; i += 64) {
            SHA1Transform(context->state, &data[i]);
        }
        j = 0;
    }
    else i = 0;
    memcpy(&context->buffer[j], &data[i], len - i);
}


/* Add padding and return the message digest.
 *
 * Writes the 20-byte digest to 'digest' (each state word most-significant
 * byte first) and then zeroes the whole context. */
void
SHA1Final(unsigned char digest[20], SHA1_CTX* context)
{
    unsigned i;
    unsigned char finalcount[8];
    unsigned char c;

#if 0	/* untested "improvement" by DHR */
    /* Convert context->count to a sequence of bytes
     * in finalcount.  Second element first, but
     * big-endian order within element.
     * But we do it all backwards.
     */
    unsigned char *fcp = &finalcount[8];

    for (i = 0; i < 2; i++)
    {
        uint32_t t = context->count[i];
        int j;

        for (j = 0; j < 4; t >>= 8, j++)
            *--fcp = (unsigned char) t;
    }
#else
    for (i = 0; i < 8; i++) {
        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */
    }
#endif
    /* 0200 (octal) is the mandatory 0x80 padding byte. */
    c = 0200;
    SHA1Update(context, &c, 1);
    /* Zero-pad one byte at a time until the bit count is 448 mod 512. */
    while ((context->count[0] & 504) != 448) {
        c = 0000;
        SHA1Update(context, &c, 1);
    }
    SHA1Update(context, finalcount, 8);  /* Should cause a SHA1Transform() */
    for (i = 0; i < 20; i++) {
        digest[i] = (unsigned char)
         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
    }
    /* Wipe variables */
    memset(context, '\0', sizeof(*context));
    memset(&finalcount, '\0', sizeof(finalcount));
}
/* ================ end of sha1.c ================ */

================================================
FILE: dep/dlist/Makefile.am
================================================
MAINTAINERCLEANFILES = Makefile.in

AM_CPPFLAGS =
if !OS_SOLARIS
AM_CPPFLAGS += -D_GNU_SOURCE
endif
AM_CPPFLAGS += -I $(top_srcdir)/dep/util
AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include
AM_CPPFLAGS += -I $(top_srcdir)/dep/dmalloc

AM_CFLAGS = -Wall -Wshadow
AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value

noinst_LIBRARIES = libdlist.a

noinst_HEADERS = dlist.h dmtqueue.h dlockqueue.h

libdlist_a_SOURCES =        \
	dlist.c dlist.h         \
	dmtqueue.c dmtqueue.h   \
	dlockqueue.c dlockqueue.h

================================================
FILE: dep/dlist/dlist.c
================================================
#include
#include
#include

/* Create a new list. The created list can be freed with
 * dlistRelease(). Node values are released by that call only when a
 * 'free' callback has been installed on the list; otherwise they must be
 * freed by the user beforehand.
 *
 * The new list is empty and has no dup/free/match callbacks.
 *
 * On error, NULL is returned. Otherwise the pointer to the new list. */
dlist *dlistCreate(void)
{
    struct dlist *list;

    if ((list = dalloc(sizeof(*list))) == NULL)
        return NULL;
    list->head = list->tail = NULL;
    list->len = 0;
    list->dup = NULL;
    list->free = NULL;
    list->match = NULL;
    return list;
}

/* Free the whole list.
 *
 * This function can't fail.
*/ void dlistRelease(dlist *list) { unsigned long len; dlistNode *current, *next; current = list->head; len = list->len; while(len--) { next = current->next; if (list->free) list->free(current->value); dfree(current); current = next; } dfree(list); } /* Add a new node to the list, to head, containing the specified 'value' * pointer as value. * * On error, NULL is returned and no operation is performed (i.e. the * list remains unaltered). * On success the 'list' pointer you pass to the function is returned. */ dlist *dlistAddNodeHead(dlist *list, void *value) { dlistNode *node; if ((node = dalloc(sizeof(*node))) == NULL) return NULL; node->value = value; if (list->len == 0) { list->head = list->tail = node; node->prev = node->next = NULL; } else { node->prev = NULL; node->next = list->head; list->head->prev = node; list->head = node; } list->len++; return list; } /* Add a new node to the list, to tail, containing the specified 'value' * pointer as value. * * On error, NULL is returned and no operation is performed (i.e. the * list remains unaltered). * On success the 'list' pointer you pass to the function is returned. 
*/ dlist *dlistAddNodeTail(dlist *list, void *value) { dlistNode *node; if ((node = dalloc(sizeof(*node))) == NULL) return NULL; node->value = value; if (list->len == 0) { list->head = list->tail = node; node->prev = node->next = NULL; } else { node->prev = list->tail; node->next = NULL; list->tail->next = node; list->tail = node; } list->len++; return list; } dlist *dlistInsertNode(dlist *list, dlistNode *old_node, void *value, int after) { dlistNode *node; if ((node = dalloc(sizeof(*node))) == NULL) return NULL; node->value = value; if (after) { node->prev = old_node; node->next = old_node->next; if (list->tail == old_node) { list->tail = node; } } else { node->next = old_node; node->prev = old_node->prev; if (list->head == old_node) { list->head = node; } } if (node->prev != NULL) { node->prev->next = node; } if (node->next != NULL) { node->next->prev = node; } list->len++; return list; } /* Remove the specified node from the specified list. * It's up to the caller to free the private value of the node. * * This function can't fail. */ void dlistDelNode(dlist *list, dlistNode *node) { if (node->prev) node->prev->next = node->next; else list->head = node->next; if (node->next) node->next->prev = node->prev; else list->tail = node->prev; if (list->free) list->free(node->value); dfree(node); list->len--; } /* Returns a list iterator 'iter'. After the initialization every * call to dlistNext() will return the next element of the list. * * This function can't fail. 
*/ dlistIter *dlistGetIterator(dlist *list, int direction) { dlistIter *iter; if ((iter = dalloc(sizeof(*iter))) == NULL) return NULL; if (direction == AL_START_HEAD) iter->next = list->head; else iter->next = list->tail; iter->direction = direction; return iter; } /* Release the iterator memory */ void dlistReleaseIterator(dlistIter *iter) { dfree(iter); } /* Create an iterator in the list private iterator structure */ void dlistRewind(dlist *list, dlistIter *li) { li->next = list->head; li->direction = AL_START_HEAD; } void dlistRewindTail(dlist *list, dlistIter *li) { li->next = list->tail; li->direction = AL_START_TAIL; } /* Return the next element of an iterator. * It's valid to remove the currently returned element using * dlistDelNode(), but not to remove other elements. * * The function returns a pointer to the next element of the list, * or NULL if there are no more elements, so the classical usage patter * is: * * iter = dlistGetIterator(list,); * while ((node = dlistNext(iter)) != NULL) { * doSomethingWith(dlistNodeValue(node)); * } * * */ dlistNode *dlistNext(dlistIter *iter) { dlistNode *current = iter->next; if (current != NULL) { if (iter->direction == AL_START_HEAD) iter->next = current->next; else iter->next = current->prev; } return current; } /* Duplicate the whole list. On out of memory NULL is returned. * On success a copy of the original list is returned. * * The 'Dup' method set with listSetDupMethod() function is used * to copy the node value. Otherwise the same pointer value of * the original node is used as value of the copied node. * * The original list both on success or error is never modified. 
*/ dlist *dlistDup(dlist *orig) { dlist *copy; dlistIter iter; dlistNode *node; if ((copy = dlistCreate()) == NULL) return NULL; copy->dup = orig->dup; copy->free = orig->free; copy->match = orig->match; dlistRewind(orig, &iter); while((node = dlistNext(&iter)) != NULL) { void *value; if (copy->dup) { value = copy->dup(node->value); if (value == NULL) { dlistRelease(copy); return NULL; } } else value = node->value; if (dlistAddNodeTail(copy, value) == NULL) { dlistRelease(copy); return NULL; } } return copy; } /* Search the list for a node matching a given key. * The match is performed using the 'match' method * set with listSetMatchMethod(). If no 'match' method * is set, the 'value' pointer of every node is directly * compared with the 'key' pointer. * * On success the first matching node pointer is returned * (search starts from head). If no matching node exists * NULL is returned. */ dlistNode *dlistSearchKey(dlist *list, void *key) { dlistIter iter; dlistNode *node; dlistRewind(list, &iter); while((node = dlistNext(&iter)) != NULL) { if (list->match) { if (list->match(node->value, key)) { return node; } } else { if (key == node->value) { return node; } } } return NULL; } /* Return the element at the specified zero-based index * where 0 is the head, 1 is the element next to head * and so on. Negative integers are used in order to count * from the tail, -1 is the last element, -2 the penultimate * and so on. If the index is out of range NULL is returned. */ dlistNode *dlistIndex(dlist *list, long index) { dlistNode *n; if (index < 0) { index = (-index)-1; n = list->tail; while(index-- && n) n = n->prev; } else { n = list->head; while(index-- && n) n = n->next; } return n; } /* Rotate the list removing the tail node and inserting it to the head. 
*/ void dlistRotate(dlist *list) { dlistNode *tail = list->tail; if (dlistLength(list) <= 1) return; /* Detach current tail */ list->tail = tail->prev; list->tail->next = NULL; /* Move it as head */ list->head->prev = tail; tail->prev = NULL; tail->next = list->head; list->head = tail; } dlist *dlistPush(dlist *list, void *value) { dlistAddNodeTail(list, value); return list; } void *dlistPop(dlist *list) { dlistNode *node; void *value; node = dlistFirst(list); if (node == NULL) { return NULL; } value = dlistNodeValue(node); dlistDelNode(list, node); if (list->free) return NULL; return value; } ================================================ FILE: dep/dlist/dlist.h ================================================ #ifndef _DLIST_H__ #define _DLIST_H__ /* Node, List, and Iterator are the only data structures used currently. */ typedef struct dlistNode { struct dlistNode *prev; struct dlistNode *next; void *value; } dlistNode; typedef struct dlistIter { dlistNode *next; int direction; } dlistIter; typedef struct dlist { dlistNode *head; dlistNode *tail; void *(*dup)(void *ptr); void (*free)(void *ptr); int (*match)(void *ptr, void *key); unsigned long len; } dlist; /* Functions implemented as macros */ #define dlistLength(l) ((l)->len) #define dlistFirst(l) ((l)->head) #define dlistLast(l) ((l)->tail) #define dlistPrevNode(n) ((n)->prev) #define dlistNextNode(n) ((n)->next) #define dlistNodeValue(n) ((n)->value) #define dlistSetDupMethod(l,m) ((l)->dup = (m)) #define dlistSetFreeMethod(l,m) ((l)->free = (m)) #define dlistSetMatchMethod(l,m) ((l)->match = (m)) #define dlistGetDupMethod(l) ((l)->dup) #define dlistGetFree(l) ((l)->free) #define dlistGetMatchMethod(l) ((l)->match) /* Prototypes */ dlist *dlistCreate(void); void dlistRelease(dlist *list); dlist *dlistAddNodeHead(dlist *list, void *value); dlist *dlistAddNodeTail(dlist *list, void *value); dlist *dlistInsertNode(dlist *list, dlistNode *old_node, void *value, int after); void dlistDelNode(dlist *list, 
dlistNode *node); dlistIter *dlistGetIterator(dlist *list, int direction); dlistNode *dlistNext(dlistIter *iter); void dlistReleaseIterator(dlistIter *iter); dlist *dlistDup(dlist *orig); dlistNode *dlistSearchKey(dlist *list, void *key); dlistNode *dlistIndex(dlist *list, long index); void dlistRewind(dlist *list, dlistIter *li); void dlistRewindTail(dlist *list, dlistIter *li); void dlistRotate(dlist *list); dlist *dlistPush(dlist *list, void *value); void *dlistPop(dlist *list); /* Directions for iterators */ #define AL_START_HEAD 0 #define AL_START_TAIL 1 #endif /* __ADLIST_H__ */ ================================================ FILE: dep/dlist/dlockqueue.c ================================================ #include #include #include #include #include #include #include dlockqueue *dlockqueue_create(void) { dlockqueue *lqueue; lqueue = dalloc(sizeof(*lqueue)); if (lqueue == NULL) { return NULL; } lqueue->maxlen = -1; lqueue->maxlen_policy = MAX_LENGTH_POLICY_REJECT; pthread_mutex_init(&lqueue->lmutex,NULL); lqueue->l = dlistCreate(); if (lqueue->l == NULL) { dlockqueue_destroy(lqueue); return NULL; } return lqueue; } long long dlockqueue_push(void *q, void *value) { dlockqueue *lqueue = q; dlist *list; long long length; pthread_mutex_lock(&lqueue->lmutex); length = (long long)dlistLength(lqueue->l); if (lqueue->maxlen >0 && length >= lqueue->maxlen) { if (lqueue->maxlen_policy == MAX_LENGTH_POLICY_REJECT) { length = -1; } else if (lqueue->maxlen_policy == MAX_LENGTH_POLICY_EVICT_HEAD) { while (length >= lqueue->maxlen) { dlistNode *ln = dlistFirst(lqueue->l); dlistDelNode(lqueue->l,ln); length = (long long)dlistLength(lqueue->l); } list = dlistAddNodeTail(lqueue->l, value); length ++; } else if (lqueue->maxlen_policy == MAX_LENGTH_POLICY_EVICT_END) { while (length >= lqueue->maxlen) { dlistNode *ln = dlistLast(lqueue->l); dlistDelNode(lqueue->l,ln); length = (long long)dlistLength(lqueue->l); } list = dlistAddNodeTail(lqueue->l, value); length ++; } } else { list 
= dlistAddNodeTail(lqueue->l, value); length ++; } pthread_mutex_unlock(&lqueue->lmutex); if (list == NULL) { return -1; } return length; } void *dlockqueue_pop(void *q) { dlockqueue *lqueue = q; dlistNode *node; void *value; if (lqueue == NULL || lqueue->l == NULL) { return NULL; } pthread_mutex_lock(&lqueue->lmutex); node = dlistFirst(lqueue->l); if (node == NULL) { pthread_mutex_unlock(&lqueue->lmutex); return NULL; } value = dlistNodeValue(node); dlistDelNode(lqueue->l, node); pthread_mutex_unlock(&lqueue->lmutex); return value; } void dlockqueue_destroy(void *q) { dlockqueue *lqueue = q; if (lqueue == NULL) { return; } if (lqueue->l != NULL) { dlistRelease(lqueue->l); } pthread_mutex_destroy(&lqueue->lmutex); dfree(lqueue); } long long dlockqueue_length(void *q) { dlockqueue *lqueue = q; long long length; if (lqueue == NULL || lqueue->l == NULL) { return -1; } pthread_mutex_lock(&lqueue->lmutex); length = dlistLength(lqueue->l); pthread_mutex_unlock(&lqueue->lmutex); return length; } ================================================ FILE: dep/dlist/dlockqueue.h ================================================ #ifndef _DLOCKQUEUE_H_ #define _DLOCKQUEUE_H_ struct dlist; typedef struct dlockqueue{ struct dlist *l; long long maxlen; int maxlen_policy; pthread_mutex_t lmutex; } dlockqueue; dlockqueue *dlockqueue_create(void); long long dlockqueue_push(void *q, void *value); void *dlockqueue_pop(void *q); void dlockqueue_destroy(void *q); long long dlockqueue_length(void *q); #endif ================================================ FILE: dep/dlist/dmtqueue.c ================================================ #include #include #include #include #include /******** multi-thread safe queue interface ********/ dmtqueue *dmtqueue_create(void) { dmtqueue *q; q = dalloc(sizeof(*q)); if (q == NULL) { return NULL; } q->l = NULL; q->lock_push = NULL; q->lock_pop = NULL; q->destroy = NULL; q->length = NULL; return q; } void dmtqueue_destroy(dmtqueue *q) { if (q == NULL) { return; } 
if (q->destroy) { q->destroy(q->l); } dfree(q); } long long dmtqueue_push(dmtqueue *q, void *value) { if(q == NULL || q->l == NULL || q->lock_push == NULL) { return -1; } return q->lock_push(q->l, value); } void *dmtqueue_pop(dmtqueue *q) { if(q == NULL || q->l == NULL || q->lock_pop == NULL) { return NULL; } return q->lock_pop(q->l); } int dmtqueue_empty(dmtqueue *q) { if(q == NULL || q->l == NULL || q->length == NULL) { return -1; } if(q->length(q->l) > 0) { return 0; } return 1; } long long dmtqueue_length(dmtqueue *q) { if(q == NULL || q->l == NULL || q->length == NULL) { return -1; } return q->length(q->l); } /******** multi-thread safe queue implement ********/ /** * This is multi-thread safe queue. * This lock list's performance is not good, but it is safe. */ int dmtqueue_init_with_lockqueue(dmtqueue *q, dlockqueue_freefunc freefunc) { dlockqueue *lq; if (q == NULL) { return -1; } lq = dlockqueue_create(); if (lq == NULL) { return -1; } lq->l->free = freefunc; q->l = lq; q->lock_push = dlockqueue_push; q->lock_pop = dlockqueue_pop; q->destroy = dlockqueue_destroy; q->length = dlockqueue_length; return 0; } ================================================ FILE: dep/dlist/dmtqueue.h ================================================ #ifndef _DMTQUEUE_H_ #define _DMTQUEUE_H_ #define MAX_LENGTH_POLICY_REJECT 0 #define MAX_LENGTH_POLICY_EVICT_HEAD 1 #define MAX_LENGTH_POLICY_EVICT_END 2 /* Multi-thread safe queue */ typedef struct dmtqueue{ void *l; long long (*lock_push)(void *q, void *value); void *(*lock_pop)(void *q); void (*destroy)(void *q); long long (*length)(void *q); } dmtqueue; #define dmtqueueSetMaxlength(q,l) ((q)->l->maxlen = (l)) #define dmtqueueSetMaxlengthPolicy(q,p) ((q)->l->maxlen = (p)) typedef int (*dmtqueue_init)(dmtqueue *); /******** multi-thread safe list interface ********/ dmtqueue *dmtqueue_create(void); void dmtqueue_destroy(dmtqueue *q); long long dmtqueue_push(dmtqueue *q, void *value); void *dmtqueue_pop(dmtqueue *q); int 
dmtqueue_empty(dmtqueue *q);
long long dmtqueue_length(dmtqueue *q);

/******** multi-thread safe list implement ********/

/* Callback type installed as the backing list's 'free' method. */
typedef void (*dlockqueue_freefunc)(void *);
int dmtqueue_init_with_lockqueue(dmtqueue *l, dlockqueue_freefunc freefunc);

#endif

================================================
FILE: dep/dmalloc/Makefile.am
================================================
MAINTAINERCLEANFILES = Makefile.in

AM_CPPFLAGS =
if !OS_SOLARIS
AM_CPPFLAGS += -D_GNU_SOURCE
endif
AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include
AM_CPPFLAGS += -I $(top_srcdir)/dep/util

AM_CFLAGS = -Wall -Wshadow
AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value

noinst_LIBRARIES = libdmalloc.a

noinst_HEADERS = dmalloc.h

libdmalloc_a_SOURCES =	\
	dmalloc.c dmalloc.h

================================================
FILE: dep/dmalloc/dmalloc.c
================================================
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*memory api*/
/* Running total of bytes accounted to the d*alloc wrappers. */
static size_t used_memory = 0;
/* Guards used_memory only in the fallback branch below, where no atomic
 * builtins are selected. */
pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * update_used_mem_stat_add/sub adjust used_memory by __n using the first
 * available mechanism: the __atomic builtins, the older __sync builtins,
 * or the mutex above. malloc_lock_type() returns a string naming the
 * mechanism that was compiled in.
 */
#if defined(__ATOMIC_RELAXED)
#define update_used_mem_stat_add(__n) __atomic_add_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
#define update_used_mem_stat_sub(__n) __atomic_sub_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
char *malloc_lock_type(void) {return "__ATOMIC_RELAXED";}
#elif defined(HAVE_ATOMIC)
#define update_used_mem_stat_add(__n) __sync_add_and_fetch(&used_memory, (__n))
#define update_used_mem_stat_sub(__n) __sync_sub_and_fetch(&used_memory, (__n))
char *malloc_lock_type(void) {return "HAVE_ATOMIC";}
#else
#define update_used_mem_stat_add(__n) do { \
    pthread_mutex_lock(&used_memory_mutex); \
    used_memory += (__n); \
    pthread_mutex_unlock(&used_memory_mutex); \
} while(0)

#define update_used_mem_stat_sub(__n) do { \
    pthread_mutex_lock(&used_memory_mutex); \
    used_memory -= (__n); \
    pthread_mutex_unlock(&used_memory_mutex); \
} while(0)
char *malloc_lock_type(void) {return "pthread_mutex_t";}
#endif
#define update_dmalloc_stat_alloc(__n) do { \ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ update_used_mem_stat_add(_n); \ } while(0) #define update_dmalloc_stat_free(__n) do { \ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ update_used_mem_stat_sub(_n); \ } while(0) #ifdef HAVE_MALLOC_SIZE #define PREFIX_SIZE (0) #else #if defined(__sun) || defined(__sparc) || defined(__sparc__) #define PREFIX_SIZE (sizeof(long long)) #else #define PREFIX_SIZE (sizeof(size_t)) #endif #endif /* Provide dmalloc_size() for systems where this function is not provided by * malloc itself, given that in that case we store a header with this * information as the first bytes of every allocation. */ #ifndef HAVE_MALLOC_SIZE size_t dmalloc_size(void *ptr) { void *realptr = (char*)ptr-PREFIX_SIZE; size_t size = *((size_t*)realptr); /* Assume at least that all the allocations are padded at sizeof(long) by * the underlying allocator. 
*/ if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1)); return size+PREFIX_SIZE; } #endif void * _dalloc(size_t size, const char *name, int line) { void *p; ASSERT(size != 0); #ifdef DUSE_JEMALLOC p = je_malloc(size+PREFIX_SIZE); #else p = malloc(size+PREFIX_SIZE); #endif if (p == NULL) { log_error("malloc(%zu) failed @ %s:%d", size, name, line); } else { #ifdef HAVE_MALLOC_SIZE update_dmalloc_stat_alloc(dmalloc_size(p)); return p; #else *((size_t*)p) = size; update_dmalloc_stat_alloc(size+PREFIX_SIZE); return (char*)p+PREFIX_SIZE; #endif log_debug(LOG_VVERB, "malloc(%zu) at %p @ %s:%d", size, p, name, line); } return p; } void * _dzalloc(size_t size, const char *name, int line) { void *p; p = _dalloc(size, name, line); if (p != NULL) { memset(p, 0, size); } return p; } void * _dcalloc(size_t nmemb, size_t size, const char *name, int line) { return _dzalloc(nmemb * size, name, line); } void * _drealloc(void *ptr, size_t size, const char *name, int line) { #ifndef HAVE_MALLOC_SIZE void *realp; #endif void *p; size_t oldsize; ASSERT(size != 0); if (ptr == NULL) return _dalloc(size, name, line); #ifdef HAVE_MALLOC_SIZE oldsize = dmalloc_size(ptr); #ifdef DUSE_JEMALLOC p = je_realloc(ptr, size); #else p = realloc(ptr, size); #endif #else realp = (char*)ptr-PREFIX_SIZE; oldsize = *((size_t*)realp); #ifdef DUSE_JEMALLOC p = je_realloc(ptr, size+PREFIX_SIZE); #else p = realloc(ptr, size+PREFIX_SIZE); #endif #endif if (p == NULL) { log_error("realloc(%zu) failed @ %s:%d", size, name, line); return NULL; } else { log_debug(LOG_VVERB, "realloc(%zu) at %p @ %s:%d", size, p, name, line); #ifdef HAVE_MALLOC_SIZE update_dmalloc_stat_free(oldsize); update_dmalloc_stat_alloc(dmalloc_size(p)); return p; #else *((size_t*)p) = size; update_dmalloc_stat_free(oldsize); update_dmalloc_stat_alloc(size); return p+PREFIX_SIZE; #endif } return NULL; } void _dfree(void *ptr, const char *name, int line) { #ifndef HAVE_MALLOC_SIZE void *realp; size_t oldsize; #endif 
ASSERT(ptr != NULL); log_debug(LOG_VVERB, "free(%p) @ %s:%d", ptr, name, line); #ifdef HAVE_MALLOC_SIZE update_dmalloc_stat_free(dmalloc_size(ptr)); #ifdef DUSE_JEMALLOC je_free(ptr); #else free(ptr); #endif #else realp = (char*)ptr-PREFIX_SIZE; oldsize = *((size_t*)realp); update_dmalloc_stat_free(oldsize+PREFIX_SIZE); free(realp); #ifdef DUSE_JEMALLOC je_free(realp); #else free(realp); #endif #endif } size_t dalloc_used_memory(void) { size_t um; #if defined(__ATOMIC_RELAXED) || defined(HAVE_ATOMIC) um = update_used_mem_stat_add(0); #else pthread_mutex_lock(&used_memory_mutex); um = used_memory; pthread_mutex_unlock(&used_memory_mutex); #endif return um; } /* Returns the size of physical memory (RAM) in bytes. * It looks ugly, but this is the cleanest way to achive cross platform results. * Cleaned up from: * * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system * * Note that this function: * 1) Was released under the following CC attribution license: * http://creativecommons.org/licenses/by/3.0/deed.en_US. * 2) Was originally implemented by David Robert Nadeau. * 3) Was modified for Redis by Matt Stancliff. * 4) This note exists in order to comply with the original license. */ size_t dalloc_get_memory_size(void) { #if defined(__unix__) || defined(__unix) || defined(unix) || \ (defined(__APPLE__) && defined(__MACH__)) #if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64)) int mib[2]; mib[0] = CTL_HW; #if defined(HW_MEMSIZE) mib[1] = HW_MEMSIZE; /* OSX. --------------------- */ #elif defined(HW_PHYSMEM64) mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */ #endif int64_t size = 0; /* 64-bit */ size_t len = sizeof(size); if (sysctl( mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; /* Failed? */ #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) /* FreeBSD, Linux, OpenBSD, and Solaris. 
-------------------- */ return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE); #elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM)) /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */ int mib[2]; mib[0] = CTL_HW; #if defined(HW_REALMEM) mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */ #elif defined(HW_PYSMEM) mib[1] = HW_PHYSMEM; /* Others. ------------------ */ #endif unsigned int size = 0; /* 32-bit */ size_t len = sizeof(size); if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; /* Failed? */ #endif /* sysctl and sysconf variants */ #else return 0L; /* Unknown OS. */ #endif } /* Get the RSS information in an OS-specific way. * * WARNING: the function zmalloc_get_rss() is not designed to be fast * and may not be called in the busy loops where Redis tries to release * memory expiring or swapping out objects. * * For this kind of "fast RSS reporting" usages use instead the * function RedisEstimateRSS() that is a much faster (and less precise) * version of the function. 
*/ #if defined(HAVE_PROC_STAT) #include #include #include #include size_t dalloc_get_rss(void) { int page = sysconf(_SC_PAGESIZE); size_t rss; char buf[4096]; char filename[256]; int fd, count; char *p, *x; snprintf(filename,256,"/proc/%d/stat",getpid()); if ((fd = open(filename,O_RDONLY)) == -1) return 0; if (read(fd,buf,4096) <= 0) { close(fd); return 0; } close(fd); p = buf; count = 23; /* RSS is the 24th field in /proc//stat */ while(p && count--) { p = strchr(p,' '); if (p) p++; } if (!p) return 0; x = strchr(p,' '); if (!x) return 0; *x = '\0'; rss = strtoll(p,NULL,10); rss *= page; return rss; } #elif defined(HAVE_TASKINFO) #include #include #include #include #include #include #include size_t dalloc_get_rss(void) { task_t task = MACH_PORT_NULL; struct task_basic_info t_info; mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS) return 0; task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count); return t_info.resident_size; } #else size_t dalloc_get_rss(void) { /* If we can't get the RSS in an OS-specific way for this system just * return the memory usage we estimated in dalloc().. * * Fragmentation will appear to be always 1 (no fragmentation) * of course... */ return dalloc_used_memory(); } #endif /* Fragmentation = RSS / allocated-bytes */ float dalloc_get_fragmentation_ratio(size_t rss) { return (float)rss/dalloc_used_memory(); } ================================================ FILE: dep/dmalloc/dmalloc.h ================================================ #ifndef _DMALLOC_H_ #define _DMALLOC_H_ #ifdef HAVE_CONFIG_H # include #endif #include #ifdef HAVE_JEMALLOC # define DUSE_JEMALLOC 1 #endif /* * Memory allocation and free wrappers. * * These wrappers enables us to loosely detect double free, dangling * pointer access and zero-byte alloc. */ #if defined(DUSE_JEMALLOC) #define DMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." 
__xstr(JEMALLOC_VERSION_BUGFIX)) #include #if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2) #define HAVE_MALLOC_SIZE 1 #define dmalloc_size(p) je_malloc_usable_size(p) #else #error "Newer version of jemalloc required" #endif #elif defined(__APPLE__) #include #define HAVE_MALLOC_SIZE 1 #define dmalloc_size(p) malloc_size(p) #endif #ifndef DMALLOC_LIB #define DMALLOC_LIB "libc" #endif #define dalloc(_s) \ _dalloc((size_t)(_s), __FILE__, __LINE__) #define dzalloc(_s) \ _dzalloc((size_t)(_s), __FILE__, __LINE__) #define dcalloc(_n, _s) \ _dcalloc((size_t)(_n), (size_t)(_s), __FILE__, __LINE__) #define drealloc(_p, _s) \ _drealloc(_p, (size_t)(_s), __FILE__, __LINE__) #define dfree(_p) do { \ _dfree(_p, __FILE__, __LINE__); \ } while (0) char *dmalloc_lock_type(void); #ifndef HAVE_MALLOC_SIZE size_t dmalloc_size(void *ptr); #endif void *_dalloc(size_t size, const char *name, int line); void *_dzalloc(size_t size, const char *name, int line); void *_dcalloc(size_t nmemb, size_t size, const char *name, int line); void *_drealloc(void *ptr, size_t size, const char *name, int line); void _dfree(void *ptr, const char *name, int line); size_t dalloc_used_memory(void); size_t dalloc_get_memory_size(void); size_t dalloc_get_rss(void); float dalloc_get_fragmentation_ratio(size_t rss); #endif ================================================ FILE: dep/himemcached-0.1.0/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CFLAGS = -Wall -Wshadow AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value noinst_LIBRARIES = libhimemcached.a noinst_HEADERS = himemcached.h himcread.h himcdep/sds.h libhimemcached_a_SOURCES = \ himcdep/sds.c himcdep/sds.h \ himcread.c himcread.h \ himemcached.c himemcached.h ================================================ FILE: dep/himemcached-0.1.0/himcdep/sds.c ================================================ /* SDS (Simple Dynamic Strings), A C dynamic strings 
library. * * Copyright (c) 2006-2014, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include "sds.h" /* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'. * If NULL is used for 'init' the string is initialized with zero bytes. 
* * The string is always null-termined (all the sds strings are, always) so * even if you create an sds string with: * * mystring = sdsnewlen("abc",3"); * * You can print the string with printf() as there is an implicit \0 at the * end of the string. However the string is binary safe and can contain * \0 characters in the middle, as the length is stored in the sds header. */ sds sdsnewlen(const void *init, size_t initlen) { struct sdshdr *sh; if (init) { sh = malloc(sizeof *sh+initlen+1); } else { sh = calloc(sizeof *sh+initlen+1,1); } if (sh == NULL) return NULL; sh->len = initlen; sh->free = 0; if (initlen && init) memcpy(sh->buf, init, initlen); sh->buf[initlen] = '\0'; return (char*)sh->buf; } /* Create an empty (zero length) sds string. Even in this case the string * always has an implicit null term. */ sds sdsempty(void) { return sdsnewlen("",0); } /* Create a new sds string starting from a null termined C string. */ sds sdsnew(const char *init) { size_t initlen = (init == NULL) ? 0 : strlen(init); return sdsnewlen(init, initlen); } /* Duplicate an sds string. */ sds sdsdup(const sds s) { return sdsnewlen(s, sdslen(s)); } /* Free an sds string. No operation is performed if 's' is NULL. */ void sdsfree(sds s) { if (s == NULL) return; free(s-sizeof(struct sdshdr)); } /* Set the sds string length to the length as obtained with strlen(), so * considering as content only up to the first null term character. * * This function is useful when the sds string is hacked manually in some * way, like in the following example: * * s = sdsnew("foobar"); * s[2] = '\0'; * sdsupdatelen(s); * printf("%d\n", sdslen(s)); * * The output will be "2", but if we comment out the call to sdsupdatelen() * the output will be "6" as the string was modified but the logical length * remains 6 bytes. 
*/ void sdsupdatelen(sds s) { struct sdshdr *sh = (void*) (s-sizeof *sh); int reallen = strlen(s); sh->free += (sh->len-reallen); sh->len = reallen; } /* Modify an sds string on-place to make it empty (zero length). * However all the existing buffer is not discarded but set as free space * so that next append operations will not require allocations up to the * number of bytes previously available. */ void sdsclear(sds s) { struct sdshdr *sh = (void*) (s-sizeof *sh); sh->free += sh->len; sh->len = 0; sh->buf[0] = '\0'; } /* Enlarge the free space at the end of the sds string so that the caller * is sure that after calling this function can overwrite up to addlen * bytes after the end of the string, plus one more byte for nul term. * * Note: this does not change the *length* of the sds string as returned * by sdslen(), but only the free buffer space we have. */ sds sdsMakeRoomFor(sds s, size_t addlen) { struct sdshdr *sh, *newsh; size_t free = sdsavail(s); size_t len, newlen; if (free >= addlen) return s; len = sdslen(s); sh = (void*) (s-sizeof *sh); newlen = (len+addlen); if (newlen < SDS_MAX_PREALLOC) newlen *= 2; else newlen += SDS_MAX_PREALLOC; newsh = realloc(sh, sizeof *newsh+newlen+1); if (newsh == NULL) return NULL; newsh->free = newlen - len; return newsh->buf; } /* Reallocate the sds string so that it has no free space at the end. The * contained string remains not altered, but next concatenation operations * will require a reallocation. * * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdsRemoveFreeSpace(sds s) { struct sdshdr *sh; sh = (void*) (s-sizeof *sh); sh = realloc(sh, sizeof *sh+sh->len+1); sh->free = 0; return sh->buf; } /* Return the total size of the allocation of the specifed sds string, * including: * 1) The sds header before the pointer. * 2) The string. * 3) The free buffer at the end if any. * 4) The implicit null term. 
*/ size_t sdsAllocSize(sds s) { struct sdshdr *sh = (void*) (s-sizeof *sh); return sizeof(*sh)+sh->len+sh->free+1; } /* Increment the sds length and decrements the left free space at the * end of the string according to 'incr'. Also set the null term * in the new end of the string. * * This function is used in order to fix the string length after the * user calls sdsMakeRoomFor(), writes something after the end of * the current string, and finally needs to set the new length. * * Note: it is possible to use a negative increment in order to * right-trim the string. * * Usage example: * * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the * following schema, to cat bytes coming from the kernel to the end of an * sds string without copying into an intermediate buffer: * * oldlen = sdslen(s); * s = sdsMakeRoomFor(s, BUFFER_SIZE); * nread = read(fd, s+oldlen, BUFFER_SIZE); * ... check for nread <= 0 and handle it ... * sdsIncrLen(s, nread); */ void sdsIncrLen(sds s, int incr) { struct sdshdr *sh = (void*) (s-sizeof *sh); assert(sh->free >= incr); sh->len += incr; sh->free -= incr; assert(sh->free >= 0); s[sh->len] = '\0'; } /* Grow the sds to have the specified length. Bytes that were not part of * the original length of the sds will be set to zero. * * if the specified length is smaller than the current length, no operation * is performed. */ sds sdsgrowzero(sds s, size_t len) { struct sdshdr *sh = (void*) (s-sizeof *sh); size_t totlen, curlen = sh->len; if (len <= curlen) return s; s = sdsMakeRoomFor(s,len-curlen); if (s == NULL) return NULL; /* Make sure added region doesn't contain garbage */ sh = (void*)(s-sizeof *sh); memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */ totlen = sh->len+sh->free; sh->len = len; sh->free = totlen-sh->len; return s; } /* Append the specified binary-safe string pointed by 't' of 'len' bytes to the * end of the specified sds string 's'. 
* * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscatlen(sds s, const void *t, size_t len) { struct sdshdr *sh; size_t curlen = sdslen(s); s = sdsMakeRoomFor(s,len); if (s == NULL) return NULL; sh = (void*) (s-sizeof *sh); memcpy(s+curlen, t, len); sh->len = curlen+len; sh->free = sh->free-len; s[curlen+len] = '\0'; return s; } /* Append the specified null termianted C string to the sds string 's'. * * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscat(sds s, const char *t) { return sdscatlen(s, t, strlen(t)); } /* Append the specified sds 't' to the existing sds 's'. * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscatsds(sds s, const sds t) { return sdscatlen(s, t, sdslen(t)); } /* Destructively modify the sds string 's' to hold the specified binary * safe string pointed by 't' of length 'len' bytes. */ sds sdscpylen(sds s, const char *t, size_t len) { struct sdshdr *sh = (void*) (s-sizeof *sh); size_t totlen = sh->free+sh->len; if (totlen < len) { s = sdsMakeRoomFor(s,len-sh->len); if (s == NULL) return NULL; sh = (void*) (s-sizeof *sh); totlen = sh->free+sh->len; } memcpy(s, t, len); s[len] = '\0'; sh->len = len; sh->free = totlen-len; return s; } /* Like sdscpylen() but 't' must be a null-termined string so that the length * of the string is obtained with strlen(). */ sds sdscpy(sds s, const char *t) { return sdscpylen(s, t, strlen(t)); } /* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the lenght of the null-terminated string * representation stored at 's'. 
*/ #define SDS_LLSTR_SIZE 21 int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p++ = '-'; /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* Identical sdsll2str(), but for unsigned long long type. */ int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; /* Generate the string representation, this method produces * an reversed string. */ p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* Like sdscatpritf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char *fmt, va_list ap) { va_list cpy; char *buf, *t; size_t buflen = 16; while(1) { buf = malloc(buflen); if (buf == NULL) return NULL; buf[buflen-2] = '\0'; va_copy(cpy,ap); vsnprintf(buf, buflen, fmt, cpy); if (buf[buflen-2] != '\0') { free(buf); buflen *= 2; continue; } break; } t = sdscat(s, buf); free(buf); return t; } /* Append to the sds string 's' a string obtained using printf-alike format * specifier. * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. * * Example: * * s = sdsnew("Sum is: "); * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b); * * Often you need to create a string from scratch with the printf-alike * format. When this is the need, just use sdsempty() as the target string: * * s = sdscatprintf(sdsempty(), "... your format ...", args); */ sds sdscatprintf(sds s, const char *fmt, ...) 
{ va_list ap; char *t; va_start(ap, fmt); t = sdscatvprintf(s,fmt,ap); va_end(ap); return t; } /* This function is similar to sdscatprintf, but much faster as it does * not rely on sprintf() family functions implemented by the libc that * are often very slow. Moreover directly handling the sds string as * new data is concatenated provides a performance improvement. * * However this function only handles an incompatible subset of printf-alike * format specifiers: * * %s - C String * %S - SDS string * %i - signed int * %I - 64 bit signed integer (long long, int64_t) * %u - unsigned int * %U - 64 bit unsigned integer (unsigned long long, uint64_t) * %T - A size_t variable. * %% - Verbatim "%" character. */ sds sdscatfmt(sds s, char const *fmt, ...) { struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr))); size_t initlen = sdslen(s); const char *f = fmt; int i; va_list ap; va_start(ap,fmt); f = fmt; /* Next format specifier byte to process. */ i = initlen; /* Position of the next byte to write to dest str. */ while(*f) { char next, *str; int l; long long num; unsigned long long unum; /* Make sure there is always space for at least 1 char. */ if (sh->free == 0) { s = sdsMakeRoomFor(s,1); sh = (void*) (s-(sizeof(struct sdshdr))); } switch(*f) { case '%': next = *(f+1); f++; switch(next) { case 's': case 'S': str = va_arg(ap,char*); l = (next == 's') ? 
strlen(str) : sdslen(str); if (sh->free < l) { s = sdsMakeRoomFor(s,l); sh = (void*) (s-(sizeof(struct sdshdr))); } memcpy(s+i,str,l); sh->len += l; sh->free -= l; i += l; break; case 'i': case 'I': if (next == 'i') num = va_arg(ap,int); else num = va_arg(ap,long long); { char buf[SDS_LLSTR_SIZE]; l = sdsll2str(buf,num); if (sh->free < l) { s = sdsMakeRoomFor(s,l); sh = (void*) (s-(sizeof(struct sdshdr))); } memcpy(s+i,buf,l); sh->len += l; sh->free -= l; i += l; } break; case 'u': case 'U': case 'T': if (next == 'u') unum = va_arg(ap,unsigned int); else if(next == 'U') unum = va_arg(ap,unsigned long long); else unum = (unsigned long long)va_arg(ap,size_t); { char buf[SDS_LLSTR_SIZE]; l = sdsull2str(buf,unum); if (sh->free < l) { s = sdsMakeRoomFor(s,l); sh = (void*) (s-(sizeof(struct sdshdr))); } memcpy(s+i,buf,l); sh->len += l; sh->free -= l; i += l; } break; default: /* Handle %% and generally %. */ s[i++] = next; sh->len += 1; sh->free -= 1; break; } break; default: s[i++] = *f; sh->len += 1; sh->free -= 1; break; } f++; } va_end(ap); /* Add null-term */ s[i] = '\0'; return s; } /* Remove the part of the string from left and from right composed just of * contiguous characters found in 'cset', that is a null terminted C string. * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. * * Example: * * s = sdsnew("AA...AA.a.aa.aHelloWorld :::"); * s = sdstrim(s,"A. :"); * printf("%s\n", s); * * Output will be just "Hello World". */ void sdstrim(sds s, const char *cset) { struct sdshdr *sh = (void*) (s-sizeof *sh); char *start, *end, *sp, *ep; size_t len; sp = start = s; ep = end = s+sdslen(s)-1; while(sp <= end && strchr(cset, *sp)) sp++; while(ep > start && strchr(cset, *ep)) ep--; len = (sp > ep) ? 
0 : ((ep-sp)+1); if (sh->buf != sp) memmove(sh->buf, sp, len); sh->buf[len] = '\0'; sh->free = sh->free+(sh->len-len); sh->len = len; } /* Turn the string into a smaller (or equal) string containing only the * substring specified by the 'start' and 'end' indexes. * * start and end can be negative, where -1 means the last character of the * string, -2 the penultimate character, and so forth. * * The interval is inclusive, so the start and end characters will be part * of the resulting string. * * The string is modified in-place. * * Example: * * s = sdsnew("Hello World"); * sdsrange(s,1,-1); => "ello World" */ void sdsrange(sds s, int start, int end) { struct sdshdr *sh = (void*) (s-sizeof *sh); size_t newlen, len = sdslen(s); if (len == 0) return; if (start < 0) { start = len+start; if (start < 0) start = 0; } if (end < 0) { end = len+end; if (end < 0) end = 0; } newlen = (start > end) ? 0 : (end-start)+1; if (newlen != 0) { if (start >= (signed)len) { newlen = 0; } else if (end >= (signed)len) { end = len-1; newlen = (start > end) ? 0 : (end-start)+1; } } else { start = 0; } if (start && newlen) memmove(sh->buf, sh->buf+start, newlen); sh->buf[newlen] = 0; sh->free = sh->free+(sh->len-newlen); sh->len = newlen; } /* Apply tolower() to every character of the sds string 's'. */ void sdstolower(sds s) { int len = sdslen(s), j; for (j = 0; j < len; j++) s[j] = tolower(s[j]); } /* Apply toupper() to every character of the sds string 's'. */ void sdstoupper(sds s) { int len = sdslen(s), j; for (j = 0; j < len; j++) s[j] = toupper(s[j]); } /* Compare two sds strings s1 and s2 with memcmp(). * * Return value: * * 1 if s1 > s2. * -1 if s1 < s2. * 0 if s1 and s2 are exactly the same binary string. * * If two strings share exactly the same prefix, but one of the two has * additional characters, the longer string is considered to be greater than * the smaller one. 
*/ int sdscmp(const sds s1, const sds s2) { size_t l1, l2, minlen; int cmp; l1 = sdslen(s1); l2 = sdslen(s2); minlen = (l1 < l2) ? l1 : l2; cmp = memcmp(s1,s2,minlen); if (cmp == 0) return l1-l2; return cmp; } /* Split 's' with separator in 'sep'. An array * of sds strings is returned. *count will be set * by reference to the number of tokens returned. * * On out of memory, zero length string, zero length * separator, NULL is returned. * * Note that 'sep' is able to split a string using * a multi-character separator. For example * sdssplit("foo_-_bar","_-_"); will return two * elements "foo" and "bar". * * This version of the function is binary-safe but * requires length arguments. sdssplit() is just the * same function but for zero-terminated strings. */ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) { int elements = 0, slots = 5, start = 0, j; sds *tokens; if (seplen < 1 || len < 0) return NULL; tokens = malloc(sizeof(sds)*slots); if (tokens == NULL) return NULL; if (len == 0) { *count = 0; return tokens; } for (j = 0; j < (len-(seplen-1)); j++) { /* make sure there is room for the next element and the final one */ if (slots < elements+2) { sds *newtokens; slots *= 2; newtokens = realloc(tokens,sizeof(sds)*slots); if (newtokens == NULL) goto cleanup; tokens = newtokens; } /* search the separator */ if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) { tokens[elements] = sdsnewlen(s+start,j-start); if (tokens[elements] == NULL) goto cleanup; elements++; start = j+seplen; j = j+seplen-1; /* skip the separator */ } } /* Add the final element. We are sure there is room in the tokens array. 
*/ tokens[elements] = sdsnewlen(s+start,len-start); if (tokens[elements] == NULL) goto cleanup; elements++; *count = elements; return tokens; cleanup: { int i; for (i = 0; i < elements; i++) sdsfree(tokens[i]); free(tokens); *count = 0; return NULL; } } /* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */ void sdsfreesplitres(sds *tokens, int count) { if (!tokens) return; while(count--) sdsfree(tokens[count]); free(tokens); } /* Create an sds string from a long long value. It is much faster than: * * sdscatprintf(sdsempty(),"%lld\n", value); */ sds sdsfromlonglong(long long value) { char buf[32], *p; unsigned long long v; v = (value < 0) ? -value : value; p = buf+31; /* point to the last character */ do { *p-- = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p-- = '-'; p++; return sdsnewlen(p,32-(p-buf)); } /* Append to the sds string "s" an escaped string representation where * all the non-printable characters (tested with isprint()) are turned into * escapes in the form "\n\r\a...." or "\x". * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscatrepr(sds s, const char *p, size_t len) { s = sdscatlen(s,"\"",1); while(len--) { switch(*p) { case '\\': case '"': s = sdscatprintf(s,"\\%c",*p); break; case '\n': s = sdscatlen(s,"\\n",2); break; case '\r': s = sdscatlen(s,"\\r",2); break; case '\t': s = sdscatlen(s,"\\t",2); break; case '\a': s = sdscatlen(s,"\\a",2); break; case '\b': s = sdscatlen(s,"\\b",2); break; default: if (isprint(*p)) s = sdscatprintf(s,"%c",*p); else s = sdscatprintf(s,"\\x%02x",(unsigned char)*p); break; } p++; } return sdscatlen(s,"\"",1); } /* Helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. 
*/ int is_hex_digit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } /* Helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */ int hex_digit_to_int(char c) { switch(c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return 0; } } /* Split a line into arguments, where every argument can be in the * following programming-language REPL-alike form: * * foo bar "newline are supported\n" and "\xff\x00otherstuff" * * The number of arguments is stored into *argc, and an array * of sds is returned. * * The caller should free the resulting array of sds strings with * sdsfreesplitres(). * * Note that sdscatrepr() is able to convert back a string into * a quoted string in the same format sdssplitargs() is able to parse. 
* * The function returns the allocated tokens on success, even when the * input string is empty, or NULL if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */ sds *sdssplitargs(const char *line, int *argc) { const char *p = line; char *current = NULL; char **vector = NULL; *argc = 0; while(1) { /* skip blanks */ while(*p && isspace(*p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ int insq=0; /* set to 1 if we are in 'single quotes' */ int done=0; if (current == NULL) current = sdsempty(); while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { unsigned char byte; byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); current = sdscatlen(current,(char*)&byte,1); p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current = sdscatlen(current,&c,1); } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current = sdscatlen(current,"'",1); } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. 
*/ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: current = sdscatlen(current,p,1); break; } } if (*p) p++; } /* add the token to the vector */ vector = realloc(vector,((*argc)+1)*sizeof(char*)); vector[*argc] = current; (*argc)++; current = NULL; } else { /* Even on empty input string return something not NULL. */ if (vector == NULL) vector = malloc(sizeof(void*)); return vector; } } err: while((*argc)--) sdsfree(vector[*argc]); free(vector); if (current) sdsfree(current); *argc = 0; return NULL; } /* Modify the string substituting all the occurrences of the set of * characters specified in the 'from' string to the corresponding character * in the 'to' array. * * For instance: sdsmapchars(mystring, "ho", "01", 2) * will have the effect of turning the string "hello" into "0ell1". * * The function returns the sds string pointer, that is always the same * as the input pointer since no resize is needed. */ sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) { size_t j, i, l = sdslen(s); for (j = 0; j < l; j++) { for (i = 0; i < setlen; i++) { if (s[j] == from[i]) { s[j] = to[i]; break; } } } return s; } /* Join an array of C strings using the specified separator (also a C string). * Returns the result as an sds string. */ sds sdsjoin(char **argv, int argc, char *sep, size_t seplen) { sds join = sdsempty(); int j; for (j = 0; j < argc; j++) { join = sdscat(join, argv[j]); if (j != argc-1) join = sdscatlen(join,sep,seplen); } return join; } /* Like sdsjoin, but joins an array of SDS strings. 
*/
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
    sds join = sdsempty();
    int j;

    for (j = 0; j < argc; j++) {
        join = sdscatsds(join, argv[j]);
        /* the separator goes between elements only, never after the last */
        if (j != argc-1) join = sdscatlen(join,sep,seplen);
    }
    return join;
}

/* Self test, compiled only when SDS_TEST_MAIN is defined. Every check is
 * reported through test_cond()/test_report() from "testhelp.h". The checks
 * share the strings x and y, so their order matters. */
#ifdef SDS_TEST_MAIN
/* NOTE(review): the header name of the next #include is missing in this
 * copy of the file -- restore it before building the test. */
#include
#include "testhelp.h"

int main(void) {
    {
        struct sdshdr *sh;
        sds x = sdsnew("foo"), y;

        /* creation and length */
        test_cond("Create a string and obtain the length",
            sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0)

        sdsfree(x);
        x = sdsnewlen("foo",2);
        test_cond("Create a string with specified length",
            sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0)

        /* concatenation and copy */
        x = sdscat(x,"bar");
        test_cond("Strings concatenation",
            sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);

        x = sdscpy(x,"a");
        test_cond("sdscpy() against an originally longer string",
            sdslen(x) == 1 && memcmp(x,"a\0",2) == 0)

        x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
        test_cond("sdscpy() against an originally shorter string",
            sdslen(x) == 33 &&
            memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0)

        sdsfree(x);
        x = sdscatprintf(sdsempty(),"%d",123);
        test_cond("sdscatprintf() seems working in the base case",
            sdslen(x) == 3 && memcmp(x,"123\0",4) ==0)

        /* trimming; x stays "ciao" for the sdsrange() checks below */
        sdsfree(x);
        x = sdsnew("xxciaoyyy");
        sdstrim(x,"xy");
        test_cond("sdstrim() correctly trims characters",
            sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0)

        /* sdsrange() on fresh copies of x */
        y = sdsdup(x);
        sdsrange(y,1,1);
        test_cond("sdsrange(...,1,1)",
            sdslen(y) == 1 && memcmp(y,"i\0",2) == 0)

        sdsfree(y);
        y = sdsdup(x);
        sdsrange(y,1,-1);
        test_cond("sdsrange(...,1,-1)",
            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)

        sdsfree(y);
        y = sdsdup(x);
        sdsrange(y,-2,-1);
        test_cond("sdsrange(...,-2,-1)",
            sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0)

        sdsfree(y);
        y = sdsdup(x);
        sdsrange(y,2,1);
        test_cond("sdsrange(...,2,1)",
            sdslen(y) == 0 && memcmp(y,"\0",1) == 0)

        sdsfree(y);
        y = sdsdup(x);
        sdsrange(y,1,100);
        test_cond("sdsrange(...,1,100)",
            sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0)

        sdsfree(y);
        y = sdsdup(x);
        sdsrange(y,100,100);
        test_cond("sdsrange(...,100,100)",
            sdslen(y) == 0 && memcmp(y,"\0",1) == 0)

        /* sdscmp() ordering */
        sdsfree(y);
        sdsfree(x);
        x = sdsnew("foo");
        y = sdsnew("foa");
        test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0)

        sdsfree(y);
        sdsfree(x);
        x = sdsnew("bar");
        y = sdsnew("bar");
        test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0)

        sdsfree(y);
        sdsfree(x);
        x = sdsnew("aar");
        y = sdsnew("bar");
        /* NOTE(review): the label below repeats "sdscmp(bar,bar)" although
         * the operands are "aar" and "bar". */
        test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0)

        /* sdscatrepr() escaping of control bytes and an embedded NUL */
        sdsfree(y);
        sdsfree(x);
        x = sdsnewlen("\a\n\0foo\r",7);
        y = sdscatrepr(sdsempty(),x,sdslen(x));
        test_cond("sdscatrepr(...data...)",
            memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)

        {
            int oldfree;

            /* low level API: inspect the header stored right before x */
            sdsfree(x);
            x = sdsnew("0");
            sh = (void*) (x-(sizeof(struct sdshdr)));
            test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
            x = sdsMakeRoomFor(x,1);
            /* x is reassigned above, so the header pointer is recomputed */
            sh = (void*) (x-(sizeof(struct sdshdr)));
            test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
            oldfree = sh->free;
            x[1] = '1';
            sdsIncrLen(x,1);
            test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
            test_cond("sdsIncrLen() -- len", sh->len == 2);
            test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
        }
    }
    test_report()
    return 0;
}
#endif

================================================
FILE: dep/himemcached-0.1.0/himcdep/sds.h
================================================
/* SDS (Simple Dynamic Strings), A C dynamic strings library.
 *
 * Copyright (c) 2006-2014, Salvatore Sanfilippo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * * Neither the name of Redis nor the names of its contributors may be used
 * to endorse or promote products derived from this software without
 * specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef __SDS_H #define __SDS_H #define SDS_MAX_PREALLOC (1024*1024) #include #include #ifdef _MSC_VER #include "win32.h" #endif typedef char *sds; struct sdshdr { int len; int free; char buf[]; }; static inline size_t sdslen(const sds s) { struct sdshdr *sh = (struct sdshdr *)(s-sizeof *sh); return sh->len; } static inline size_t sdsavail(const sds s) { struct sdshdr *sh = (struct sdshdr *)(s-sizeof *sh); return sh->free; } sds sdsnewlen(const void *init, size_t initlen); sds sdsnew(const char *init); sds sdsempty(void); size_t sdslen(const sds s); sds sdsdup(const sds s); void sdsfree(sds s); size_t sdsavail(const sds s); sds sdsgrowzero(sds s, size_t len); sds sdscatlen(sds s, const void *t, size_t len); sds sdscat(sds s, const char *t); sds sdscatsds(sds s, const sds t); sds sdscpylen(sds s, const char *t, size_t len); sds sdscpy(sds s, const char *t); sds sdscatvprintf(sds s, const char *fmt, va_list ap); #ifdef __GNUC__ sds sdscatprintf(sds s, const char *fmt, ...) 
__attribute__((format(printf, 2, 3))); #else sds sdscatprintf(sds s, const char *fmt, ...); #endif sds sdscatfmt(sds s, char const *fmt, ...); void sdstrim(sds s, const char *cset); void sdsrange(sds s, int start, int end); void sdsupdatelen(sds s); void sdsclear(sds s); int sdscmp(const sds s1, const sds s2); sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count); void sdsfreesplitres(sds *tokens, int count); void sdstolower(sds s); void sdstoupper(sds s); sds sdsfromlonglong(long long value); sds sdscatrepr(sds s, const char *p, size_t len); sds *sdssplitargs(const char *line, int *argc); sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen); sds sdsjoin(char **argv, int argc, char *sep, size_t seplen); sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen); /* Low level functions exposed to the user API */ sds sdsMakeRoomFor(sds s, size_t addlen); void sdsIncrLen(sds s, int incr); sds sdsRemoveFreeSpace(sds s); size_t sdsAllocSize(sds s); #endif ================================================ FILE: dep/himemcached-0.1.0/himcread.c ================================================ #include #include #ifndef _MSC_VER #include #endif #include #include #include #include "himcread.h" #include "himcdep/sds.h" #define PARSE_OK 0 /* Parsing ok */ #define PARSE_ERROR 1 /* Parsing error */ #define PARSE_AGAIN 3 /* Incomplete -> parse again */ #define RSP_TYPE_UNKNOWN 0 #define RSP_TYPE_NUM 1 #define RSP_TYPE_STORED 2 #define RSP_TYPE_NOT_STORED 3 #define RSP_TYPE_EXISTS 4 #define RSP_TYPE_NOT_FOUND 5 #define RSP_TYPE_END 6 #define RSP_TYPE_VALUE 7 #define RSP_TYPE_DELETED 8 #define RSP_TYPE_ERROR 9 #define RSP_TYPE_CLIENT_ERROR 10 #define RSP_TYPE_SERVER_ERROR 11 static void memcachedReaderReset(mcReader *r); static void __memcachedReaderSetError(mcReader *r, int type, const char *str) { size_t len; memcachedReaderReset(r); /* Clear input buffer on errors. 
*/ if (r->buf != NULL) { sdsfree(r->buf); r->buf = NULL; r->pos = r->len = 0; } /* Set error. */ r->err = type; len = strlen(str); len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1); memcpy(r->errstr,str,len); r->errstr[len] = '\0'; } static size_t chrtos(char *buf, size_t size, char byte) { size_t len = 0; switch(byte) { case '\\': case '"': len = snprintf(buf,size,"\"\\%c\"",byte); break; case '\n': len = snprintf(buf,size,"\"\\n\""); break; case '\r': len = snprintf(buf,size,"\"\\r\""); break; case '\t': len = snprintf(buf,size,"\"\\t\""); break; case '\a': len = snprintf(buf,size,"\"\\a\""); break; case '\b': len = snprintf(buf,size,"\"\\b\""); break; default: if (isprint(byte)) len = snprintf(buf,size,"\"%c\"",byte); else len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte); break; } return len; } static void __memcachedReaderSetErrorProtocolByte(mcReader *r, char byte) { char cbuf[8], sbuf[128]; chrtos(cbuf,sizeof(cbuf),byte); snprintf(sbuf,sizeof(sbuf), "Protocol error, got %s as reply type byte", cbuf); __memcachedReaderSetError(r,MC_ERR_PROTOCOL,sbuf); } static void __memcachedReaderSetErrorOOM(mcReader *r) { __memcachedReaderSetError(r,MC_ERR_OOM,"Out of memory"); } static int elementArrayCreate(mcReader *r) { assert(r->alloc_len == 0); assert(r->element == NULL); assert(r->elements == 0); r->element = malloc(10*sizeof(void*)); if (r->element == NULL) { __memcachedReaderSetErrorOOM(r); return MC_ERR; } r->alloc_len = 10; r->elements = 0; return MC_OK; } static void elementArrayDestroy(mcReader *r) { unsigned int i; if (r->element == NULL) return; if (r->fn && r->fn->freeObject) { for (i = 0; i < r->elements; i ++) { if (r->element[i]) r->fn->freeObject(r->element[i]); } } free(r->element); r->element = NULL; r->elements = 0; r->alloc_len = 0; return MC_OK; } #define EXPAND_MAX_SIZE_PER_TIME 300 static int elementArrayExpand(mcReader *r) { size_t new_length; if (r->alloc_len <= 150) { new_length = r->alloc_len*2; } else if (r->alloc_len 
<= 500) { new_length = r->alloc_len+EXPAND_MAX_SIZE_PER_TIME; } r->element = realloc(r->element,new_length*sizeof(void*)); if (r->element == NULL) { __memcachedReaderSetErrorOOM(r); return MC_ERR; } r->alloc_len = new_length; return MC_OK; } static int elementArrayAdd(mcReader *r, void *reply) { assert(r->elements <= r->alloc_len); if (r->elements == r->alloc_len) { if (elementArrayExpand(r) != MC_OK) return MC_ERR; } r->element[r->elements++] = reply; return MC_OK; } static void memcachedParseResponse(mcReader *r) { void *obj; char *p, *m; char ch; enum { SW_START, SW_RSP_NUM, SW_RSP_STR, SW_SPACES_BEFORE_KEY, SW_KEY, SW_SPACES_BEFORE_FLAGS, /* 5 */ SW_FLAGS, SW_SPACES_BEFORE_VLEN, SW_VLEN, SW_RUNTO_VAL, SW_VAL, /* 10 */ SW_VAL_LF, SW_END, SW_RUNTO_CRLF, SW_CRLF, SW_ALMOST_DONE, /* 15 */ SW_SENTINEL } state; state = r->state; assert(state >= SW_START && state < SW_SENTINEL); /* validate the parsing marker */ assert(r->buf != NULL); assert(r->pos < r->len); for (p = r->buf+r->pos; p <= r->buf+r->len; p++) { ch = *p; switch (state) { case SW_START: if (isdigit(ch)) { state = SW_RSP_NUM; } else { state = SW_RSP_STR; } p = p - 1; /* go back by 1 byte */ break; case SW_RSP_NUM: if (r->token == NULL) { /* rsp_start <- p; type_start <- p */ r->token = p; } if (isdigit(ch)) { /* num <- num * 10 + (ch - '0') */ r->integer = r->integer*10 + (long long)(ch-'0'); } else if (ch == ' ' || ch == '\r') { /* type_end <- p - 1 */ r->token = NULL; r->integer = 0; r->type = RSP_TYPE_NUM; p = p - 1; /* go back by 1 byte */ state = SW_CRLF; } else { goto error; } break; case SW_RSP_STR: if (r->token == NULL) { /* rsp_start <- p; type_start <- p */ r->token = p; } if (ch == ' ' || ch == '\r') { /* type_end <- p - 1 */ m = r->token; /* r->token = NULL; */ r->type = RSP_TYPE_UNKNOWN; assert(r->str == NULL && r->strlen == 0); switch (p - m) { case 3: if (!strncmp(m,"END\r",4)) { r->type = RSP_TYPE_END; /* end_start <- m; end_end <- p - 1 */ } break; case 5: if (!strncmp(m,"VALUE",5)) { /* 
* Encompasses responses for 'get', 'gets' and * 'cas' command. */ r->type = RSP_TYPE_VALUE; break; } if (!strncmp(m,"ERROR",5)) { r->type = RSP_TYPE_ERROR; break; } break; case 6: if (!strncmp(m,"STORED",6)) { r->type = RSP_TYPE_STORED; r->str = m; r->strlen = 6; break; } if (!strncmp(m,"EXISTS",6)) { r->type = RSP_TYPE_EXISTS; r->str = m; r->strlen = 6; break; } break; case 7: if (!strncmp(m,"DELETED",7)) { r->type = RSP_TYPE_DELETED; r->str = m; r->strlen = 7; break; } break; case 9: if (!strncmp(m,"NOT_FOUND",9)) { r->type = RSP_TYPE_NOT_FOUND; r->str = m; r->strlen = 9; break; } break; case 10: if (!strncmp(m,"NOT_STORED",10)) { r->type = RSP_TYPE_NOT_STORED; r->str = m; r->strlen = 10; break; } break; case 12: if (!strncmp(m,"CLIENT_ERROR",12)) { r->type = RSP_TYPE_CLIENT_ERROR; break; } if (!strncmp(m,"SERVER_ERROR",12)) { r->type = RSP_TYPE_SERVER_ERROR; break; } break; } switch (r->type) { case RSP_TYPE_UNKNOWN: goto error; case RSP_TYPE_STORED: case RSP_TYPE_NOT_STORED: case RSP_TYPE_EXISTS: case RSP_TYPE_NOT_FOUND: case RSP_TYPE_DELETED: state = SW_CRLF; break; case RSP_TYPE_END: state = SW_CRLF; break; case RSP_TYPE_VALUE: state = SW_SPACES_BEFORE_KEY; break; case RSP_TYPE_ERROR: state = SW_CRLF; break; case RSP_TYPE_CLIENT_ERROR: case RSP_TYPE_SERVER_ERROR: r->token = NULL; state = SW_RUNTO_CRLF; break; default: NOT_REACHED(); } p = p - 1; /* go back by 1 byte */ } break; case SW_SPACES_BEFORE_KEY: if (ch != ' ') { state = SW_KEY; p = p - 1; /* go back by 1 byte */ r->token = NULL; } break; case SW_KEY: if (r->token == NULL) { r->token = p; } if (ch == ' ') { assert(r->str == NULL && r->strlen == 0); m = r->token; r->token = NULL; state = SW_SPACES_BEFORE_FLAGS; r->strlen = p-m; r->str = m; } break; case SW_SPACES_BEFORE_FLAGS: if (ch != ' ') { if (!isdigit(ch)) { goto error; } state = SW_FLAGS; p = p - 1; /* go back by 1 byte */ r->kflags = 0; } break; case SW_FLAGS: if (isdigit(ch)) { /* flags <- flags * 10 + (ch - '0') */ r->kflags = r->kflags*10 + 
(int)(ch-'0'); } else if (ch == ' ') { /* flags_end <- p - 1 */ /* r->token = NULL; */ state = SW_SPACES_BEFORE_VLEN; } else { goto error; } break; case SW_SPACES_BEFORE_VLEN: if (ch != ' ') { if (!isdigit(ch)) { goto error; } p = p - 1; /* go back by 1 byte */ state = SW_VLEN; r->integer = 0; } break; case SW_VLEN: if (isdigit(ch)) { r->integer = r->integer*10 + (long long)(ch-'0'); } else if (ch == ' ' || ch == '\r') { /* vlen_end <- p - 1 */ p = p - 1; /* go back by 1 byte */ /* r->token = NULL; */ state = SW_RUNTO_CRLF; } else { goto error; } break; case SW_RUNTO_VAL: switch (ch) { case '\n': /* val_start <- p + 1 */ state = SW_VAL; r->token = NULL; break; default: goto error; } break; case SW_VAL: if (r->token == NULL) { /* flags_start <- p */ r->token = p; } m = r->token + r->integer; if (m > r->buf+r->len) { p = r->buf + r->len; break; } switch (*m) { case '\r': /* val_end <- p - 1 */ p = m; /* move forward by vlen bytes */ state = SW_VAL_LF; break; default: goto error; } break; case SW_VAL_LF: switch (ch) { case '\n': /* state = SW_END; */ if (r->fn && r->fn->createString) obj = r->fn->createString(MC_REPLY_STRING,r->str,r->strlen, r->token,r->integer,r->kflags,r->kversion); else obj = (void*)MC_REPLY_STRING; if (r->element) { assert(r->subreply == NULL); elementArrayAdd(r,r->subreply); } else if (r->subreply) { elementArrayCreate(r); elementArrayAdd(r,r->subreply); r->subreply = NULL; elementArrayAdd(r,obj); } else { r->subreply = obj; } r->token = NULL; r->str = NULL; r->strlen = 0; r->kflags = 0; r->kversion = -1; state = SW_RSP_STR; break; default: goto error; } break; case SW_END: if (r->token == NULL) { if (ch != 'E') { goto error; } /* end_start <- p */ r->token = p; } else if (ch == '\r') { /* end_end <- p */ m = r->token; r->token = NULL; switch (p - m) { case 3: if (!strncmp(m,"END\r",4)) { state = SW_ALMOST_DONE; } break; default: goto error; } } break; case SW_RUNTO_CRLF: switch (ch) { case '\r': if (r->type == RSP_TYPE_VALUE) { state = 
SW_RUNTO_VAL; } else { if (r->type == RSP_TYPE_CLIENT_ERROR || r->type == RSP_TYPE_SERVER_ERROR) { m = r->token; r->token = NULL; r->strlen = p-m; r->str = m; } state = SW_ALMOST_DONE; } break; default: break; } break; case SW_CRLF: switch (ch) { case ' ': break; case '\r': state = SW_ALMOST_DONE; break; default: goto error; } break; case SW_ALMOST_DONE: switch (ch) { case '\n': /* rsp_end <- p */ goto done; default: goto error; } break; case SW_SENTINEL: default: NOT_REACHED(); break; } } assert(p == r->buf+r->len); r->pos = r->len; r->state = state; r->result = PARSE_AGAIN; return; done: r->pos = p-r->buf+1; assert(r->pos <= r->len); r->state = SW_START; r->token = NULL; r->result = PARSE_OK; return; error: r->result = PARSE_ERROR; r->state = state; errno = EINVAL; } mcReader *memcachedReaderCreateWithFunctions(mcReplyObjectFunctions *fn) { mcReader *r; r = calloc(sizeof(mcReader),1); if (r == NULL) return NULL; r->err = 0; r->errstr[0] = '\0'; r->buf = sdsempty(); r->maxbuf = MC_READER_MAX_BUF; if (r->buf == NULL) { free(r); return NULL; } r->subreply = NULL; r->alloc_len = 0; r->elements = 0; r->element = NULL; r->state = 0; r->token = NULL; r->str = NULL; r->strlen = 0; r->kflags = 0; r->kversion = -1; r->integer = 0; r->type = RSP_TYPE_UNKNOWN; r->result = PARSE_OK; r->fn = fn; return r; } void memcachedReaderFree(mcReader *r) { memcachedReaderReset(r); if (r->buf != NULL) sdsfree(r->buf); free(r); } int memcachedReaderFeed(mcReader *r, const char *buf, size_t len) { sds newbuf; /* Return early when this reader is in an erroneous state. */ if (r->err) return MC_ERR; /* Copy the provided buffer. */ if (buf != NULL && len >= 1) { /* Destroy internal buffer when it is empty and is quite large. */ if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) { sdsfree(r->buf); r->buf = sdsempty(); r->pos = 0; /* r->buf should not be NULL since we just free'd a larger one. 
*/ assert(r->buf != NULL); } newbuf = sdscatlen(r->buf,buf,len); if (newbuf == NULL) { __memcachedReaderSetErrorOOM(r); return MC_ERR; } r->buf = newbuf; r->len = sdslen(r->buf); } return MC_OK; } static void memcachedReaderReset(mcReader *r) { r->str = NULL; r->strlen = 0; r->kflags = 0; r->kversion = -1; r->state = 0; r->token = 0; r->integer = 0; r->type = RSP_TYPE_UNKNOWN; r->result = PARSE_OK; if (r->subreply != NULL) { if (r->fn && r->fn->freeObject) r->fn->freeObject(r->subreply); r->subreply = NULL; } elementArrayDestroy(r); r->err = 0; r->errstr[0] = '\0'; } static void *getReplyFromReader(mcReader *r) { void *reply; switch (r->type) { case RSP_TYPE_VALUE: if (r->element) { assert(r->subreply == NULL); if (r->fn && r->fn->createArray) { reply = r->fn->createArray(r->elements,r->element); r->element = NULL; r->elements = 0; r->alloc_len = 0; } else { reply = (void*)MC_REPLY_ARRAY; } } else if (r->subreply) { reply = r->subreply; } break; case RSP_TYPE_NUM: if (r->fn && r->fn->createInteger) reply = r->fn->createInteger(r->integer); else reply = (void*)MC_REPLY_INTEGER; break; case RSP_TYPE_END: if (r->fn && r->fn->createNil) reply = r->fn->createNil(); else reply = (void*)MC_REPLY_NIL; break; case RSP_TYPE_CLIENT_ERROR: case RSP_TYPE_SERVER_ERROR: if (r->fn && r->fn->createString) reply = r->fn->createString(MC_REPLY_ERROR, NULL,0,r->str,r->strlen,0,0); else reply = (void*)MC_REPLY_ERROR; break; case RSP_TYPE_ERROR: if (r->fn && r->fn->createString) reply = r->fn->createString(MC_REPLY_ERROR, NULL,0,"",0,0,0); else reply = (void*)MC_REPLY_ERROR; break; case RSP_TYPE_STORED: case RSP_TYPE_NOT_STORED: case RSP_TYPE_EXISTS: case RSP_TYPE_NOT_FOUND: case RSP_TYPE_DELETED: if (r->fn && r->fn->createString) reply = r->fn->createString(MC_REPLY_STATUS, NULL,0,r->str,r->strlen,0,0); else reply = (void*)MC_REPLY_STATUS; break; default: reply = NULL; break; } return reply; } int memcachedReaderGetReply(mcReader *r, void **reply) { /* Default target pointer to NULL. 
*/ if (reply != NULL) *reply = NULL; /* Return early when this reader is in an erroneous state. */ if (r->err) return MC_ERR; /* When the buffer is empty, there will never be a reply. */ if (r->len == 0) return MC_OK; memcachedParseResponse(r); /* Return ASAP when an error occurred. */ if (r->err) return MC_ERR; /* printf("!######### r->result: %d, r->state: %d, r->type: %d, r->pos: %d, r->len: %d," "r->kflags: %d, r->kversion : %lld, r->strlen: %zu, r->integer: %lld r->buf: %s, r->str: %s\n", r->result, r->state, r->type, r->pos, r->len, r->kflags, r->kversion, r->strlen, r->integer, r->buf, r->str); */ /* Emit a reply when there is one. */ if (r->result == PARSE_OK) { if (reply != NULL) { *reply = getReplyFromReader(r); } memcachedReaderReset(r); } /* Discard part of the buffer when we've consumed at least 1k, to avoid * doing unnecessary calls to memmove() in sds.c. */ if (r->pos >= 1024 && r->token == NULL && r->str == NULL) { sdsrange(r->buf,r->pos,-1); r->pos = 0; r->len = sdslen(r->buf); } return MC_OK; } ================================================ FILE: dep/himemcached-0.1.0/himcread.h ================================================ #ifndef _HIMC_READ_H_ #define _HIMC_READ_H_ #include /* for size_t */ #include #define MC_ERR -1 #define MC_OK 0 /* When an error occurs, the err flag in a context is set to hold the type of * error that occured. REDIS_ERR_IO means there was an I/O error and you * should use the "errno" variable to find out what is wrong. * For other values, the "errstr" field will hold a description. */ #define MC_ERR_IO 1 /* Error in read or write */ #define MC_ERR_EOF 3 /* End of file */ #define MC_ERR_PROTOCOL 4 /* Protocol error */ #define MC_ERR_OOM 5 /* Out of memory */ #define MC_ERR_OTHER 2 /* Everything else... 
*/

/* Reply kinds: passed as the first argument of createString and, in the
 * reader, used as placeholder reply values when no constructor callback
 * is installed. */
#define MC_REPLY_STRING 1
#define MC_REPLY_ARRAY 2
#define MC_REPLY_INTEGER 3
#define MC_REPLY_NIL 4
#define MC_REPLY_STATUS 5
#define MC_REPLY_ERROR 6

#define MC_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */

#ifdef __cplusplus
extern "C" {
#endif

/* Callbacks the reader uses to build and free reply objects. */
typedef struct mcReplyObjectFunctions {
    /* Arguments as passed by the reader: reply kind, key and key length,
     * payload and payload length, key flags, key version. */
    void *(*createString)(int, char*, size_t, char*, size_t, int, long long);
    /* Arguments: number of elements, array of reply objects. */
    void *(*createArray)(size_t, void **);
    void *(*createInteger)(long long);
    void *(*createNil)(void);
    void (*freeObject)(void*);
} mcReplyObjectFunctions;

/* State of one incremental reply parser. */
typedef struct mcReader {
    int err; /* Error flags, 0 when there is no error */
    char errstr[128]; /* String representation of error when applicable */

    char *buf; /* Read buffer */
    size_t pos; /* Buffer cursor */
    size_t len; /* Buffer length */
    size_t maxbuf; /* Max length of unused buffer */

    void *subreply; /* Temporary reply for array type */
    size_t alloc_len; /* Temporary reply array alloc length */
    size_t elements; /* Temporary reply array length */
    void **element; /* Temporary reply array */

    char *str; /* Points into buf: key of the item being parsed, or the
                * status/error text of the current reply */
    size_t strlen; /* Number of bytes at str */
    int kflags; /* Used for key flags (get/gets command reply) */
    long long kversion; /* Used for key version (gets command reply) */

    int state; /* Current parser state */
    char *token; /* Token marker */
    long long integer; /* Cache the integer if need */
    int type; /* Response type */
    int result; /* Parsing result */

    mcReplyObjectFunctions *fn; /* Reply constructors; may be NULL */
    void *privdata; /* NOTE(review): not referenced by the reader code
                     * visible here -- presumably free for the caller */
} mcReader;

/* Public API for the protocol parser.
*/
/* Create a reader that builds replies through 'fn'. */
mcReader *memcachedReaderCreateWithFunctions(mcReplyObjectFunctions *fn);
/* Release a reader and everything it still holds. */
void memcachedReaderFree(mcReader *r);
/* Append 'len' bytes of input to the reader. */
int memcachedReaderFeed(mcReader *r, const char *buf, size_t len);
/* Try to extract one reply from the input fed so far. */
int memcachedReaderGetReply(mcReader *r, void **reply);

#ifdef __cplusplus
}
#endif

#endif

================================================
FILE: dep/himemcached-0.1.0/himemcached.c
================================================
/* NOTE(review): the header names of the three bare #include lines below are
 * missing in this copy of the file -- restore them before building. */
#include
#include
#include

#include "himemcached.h"

/* Request categories. */
#define REQ_TYPE_UNKNOWN 0
#define REQ_TYPE_STORAGE 1
#define REQ_TYPE_CAS 2
#define REQ_TYPE_RETRIEVAL 3
#define REQ_TYPE_ARITHMETIC 4
#define REQ_TYPE_DELETE 5

/* Constructors for the default reply objects, defined further down. */
static mcReply *createReplyObject(int type);
static void *createStringObject(int type, char *key, size_t keylen, char *str, size_t len, int flags, long long version);
static void *createArrayObject(size_t elements, void **element);
static void *createIntegerObject(long long value);
static void *createNilObject(void);

/* Default set of functions to build the reply. Keep in mind that such a
 * function returning NULL is interpreted as OOM.
*/ static mcReplyObjectFunctions defaultFunctions = { createStringObject, createArrayObject, createIntegerObject, createNilObject, freeMcReplyObject }; /* Create a reply object */ static mcReply *createReplyObject(int type) { mcReply *r = calloc(1,sizeof(*r)); if (r == NULL) return NULL; r->type = type; return r; } /* Free a reply object */ void freeMcReplyObject(void *reply) { mcReply *r = reply; size_t j; if (r == NULL) return; switch(r->type) { case MC_REPLY_INTEGER: case MC_REPLY_NIL: break; /* Nothing to free */ case MC_REPLY_ARRAY: if (r->element != NULL) { for (j = 0; j < r->elements; j++) if (r->element[j] != NULL) freeMcReplyObject(r->element[j]); free(r->element); } break; case MC_REPLY_ERROR: case MC_REPLY_STATUS: case MC_REPLY_STRING: if (r->key != NULL) free(r->key); if (r->str != NULL) free(r->str); break; default: assert(0); break; } free(r); } static void *createStringObject(int type, char *key, size_t keylen, char *str, size_t len, int flags, long long version) { mcReply *r, *parent; char *buf; assert(type == MC_REPLY_ERROR || type == MC_REPLY_STATUS || type == MC_REPLY_STRING); r = createReplyObject(type); if (r == NULL) return NULL; if (key != NULL) { r->key = malloc(keylen+1); if (r->key == NULL) { freeMcReplyObject(r); return NULL; } if (keylen > 0) /* Copy string value */ memcpy(r->key,key,keylen); r->key[keylen] = '\0'; r->keylen = keylen; } buf = malloc(len+1); if (buf == NULL) { freeMcReplyObject(r); return NULL; } if (len > 0) /* Copy string value */ memcpy(buf,str,len); buf[len] = '\0'; r->str = buf; r->len = len; r->flags = flags; r->version = version; return r; } static void *createArrayObject(size_t elements, void **element) { mcReply *r; r = createReplyObject(MC_REPLY_ARRAY); if (r == NULL) return NULL; r->elements = elements; r->element = (mcReply **)element; return r; } static void *createIntegerObject(long long value) { mcReply *r; r = createReplyObject(MC_REPLY_INTEGER); if (r == NULL) return NULL; r->integer = value; return r; } 
static void *createNilObject(void) { mcReply *r; r = createReplyObject(MC_REPLY_NIL); if (r == NULL) return NULL; return r; } void __memcachedSetError(mcContext *c, int type, const char *str) { size_t len; c->err = type; if (str != NULL) { len = strlen(str); len = len < (sizeof(c->errstr)-1) ? len : (sizeof(c->errstr)-1); memcpy(c->errstr,str,len); c->errstr[len] = '\0'; } else { /* Only REDIS_ERR_IO may lack a description! */ assert(type == MC_ERR_IO); //__redis_strerror_r(errno, c->errstr, sizeof(c->errstr)); } } /* Write the output buffer to the socket. * * Returns MC_OK when the buffer is empty, or (a part of) the buffer was * succesfully written to the socket. When the buffer is empty after the * write operation, "done" is set to 1 (if given). * * Returns MC_ERR if an error occured trying to write and sets * c->errstr to hold the appropriate error string. */ int memcachedBufferWrite(mcContext *c, int *done) { int nwritten; /* Return early when the context has seen an error. */ if (c->err) return MC_ERR; if (sdslen(c->obuf) > 0) { nwritten = write(c->fd,c->obuf,sdslen(c->obuf)); if (nwritten == -1) { if ((errno == EAGAIN && !(c->flags & MC_BLOCK)) || (errno == EINTR)) { /* Try again later */ } else { __memcachedSetError(c,MC_ERR_IO,NULL); return MC_ERR; } } else if (nwritten > 0) { if (nwritten == (signed)sdslen(c->obuf)) { sdsfree(c->obuf); c->obuf = sdsempty(); } else { sdsrange(c->obuf,nwritten,-1); } } } if (done != NULL) *done = (sdslen(c->obuf) == 0); return MC_OK; } /* Internal helper function to try and get a reply from the reader, * or set an error in the context otherwise. 
*/ int memcachedGetReplyFromReader(mcContext *c, void **reply) { if (memcachedReaderGetReply(c->reader,reply) == MC_ERR) { __memcachedSetError(c,c->reader->err,c->reader->errstr); return MC_ERR; } return MC_OK; } int memcachedGetReply(mcContext *c, void **reply) { int wdone = 0; void *aux = NULL; /* Try to read pending replies */ if (memcachedGetReplyFromReader(c,&aux) == MC_ERR) return MC_ERR; /* For the blocking context, flush output buffer and read reply */ if (aux == NULL && c->flags & MC_BLOCK) { /* Write until done */ do { if (memcachedBufferWrite(c,&wdone) == MC_ERR) return MC_ERR; } while (!wdone); /* Read until there is a reply */ do { if (memcachedBufferRead(c) == MC_ERR) return MC_ERR; if (memcachedGetReplyFromReader(c,&aux) == MC_ERR) return MC_ERR; } while (aux == NULL); } /* Set reply object */ if (reply != NULL) *reply = aux; return MC_OK; } mcReader *memcachedReaderCreate(void) { return memcachedReaderCreateWithFunctions(&defaultFunctions); } mcContext *memcachedContextInit(void) { mcContext *c; c = calloc(1,sizeof(mcContext)); if (c == NULL) return NULL; c->err = 0; c->errstr[0] = '\0'; c->obuf = sdsempty(); c->reader = memcachedReaderCreate(); c->tcp.host = NULL; c->tcp.source_addr = NULL; c->unix_sock.path = NULL; c->timeout = NULL; if (c->obuf == NULL || c->reader == NULL) { memcachedFree(c); return NULL; } return c; } void memcachedFree(mcContext *c) { if (c == NULL) return; if (c->fd > 0) close(c->fd); if (c->obuf != NULL) sdsfree(c->obuf); if (c->reader != NULL) memcachedReaderFree(c->reader); if (c->tcp.host) free(c->tcp.host); if (c->tcp.source_addr) free(c->tcp.source_addr); if (c->unix_sock.path) free(c->unix_sock.path); if (c->timeout) free(c->timeout); free(c); } /* Use this function to handle a read event on the descriptor. It will try * and read some bytes from the socket and feed them to the reply parser. * * After this function is called, you may use memcachedContextReadReply to * see if there is a reply available. 
*/ int memcachedBufferRead(mcContext *c) { char buf[1024*16]; int nread; /* Return early when the context has seen an error. */ if (c->err) return MC_ERR; nread = read(c->fd,buf,sizeof(buf)); if (nread == -1) { if ((errno == EAGAIN && !(c->flags & MC_BLOCK)) || (errno == EINTR)) { /* Try again later */ } else { __memcachedSetError(c,MC_ERR_IO,NULL); return MC_ERR; } } else if (nread == 0) { __memcachedSetError(c,MC_ERR_EOF,"Server closed the connection"); return MC_ERR; } else { if (memcachedReaderFeed(c->reader,buf,nread) != MC_OK) { __memcachedSetError(c,c->reader->err,c->reader->errstr); return MC_ERR; } } return MC_OK; } static int getRequestTypeFromString(char *str, size_t len) { if (str == NULL || len == 0) return -1; if (len == 3) { if (!strncasecmp(str,"set",3) || !strncasecmp(str,"add",3)) { return REQ_TYPE_STORAGE; } else if (!strncasecmp(str,"cas",3)) { return REQ_TYPE_CAS; } else if (!strncasecmp(str,"get",3)) { return REQ_TYPE_RETRIEVAL; } else { return -1; } } else if (len == 4) { if (!strncasecmp(str,"gets",4)) { return REQ_TYPE_RETRIEVAL; } else if (!strncasecmp(str,"incr",4) || !strncasecmp(str,"decr",4)) { return REQ_TYPE_ARITHMETIC; } else { return -1; } } else if (len == 6) { if (!strncasecmp(str,"append",6)) { return REQ_TYPE_STORAGE; } else if (!strncasecmp(str,"delete",6)) { return REQ_TYPE_DELETE; } else { return -1; } } else if (len == 7) { if (!strncasecmp(str,"replace",7) || !strncasecmp(str,"prepend",7)) { return REQ_TYPE_STORAGE; } else { return -1; } } return -1; } #define ARGUMENTLEN(_argtype,_argv,_argvlen,_idx) \ (_argtype==0?sdslen(_argv[_idx]):(_argvlen==NULL?strlen(_argv[_idx]):_argvlen[_idx])) /* argtype==0 : argv is sds array * argtype==1 : argv is c-string array and an array with their lengths. * If the length array is set to NULL, strlen will be used to compute the * argument lengths. 
*/ static int checkCmdValidAndGetTotalLen(int cmdtype, int argtype, int argc, char **argv, size_t *argvlen) { size_t len; int totlen, j; switch (cmdtype) { case REQ_TYPE_STORAGE: if (argc != 6 && argc != 7) { return -1; } if (argc == 7 && (ARGUMENTLEN(argtype,argv,argvlen,5) != 7 || strncasecmp(argv[5],"noreply",7))) { return -1; } totlen = 0; for (j = 0; j < argc-1; j ++) { totlen += ARGUMENTLEN(argtype,argv,argvlen,j) + 1; } totlen += 2 + ARGUMENTLEN(argtype,argv,argvlen,argc-1) + 2; break; case REQ_TYPE_CAS: if (argc != 7 && argc != 8) { return -1; } if (argc == 8 && (ARGUMENTLEN(argtype,argv,argvlen,6) != 7 || strncasecmp(argv[6],"noreply",7))) { return -1; } totlen = 0; for (j = 0; j < argc-1; j ++) { totlen += ARGUMENTLEN(argtype,argv,argvlen,j) + 1; } totlen += 2 + ARGUMENTLEN(argtype,argv,argvlen,argc-1) + 2; break; case REQ_TYPE_ARITHMETIC: if (argc != 3) { return -1; } totlen = ARGUMENTLEN(argtype,argv,argvlen,0) + 1 + ARGUMENTLEN(argtype,argv,argvlen,1) + 1 + ARGUMENTLEN(argtype,argv,argvlen,2) + 2; break; case REQ_TYPE_RETRIEVAL: if (argc <= 1) { return -1; } totlen = 0; for (j = 0; j < argc-1; j ++) { totlen += ARGUMENTLEN(argtype,argv,argvlen,j) + 1; } totlen += ARGUMENTLEN(argtype,argv,argvlen,argc-1) + 2; break; case REQ_TYPE_DELETE: if (argc != 2 && argc != 3) { return -1; } totlen = ARGUMENTLEN(argtype,argv,argvlen,0) + 1 + ARGUMENTLEN(argtype,argv,argvlen,1); if (argc == 3) { if (strncasecmp(argv[2],"noreply",7)) { return -1; } totlen += 1 + ARGUMENTLEN(argtype,argv,argvlen,2); } totlen += 2; break; default: totlen = -1; break; } return totlen; } /* Like the checkCmdValidAndGetTotalLen() function */ static int genericMemcachedCommand(int cmdtype, char *cmd, int argtype, int argc, char **argv, size_t *argvlen) { int j; size_t len; int pos = 0; /* position in final command */ switch (cmdtype) { case REQ_TYPE_STORAGE: case REQ_TYPE_CAS: for (j = 0; j < argc-1; j ++) { len = ARGUMENTLEN(argtype,argv,argvlen,j); memcpy(cmd+pos,argv[j],len); pos += 
(int)len; cmd[pos++] = ' '; } cmd[pos++] = '\r'; cmd[pos++] = '\n'; len = ARGUMENTLEN(argtype,argv,argvlen,argc-1); memcpy(cmd+pos,argv[argc-1],len); pos += (int)len; cmd[pos++] = '\r'; cmd[pos++] = '\n'; break; case REQ_TYPE_ARITHMETIC: case REQ_TYPE_RETRIEVAL: case REQ_TYPE_DELETE: for (j = 0; j < argc-1; j ++) { len = ARGUMENTLEN(argtype,argv,argvlen,j); memcpy(cmd+pos,argv[j],len); pos += len; cmd[pos++] = ' '; } len = ARGUMENTLEN(argtype,argv,argvlen,argc-1); memcpy(cmd+pos,argv[argc-1],len); pos += (int)len; cmd[pos++] = '\r'; cmd[pos++] = '\n'; break; default: pos = -1; break; } return pos; } /* Format a command according to the Memcached protocol. This function * takes the number of arguments, an array with arguments and an sds array. */ int memcachedFormatCommandSdsArgv(char **target, int argc, const sds *argv) { char *cmd = NULL; /* final command */ int pos; /* position in final command */ int totlen; int type; /* Abort on a NULL target */ if (target == NULL || argc < 1) return -1; type = getRequestTypeFromString(argv[0], sdslen(argv[0])); if (type < 0) goto format_err; totlen = checkCmdValidAndGetTotalLen(type, 0, argc, argv, NULL); if (totlen < 0) { goto format_err; } /* Build the command at protocol level */ cmd = malloc(totlen+1); if (cmd == NULL) goto memory_err; pos = genericMemcachedCommand(type, cmd, 0, argc, argv, NULL); if (pos < 0) goto format_err; assert(pos == totlen); cmd[pos] = '\0'; *target = cmd; return totlen; format_err: if (cmd) free(cmd); return -2; memory_err: return -1; } int memcachedvFormatCommand(char **target, const char *format, va_list ap) { const char *c = format; char *cmd = NULL; /* final command */ int pos; /* position in final command */ sds curarg, newarg; /* current argument */ int touched = 0; /* was the current argument touched? 
*/ char **curargv = NULL, **newargv = NULL; int argc = 0; int totlen; int error_type = 0; /* 0 = no error; -1 = memory error; -2 = format error */ int j; /* Abort if there is not target to set */ if (target == NULL) return -1; /* Build the command string accordingly to protocol */ curarg = sdsempty(); if (curarg == NULL) return -1; while(*c != '\0') { if (*c != '%' || c[1] == '\0') { if (*c == ' ') { if (touched) { newargv = realloc(curargv,sizeof(char*)*(argc+1)); if (newargv == NULL) goto memory_err; curargv = newargv; curargv[argc++] = curarg; /* curarg is put in argv so it can be overwritten. */ curarg = sdsempty(); if (curarg == NULL) goto memory_err; touched = 0; } } else { newarg = sdscatlen(curarg,c,1); if (newarg == NULL) goto memory_err; curarg = newarg; touched = 1; } } else { char *arg; size_t size; /* Set newarg so it can be checked even if it is not touched. */ newarg = curarg; switch(c[1]) { case 's': arg = va_arg(ap,char*); size = strlen(arg); if (size > 0) newarg = sdscatlen(curarg,arg,size); break; case 'b': arg = va_arg(ap,char*); size = va_arg(ap,size_t); if (size > 0) newarg = sdscatlen(curarg,arg,size); break; case '%': newarg = sdscat(curarg,"%"); break; default: /* Try to detect printf format */ { static const char intfmts[] = "diouxX"; static const char flags[] = "#0-+ "; char _format[16]; const char *_p = c+1; size_t _l = 0; va_list _cpy; /* Flags */ while (*_p != '\0' && strchr(flags,*_p) != NULL) _p++; /* Field width */ while (*_p != '\0' && isdigit(*_p)) _p++; /* Precision */ if (*_p == '.') { _p++; while (*_p != '\0' && isdigit(*_p)) _p++; } /* Copy va_list before consuming with va_arg */ va_copy(_cpy,ap); /* Integer conversion (without modifiers) */ if (strchr(intfmts,*_p) != NULL) { va_arg(ap,int); goto fmt_valid; } /* Double conversion (without modifiers) */ if (strchr("eEfFgGaA",*_p) != NULL) { va_arg(ap,double); goto fmt_valid; } /* Size: char */ if (_p[0] == 'h' && _p[1] == 'h') { _p += 2; if (*_p != '\0' && strchr(intfmts,*_p) 
!= NULL) { va_arg(ap,int); /* char gets promoted to int */ goto fmt_valid; } goto fmt_invalid; } /* Size: short */ if (_p[0] == 'h') { _p += 1; if (*_p != '\0' && strchr(intfmts,*_p) != NULL) { va_arg(ap,int); /* short gets promoted to int */ goto fmt_valid; } goto fmt_invalid; } /* Size: long long */ if (_p[0] == 'l' && _p[1] == 'l') { _p += 2; if (*_p != '\0' && strchr(intfmts,*_p) != NULL) { va_arg(ap,long long); goto fmt_valid; } goto fmt_invalid; } /* Size: long */ if (_p[0] == 'l') { _p += 1; if (*_p != '\0' && strchr(intfmts,*_p) != NULL) { va_arg(ap,long); goto fmt_valid; } goto fmt_invalid; } fmt_invalid: va_end(_cpy); goto format_err; fmt_valid: _l = (_p+1)-c; if (_l < sizeof(_format)-2) { memcpy(_format,c,_l); _format[_l] = '\0'; newarg = sdscatvprintf(curarg,_format,_cpy); /* Update current position (note: outer blocks * increment c twice so compensate here) */ c = _p-1; } va_end(_cpy); break; } } if (newarg == NULL) goto memory_err; curarg = newarg; touched = 1; c++; } c++; } /* Add the last argument if needed */ if (touched) { newargv = realloc(curargv,sizeof(char*)*(argc+1)); if (newargv == NULL) goto memory_err; curargv = newargv; curargv[argc++] = curarg; } else { sdsfree(curarg); } /* Clear curarg because it was put in curargv or was free'd. */ curarg = NULL; totlen = memcachedFormatCommandSdsArgv(&cmd, argc,curargv); if (totlen < 0) { error_type = totlen; goto cleanup; } free(curargv); *target = cmd; return totlen; format_err: error_type = -2; goto cleanup; memory_err: error_type = -1; goto cleanup; cleanup: if (curargv) { while(argc--) sdsfree(curargv[argc]); free(curargv); } sdsfree(curarg); /* No need to check cmd since it is the last statement that can fail, * but do it anyway to be as defensive as possible. */ if (cmd != NULL) free(cmd); return error_type; } /* Format a command according to the Memcached protocol. 
This function * takes a format similar to printf: * * %s represents a C null terminated string you want to interpolate * %b represents a binary safe string * * When using %b you need to provide both the pointer to the string * and the length in bytes as a size_t. Examples: * * len = memcachedFormatCommand(target, "GET %s", mykey); * len = memcachedFormatCommand(target, "SET %s %d, %lld %zu %s", mykey, myflags, myexptime, myvallen, myval); */ int memcachedFormatCommand(char **target, const char *format, ...) { va_list ap; int len; va_start(ap,format); len = memcachedvFormatCommand(target,format,ap); va_end(ap); /* The API says "-1" means bad result, but we now also return "-2" in some * cases. Force the return value to always be -1. */ if (len < 0) len = -1; return len; } /* Format a command according to the Redis protocol. This function takes the * number of arguments, an array with arguments and an array with their * lengths. If the latter is set to NULL, strlen will be used to compute the * argument lengths. 
*/ int memcachedFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen) { char *cmd = NULL; /* final command */ int pos; /* position in final command */ int totlen; int type; /* Abort on a NULL target */ if (target == NULL || argc < 1) return -1; type = getRequestTypeFromString(argv[0], argvlen==NULL?strlen(argv[0]):argvlen[0]); if (type < 0) { goto format_err; } totlen = checkCmdValidAndGetTotalLen(type, 1, argc, argv, argvlen); if (totlen < 0) { goto format_err; } /* Build the command at protocol level */ cmd = malloc(totlen+1); if (cmd == NULL) goto memory_err; pos = genericMemcachedCommand(type, cmd, 1, argc, argv, argvlen); if (pos < 0) { goto format_err; } assert(pos == totlen); cmd[pos] = '\0'; *target = cmd; return totlen; format_err: if (cmd) free(cmd); return -2; memory_err: return -1; } ================================================ FILE: dep/himemcached-0.1.0/himemcached.h ================================================ #ifndef _HIMEMCACHED_H_ #define _HIMEMCACHED_H_ #include "himcread.h" #include "himcdep/sds.h" #define HIMC_MAJOR 0 #define HIMC_MINOR 13 #define HIMC_PATCH 1 /* Connection type can be blocking or non-blocking and is set in the * least significant bit of the flags field in redisContext. */ #define MC_BLOCK 0x1 /* Connection may be disconnected before being free'd. The second bit * in the flags field is set when the context is connected. */ #define MC_CONNECTED 0x2 /* The async API might try to disconnect cleanly and flush the output * buffer and read all subsequent replies before disconnecting. * This flag means no new commands can come in and the connection * should be terminated once all replies have been read. */ #define MC_DISCONNECTING 0x4 /* Flag specific to the async API which means that the context should be clean * up as soon as possible. */ #define MC_FREEING 0x8 /* Flag that is set when an async callback is executed. 
*/
#define MC_IN_CALLBACK 0x10

/* Flag that is set when the async context has one or more subscriptions. */
#define MC_SUBSCRIBED 0x20

/* Flag that is set when monitor mode is active */
#define MC_MONITORING 0x40

/* Flag that is set when we should set SO_REUSEADDR before calling bind() */
#define MC_REUSEADDR 0x80

#define MC_KEEPALIVE_INTERVAL 15 /* seconds */

/* Number of times we retry to connect in the case of EADDRNOTAVAIL and
 * SO_REUSEADDR is being used. */
#define MC_CONNECT_RETRIES 10

/* This is the reply object returned by memcachedCommand() */
typedef struct mcReply {
    int type; /* MC_REPLY_* */
    long long integer; /* The integer when type is MC_REPLY_INTEGER */
    int keylen; /* Length of key */
    char *key; /* Key string, NUL-terminated copy; NULL when no key was given */
    int len; /* Length of string */
    char *str; /* Used for MC_REPLY_ERROR, MC_REPLY_STATUS and MC_REPLY_STRING */
    int flags; /* Stored as passed when the string reply is created; presumably
                * the item flags sent by the server - TODO confirm */
    long long version; /* Stored as passed when the string reply is created;
                        * presumably the cas value - TODO confirm */
    size_t elements; /* number of elements, for MC_REPLY_ARRAY */
    struct mcReply **element; /* elements vector for MC_REPLY_ARRAY */
} mcReply;

mcReader *memcachedReaderCreate(void);

/* Function to free the reply objects himemcached returns by default.
*/
void freeMcReplyObject(void *reply);

/* Transport used by a context. */
enum mcConnectionType {
    MC_CONN_TCP,
    MC_CONN_UNIX,
};

/* Context for a connection to Memcached */
typedef struct mcContext {
    int err; /* Error flags, 0 when there is no error */
    char errstr[128]; /* String representation of error when applicable */
    int fd; /* Descriptor used for read()/write() */
    int flags; /* Bitwise OR of the MC_* connection flags (e.g. MC_BLOCK) */
    char *obuf; /* Write buffer */
    mcReader *reader; /* Protocol reader */

    enum mcConnectionType connection_type;
    struct timeval *timeout; /* Released with free() when the context is freed */

    /* Endpoint strings below are released with free() when the context is
     * freed. */
    struct {
        char *host;
        char *source_addr;
        int port;
    } tcp;

    struct {
        char *path;
    } unix_sock;
} mcContext;

/* Each function below returns MC_OK on success and MC_ERR on error. */
int memcachedBufferWrite(mcContext *c, int *done);
int memcachedBufferRead(mcContext *c);
int memcachedGetReplyFromReader(mcContext *c, void **reply);
int memcachedGetReply(mcContext *c, void **reply);

mcContext *memcachedContextInit(void);
void memcachedFree(mcContext *c) ;

/* Command formatters: each returns the command length on success and a
 * negative value on error; *target receives a malloc'ed buffer. */
int memcachedFormatCommandSdsArgv(char **target, int argc, const sds *argv);
int memcachedvFormatCommand(char **target, const char *format, va_list ap);
int memcachedFormatCommand(char **target, const char *format, ...);
int memcachedFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen);

#endif /* _HIMEMCACHED_H_ */
================================================ FILE: dep/hiredis-0.13.3/.gitignore ================================================ ================================================ FILE: dep/jemalloc-4.2.0/.gitignore ================================================ ================================================ FILE: dep/sds/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = if !OS_SOLARIS AM_CPPFLAGS += -D_GNU_SOURCE endif AM_CPPFLAGS += -I $(top_srcdir)/dep/util AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include AM_CPPFLAGS += -I $(top_srcdir)/dep/dmalloc AM_CFLAGS = -Wall -Wshadow AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value noinst_LIBRARIES = libsds.a noinst_HEADERS = sds.h sdsalloc.h libsds_a_SOURCES = \ sdsalloc.h \ sds.c sds.h
================================================ FILE: dep/sds/sds.c ================================================ /* SDSLib 2.0 -- A C dynamic strings library * * Copyright (c) 2006-2015, Salvatore Sanfilippo * Copyright (c) 2015, Oran Agra * Copyright (c) 2015, Redis Labs, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include static inline int sdsHdrSize(char type) { switch(type&SDS_TYPE_MASK) { case SDS_TYPE_5: return sizeof(struct sdshdr5); case SDS_TYPE_8: return sizeof(struct sdshdr8); case SDS_TYPE_16: return sizeof(struct sdshdr16); case SDS_TYPE_32: return sizeof(struct sdshdr32); case SDS_TYPE_64: return sizeof(struct sdshdr64); } return 0; } static inline char sdsReqType(size_t string_size) { if (string_size < 32) return SDS_TYPE_5; if (string_size < 0xff) return SDS_TYPE_8; if (string_size < 0xffff) return SDS_TYPE_16; if (string_size < 0xffffffff) return SDS_TYPE_32; return SDS_TYPE_64; } /* Create a new sds string with the content specified by the 'init' pointer * and 'initlen'. * If NULL is used for 'init' the string is initialized with zero bytes. * * The string is always null-termined (all the sds strings are, always) so * even if you create an sds string with: * * mystring = sdsnewlen("abc",3); * * You can print the string with printf() as there is an implicit \0 at the * end of the string. However the string is binary safe and can contain * \0 characters in the middle, as the length is stored in the sds header. */ sds sdsnewlen(const void *init, size_t initlen) { void *sh; sds s; char type = sdsReqType(initlen); /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this. */ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8; int hdrlen = sdsHdrSize(type); unsigned char *fp; /* flags pointer. 
*/ sh = s_malloc(hdrlen+initlen+1); if (!init) memset(sh, 0, hdrlen+initlen+1); if (sh == NULL) return NULL; s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; switch(type) { case SDS_TYPE_5: { *fp = type | (initlen << SDS_TYPE_BITS); break; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } } if (initlen && init) memcpy(s, init, initlen); s[initlen] = '\0'; return s; } /* Create an empty (zero length) sds string. Even in this case the string * always has an implicit null term. */ sds sdsempty(void) { return sdsnewlen("",0); } /* Create a new sds string starting from a null terminated C string. */ sds sdsnew(const char *init) { size_t initlen = (init == NULL) ? 0 : strlen(init); return sdsnewlen(init, initlen); } /* Duplicate an sds string. */ sds sdsdup(const sds s) { return sdsnewlen(s, sdslen(s)); } /* Free an sds string. No operation is performed if 's' is NULL. */ void sdsfree(sds s) { if (s == NULL) return; s_free((char*)s-sdsHdrSize(s[-1])); } /* Set the sds string length to the length as obtained with strlen(), so * considering as content only up to the first null term character. * * This function is useful when the sds string is hacked manually in some * way, like in the following example: * * s = sdsnew("foobar"); * s[2] = '\0'; * sdsupdatelen(s); * printf("%d\n", sdslen(s)); * * The output will be "2", but if we comment out the call to sdsupdatelen() * the output will be "6" as the string was modified but the logical length * remains 6 bytes. */ void sdsupdatelen(sds s) { int reallen = strlen(s); sdssetlen(s, reallen); } /* Modify an sds string in-place to make it empty (zero length). 
* However all the existing buffer is not discarded but set as free space
 * so that next append operations will not require allocations up to the
 * number of bytes previously available. */
void sdsclear(sds s) {
    sdssetlen(s, 0);
    s[0] = '\0';
}

/* Make sure at least 'addlen' bytes (plus the nul term) can be written
 * after the current end of the string, growing the allocation if needed.
 * Returns the possibly relocated string, or NULL if out of memory.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    size_t freebytes = sdsavail(s);
    char curtype = s[-1] & SDS_TYPE_MASK;
    char newtype;
    void *oldhdr, *newhdr;
    size_t curlen, wanted;
    int newhdrlen;

    /* Nothing to do when the spare room is already large enough. */
    if (freebytes >= addlen)
        return s;

    curlen = sdslen(s);
    oldhdr = (char*)s-sdsHdrSize(curtype);

    /* Over-allocate: double small strings, add a fixed chunk to big ones. */
    wanted = curlen+addlen;
    wanted = (wanted < SDS_MAX_PREALLOC) ? wanted*2 : wanted+SDS_MAX_PREALLOC;

    /* Type 5 cannot remember free space, so a growing string never uses
     * it: otherwise every append would have to come back here. */
    newtype = sdsReqType(wanted);
    if (newtype == SDS_TYPE_5)
        newtype = SDS_TYPE_8;
    newhdrlen = sdsHdrSize(newtype);

    if (curtype == newtype) {
        /* Same header layout: the block can grow in place. */
        newhdr = s_realloc(oldhdr, newhdrlen+wanted+1);
        if (newhdr == NULL)
            return NULL;
        s = (char*)newhdr+newhdrlen;
    } else {
        /* The header size changes, so the bytes must move to a new block
         * instead of being realloc'ed. */
        newhdr = s_malloc(newhdrlen+wanted+1);
        if (newhdr == NULL)
            return NULL;
        memcpy((char*)newhdr+newhdrlen, s, curlen+1);
        s_free(oldhdr);
        s = (char*)newhdr+newhdrlen;
        s[-1] = newtype;
        sdssetlen(s, curlen);
    }
    sdssetalloc(s, wanted);
    return s;
}

/* Reallocate the sds string so that it has no free space at the end. The
 * contained string remains not altered, but next concatenation operations
 * will require a reallocation.
* * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdsRemoveFreeSpace(sds s) { void *sh, *newsh; char type, oldtype = s[-1] & SDS_TYPE_MASK; int hdrlen; size_t len = sdslen(s); sh = (char*)s-sdsHdrSize(oldtype); type = sdsReqType(len); hdrlen = sdsHdrSize(type); if (oldtype==type) { newsh = s_realloc(sh, hdrlen+len+1); if (newsh == NULL) return NULL; s = (char*)newsh+hdrlen; } else { newsh = s_malloc(hdrlen+len+1); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len+1); s_free(sh); s = (char*)newsh+hdrlen; s[-1] = type; sdssetlen(s, len); } sdssetalloc(s, len); return s; } /* Return the total size of the allocation of the specifed sds string, * including: * 1) The sds header before the pointer. * 2) The string. * 3) The free buffer at the end if any. * 4) The implicit null term. */ size_t sdsAllocSize(sds s) { size_t alloc = sdsalloc(s); return sdsHdrSize(s[-1])+alloc+1; } /* Return the pointer of the actual SDS allocation (normally SDS strings * are referenced by the start of the string buffer). */ void *sdsAllocPtr(sds s) { return (void*) (s-sdsHdrSize(s[-1])); } /* Increment the sds length and decrements the left free space at the * end of the string according to 'incr'. Also set the null term * in the new end of the string. * * This function is used in order to fix the string length after the * user calls sdsMakeRoomFor(), writes something after the end of * the current string, and finally needs to set the new length. * * Note: it is possible to use a negative increment in order to * right-trim the string. * * Usage example: * * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the * following schema, to cat bytes coming from the kernel to the end of an * sds string without copying into an intermediate buffer: * * oldlen = sdslen(s); * s = sdsMakeRoomFor(s, BUFFER_SIZE); * nread = read(fd, s+oldlen, BUFFER_SIZE); * ... 
check for nread <= 0 and handle it ... * sdsIncrLen(s, nread); */ void sdsIncrLen(sds s, int incr) { unsigned char flags = s[-1]; size_t len; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: { unsigned char *fp = ((unsigned char*)s)-1; unsigned char oldlen = SDS_TYPE_5_LEN(flags); assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr))); *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS); len = oldlen+incr; break; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr))); len = (sh->len += incr); break; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr))); len = (sh->len += incr); break; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr))); len = (sh->len += incr); break; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr))); len = (sh->len += incr); break; } default: len = 0; /* Just to avoid compilation warnings. */ } s[len] = '\0'; } /* Grow the sds to have the specified length. Bytes that were not part of * the original length of the sds will be set to zero. * * if the specified length is smaller than the current length, no operation * is performed. */ sds sdsgrowzero(sds s, size_t len) { size_t curlen = sdslen(s); if (len <= curlen) return s; s = sdsMakeRoomFor(s,len-curlen); if (s == NULL) return NULL; /* Make sure added region doesn't contain garbage */ memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */ sdssetlen(s, len); return s; } /* Append the specified binary-safe string pointed by 't' of 'len' bytes to the * end of the specified sds string 's'. 
* * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscatlen(sds s, const void *t, size_t len) { size_t curlen = sdslen(s); s = sdsMakeRoomFor(s,len); if (s == NULL) return NULL; memcpy(s+curlen, t, len); sdssetlen(s, curlen+len); s[curlen+len] = '\0'; return s; } /* Append the specified null termianted C string to the sds string 's'. * * After the call, the passed sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscat(sds s, const char *t) { return sdscatlen(s, t, strlen(t)); } /* Append the specified sds 't' to the existing sds 's'. * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. */ sds sdscatsds(sds s, const sds t) { return sdscatlen(s, t, sdslen(t)); } /* Destructively modify the sds string 's' to hold the specified binary * safe string pointed by 't' of length 'len' bytes. */ sds sdscpylen(sds s, const char *t, size_t len) { if (sdsalloc(s) < len) { s = sdsMakeRoomFor(s,len-sdslen(s)); if (s == NULL) return NULL; } memcpy(s, t, len); s[len] = '\0'; sdssetlen(s, len); return s; } /* Like sdscpylen() but 't' must be a null-termined string so that the length * of the string is obtained with strlen(). */ sds sdscpy(sds s, const char *t) { return sdscpylen(s, t, strlen(t)); } /* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */ #define SDS_LLSTR_SIZE 21 int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? 
-value : value; p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p++ = '-'; /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* Identical sdsll2str(), but for unsigned long long type. */ int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; /* Generate the string representation, this method produces * an reversed string. */ p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* Create an sds string from a long long value. It is much faster than: * * sdscatprintf(sdsempty(),"%lld\n", value); */ sds sdsfromlonglong(long long value) { char buf[SDS_LLSTR_SIZE]; int len = sdsll2str(buf,value); return sdsnewlen(buf,len); } /* Like sdscatprintf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char *fmt, va_list ap) { va_list cpy; char staticbuf[1024], *buf = staticbuf, *t; size_t buflen = strlen(fmt)*2; /* We try to start using a static buffer for speed. * If not possible we revert to heap allocation. */ if (buflen > sizeof(staticbuf)) { buf = s_malloc(buflen); if (buf == NULL) return NULL; } else { buflen = sizeof(staticbuf); } /* Try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */ while(1) { buf[buflen-2] = '\0'; va_copy(cpy,ap); vsnprintf(buf, buflen, fmt, cpy); va_end(cpy); if (buf[buflen-2] != '\0') { if (buf != staticbuf) s_free(buf); buflen *= 2; buf = s_malloc(buflen); if (buf == NULL) return NULL; continue; } break; } /* Finally concat the obtained string to the SDS string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t; } /* Append to the sds string 's' a string obtained using printf-alike format * specifier. 
* * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. * * Example: * * s = sdsnew("Sum is: "); * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b). * * Often you need to create a string from scratch with the printf-alike * format. When this is the need, just use sdsempty() as the target string: * * s = sdscatprintf(sdsempty(), "... your format ...", args); */ sds sdscatprintf(sds s, const char *fmt, ...) { va_list ap; char *t; va_start(ap, fmt); t = sdscatvprintf(s,fmt,ap); va_end(ap); return t; } /* This function is similar to sdscatprintf, but much faster as it does * not rely on sprintf() family functions implemented by the libc that * are often very slow. Moreover directly handling the sds string as * new data is concatenated provides a performance improvement. * * However this function only handles an incompatible subset of printf-alike * format specifiers: * * %s - C String * %S - SDS string * %i - signed int * %I - 64 bit signed integer (long long, int64_t) * %u - unsigned int * %U - 64 bit unsigned integer (unsigned long long, uint64_t) * %% - Verbatim "%" character. */ sds sdscatfmt(sds s, char const *fmt, ...) { size_t initlen = sdslen(s); const char *f = fmt; int i; va_list ap; va_start(ap,fmt); f = fmt; /* Next format specifier byte to process. */ i = initlen; /* Position of the next byte to write to dest str. */ while(*f) { char next, *str; size_t l; long long num; unsigned long long unum; /* Make sure there is always space for at least 1 char. */ if (sdsavail(s)==0) { s = sdsMakeRoomFor(s,1); } switch(*f) { case '%': next = *(f+1); f++; switch(next) { case 's': case 'S': str = va_arg(ap,char*); l = (next == 's') ? 
strlen(str) : sdslen(str); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,str,l); sdsinclen(s,l); i += l; break; case 'i': case 'I': if (next == 'i') num = va_arg(ap,int); else num = va_arg(ap,long long); { char buf[SDS_LLSTR_SIZE]; l = sdsll2str(buf,num); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,buf,l); sdsinclen(s,l); i += l; } break; case 'u': case 'U': if (next == 'u') unum = va_arg(ap,unsigned int); else unum = va_arg(ap,unsigned long long); { char buf[SDS_LLSTR_SIZE]; l = sdsull2str(buf,unum); if (sdsavail(s) < l) { s = sdsMakeRoomFor(s,l); } memcpy(s+i,buf,l); sdsinclen(s,l); i += l; } break; default: /* Handle %% and generally %. */ s[i++] = next; sdsinclen(s,1); break; } break; default: s[i++] = *f; sdsinclen(s,1); break; } f++; } va_end(ap); /* Add null-term */ s[i] = '\0'; return s; } /* Remove the part of the string from left and from right composed just of * contiguous characters found in 'cset', that is a null terminted C string. * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. * * Example: * * s = sdsnew("AA...AA.a.aa.aHelloWorld :::"); * s = sdstrim(s,"Aa. :"); * printf("%s\n", s); * * Output will be just "Hello World". */ sds sdstrim(sds s, const char *cset) { char *start, *end, *sp, *ep; size_t len; sp = start = s; ep = end = s+sdslen(s)-1; while(sp <= end && strchr(cset, *sp)) sp++; while(ep > sp && strchr(cset, *ep)) ep--; len = (sp > ep) ? 0 : ((ep-sp)+1); if (s != sp) memmove(s, sp, len); s[len] = '\0'; sdssetlen(s,len); return s; } /* Turn the string into a smaller (or equal) string containing only the * substring specified by the 'start' and 'end' indexes. * * start and end can be negative, where -1 means the last character of the * string, -2 the penultimate character, and so forth. * * The interval is inclusive, so the start and end characters will be part * of the resulting string. 
* * The string is modified in-place. * * Example: * * s = sdsnew("Hello World"); * sdsrange(s,1,-1); => "ello World" */ void sdsrange(sds s, int start, int end) { size_t newlen, len = sdslen(s); if (len == 0) return; if (start < 0) { start = len+start; if (start < 0) start = 0; } if (end < 0) { end = len+end; if (end < 0) end = 0; } newlen = (start > end) ? 0 : (end-start)+1; if (newlen != 0) { if (start >= (signed)len) { newlen = 0; } else if (end >= (signed)len) { end = len-1; newlen = (start > end) ? 0 : (end-start)+1; } } else { start = 0; } if (start && newlen) memmove(s, s+start, newlen); s[newlen] = 0; sdssetlen(s,newlen); } /* Apply tolower() to every character of the sds string 's'. */ void sdstolower(sds s) { int len = sdslen(s), j; for (j = 0; j < len; j++) s[j] = tolower(s[j]); } /* Apply toupper() to every character of the sds string 's'. */ void sdstoupper(sds s) { int len = sdslen(s), j; for (j = 0; j < len; j++) s[j] = toupper(s[j]); } /* Compare two sds strings s1 and s2 with memcmp(). * * Return value: * * positive if s1 > s2. * negative if s1 < s2. * 0 if s1 and s2 are exactly the same binary string. * * If two strings share exactly the same prefix, but one of the two has * additional characters, the longer string is considered to be greater than * the smaller one. */ int sdscmp(const sds s1, const sds s2) { size_t l1, l2, minlen; int cmp; l1 = sdslen(s1); l2 = sdslen(s2); minlen = (l1 < l2) ? l1 : l2; cmp = memcmp(s1,s2,minlen); if (cmp == 0) return l1-l2; return cmp; } /* Split 's' with separator in 'sep'. An array * of sds strings is returned. *count will be set * by reference to the number of tokens returned. * * On out of memory, zero length string, zero length * separator, NULL is returned. * * Note that 'sep' is able to split a string using * a multi-character separator. For example * sdssplit("foo_-_bar","_-_"); will return two * elements "foo" and "bar". * * This version of the function is binary-safe but * requires length arguments. 
sdssplit() is just the * same function but for zero-terminated strings. */ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) { int elements = 0, slots = 5, start = 0, j; sds *tokens; if (seplen < 1 || len < 0) return NULL; tokens = s_malloc(sizeof(sds)*slots); if (tokens == NULL) return NULL; if (len == 0) { *count = 0; return tokens; } for (j = 0; j < (len-(seplen-1)); j++) { /* make sure there is room for the next element and the final one */ if (slots < elements+2) { sds *newtokens; slots *= 2; newtokens = s_realloc(tokens,sizeof(sds)*slots); if (newtokens == NULL) goto cleanup; tokens = newtokens; } /* search the separator */ if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) { tokens[elements] = sdsnewlen(s+start,j-start); if (tokens[elements] == NULL) goto cleanup; elements++; start = j+seplen; j = j+seplen-1; /* skip the separator */ } } /* Add the final element. We are sure there is room in the tokens array. */ tokens[elements] = sdsnewlen(s+start,len-start); if (tokens[elements] == NULL) goto cleanup; elements++; *count = elements; return tokens; cleanup: { int i; for (i = 0; i < elements; i++) sdsfree(tokens[i]); s_free(tokens); *count = 0; return NULL; } } /* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */ void sdsfreesplitres(sds *tokens, int count) { if (!tokens) return; while(count--) sdsfree(tokens[count]); s_free(tokens); } /* Append to the sds string "s" an escaped string representation where * all the non-printable characters (tested with isprint()) are turned into * escapes in the form "\n\r\a...." or "\x". * * After the call, the modified sds string is no longer valid and all the * references must be substituted with the new pointer returned by the call. 
*/
sds sdscatrepr(sds s, const char *p, size_t len) {
    s = sdscatlen(s,"\"",1);
    while(len--) {
        switch(*p) {
        case '\\':
        case '"':
            s = sdscatprintf(s,"\\%c",*p);
            break;
        case '\n': s = sdscatlen(s,"\\n",2); break;
        case '\r': s = sdscatlen(s,"\\r",2); break;
        case '\t': s = sdscatlen(s,"\\t",2); break;
        case '\a': s = sdscatlen(s,"\\a",2); break;
        case '\b': s = sdscatlen(s,"\\b",2); break;
        default:
            /* isprint() requires a value representable as unsigned char;
             * a plain char holding a byte >= 0x80 may be negative. */
            if (isprint((unsigned char)*p))
                s = sdscatprintf(s,"%c",*p);
            else
                s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
            break;
        }
        p++;
    }
    return sdscatlen(s,"\"",1);
}

/* Helper function for sdssplitargs() that returns non zero if 'c'
 * is a valid hex digit. */
int is_hex_digit(char c) {
    if (c >= '0' && c <= '9') return 1;
    if (c >= 'a' && c <= 'f') return 1;
    return (c >= 'A' && c <= 'F');
}

/* Helper function for sdssplitargs() that converts a hex digit into an
 * integer from 0 to 15. Any other character yields 0. */
int hex_digit_to_int(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;
}

/* Split a line into arguments, where every argument can be in the
 * following programming-language REPL-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * The number of arguments is stored into *argc, and an array
 * of sds is returned.
 *
 * The caller should free the resulting array of sds strings with
 * sdsfreesplitres().
 *
 * Note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
* * The function returns the allocated tokens on success, even when the * input string is empty, or NULL if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */ sds *sdssplitargs(const char *line, int *argc) { const char *p = line; char *current = NULL; char **vector = NULL; *argc = 0; while(1) { /* skip blanks */ while(*p && isspace(*p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ int insq=0; /* set to 1 if we are in 'single quotes' */ int done=0; if (current == NULL) current = sdsempty(); while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { unsigned char byte; byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); current = sdscatlen(current,(char*)&byte,1); p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current = sdscatlen(current,&c,1); } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current = sdscatlen(current,"'",1); } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. 
*/ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { current = sdscatlen(current,p,1); } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: current = sdscatlen(current,p,1); break; } } if (*p) p++; } /* add the token to the vector */ vector = s_realloc(vector,((*argc)+1)*sizeof(char*)); vector[*argc] = current; (*argc)++; current = NULL; } else { /* Even on empty input string return something not NULL. */ if (vector == NULL) vector = s_malloc(sizeof(void*)); return vector; } } err: while((*argc)--) sdsfree(vector[*argc]); s_free(vector); if (current) sdsfree(current); *argc = 0; return NULL; } /* Modify the string substituting all the occurrences of the set of * characters specified in the 'from' string to the corresponding character * in the 'to' array. * * For instance: sdsmapchars(mystring, "ho", "01", 2) * will have the effect of turning the string "hello" into "0ell1". * * The function returns the sds string pointer, that is always the same * as the input pointer since no resize is needed. */ sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) { size_t j, i, l = sdslen(s); for (j = 0; j < l; j++) { for (i = 0; i < setlen; i++) { if (s[j] == from[i]) { s[j] = to[i]; break; } } } return s; } /* Join an array of C strings using the specified separator (also a C string). * Returns the result as an sds string. */ sds sdsjoin(char **argv, int argc, char *sep) { sds join = sdsempty(); int j; for (j = 0; j < argc; j++) { join = sdscat(join, argv[j]); if (j != argc-1) join = sdscat(join,sep); } return join; } /* Like sdsjoin, but joins an array of SDS strings. 
*/ sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) { sds join = sdsempty(); int j; for (j = 0; j < argc; j++) { join = sdscatsds(join, argv[j]); if (j != argc-1) join = sdscatlen(join,sep,seplen); } return join; } int sdsIsNum(sds s) { size_t i; if (s == NULL || sdslen(s) == 0) { return 0; } for (i = 0; i < sdslen(s); i ++) { if(*(s+i) < '0' || *(s+i) > '9'){ return 0; } } return 1; } /* Wrappers to the allocators used by SDS. Note that SDS will actually * just use the macros defined into sdsalloc.h in order to avoid to pay * the overhead of function calls. Here we define these wrappers only for * the programs SDS is linked to, if they want to touch the SDS internals * even if they use a different allocator. */ void *sds_malloc(size_t size) { return s_malloc(size); } void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); } void sds_free(void *ptr) { s_free(ptr); } #if defined(SDS_TEST_MAIN) #include #include "testhelp.h" #include "limits.h" #define UNUSED(x) (void)(x) int sdsTest(void) { { sds x = sdsnew("foo"), y; test_cond("Create a string and obtain the length", sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0) sdsfree(x); x = sdsnewlen("foo",2); test_cond("Create a string with specified length", sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0) x = sdscat(x,"bar"); test_cond("Strings concatenation", sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0); x = sdscpy(x,"a"); test_cond("sdscpy() against an originally longer string", sdslen(x) == 1 && memcmp(x,"a\0",2) == 0) x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk"); test_cond("sdscpy() against an originally shorter string", sdslen(x) == 33 && memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0) sdsfree(x); x = sdscatprintf(sdsempty(),"%d",123); test_cond("sdscatprintf() seems working in the base case", sdslen(x) == 3 && memcmp(x,"123\0",4) == 0) sdsfree(x); x = sdsnew("--"); x = sdscatfmt(x, "Hello %s World %I,%I--", "Hi!", LLONG_MIN,LLONG_MAX); test_cond("sdscatfmt() seems working in 
the base case", sdslen(x) == 60 && memcmp(x,"--Hello Hi! World -9223372036854775808," "9223372036854775807--",60) == 0) printf("[%s]\n",x); sdsfree(x); x = sdsnew("--"); x = sdscatfmt(x, "%u,%U--", UINT_MAX, ULLONG_MAX); test_cond("sdscatfmt() seems working with unsigned numbers", sdslen(x) == 35 && memcmp(x,"--4294967295,18446744073709551615--",35) == 0) sdsfree(x); x = sdsnew(" x "); sdstrim(x," x"); test_cond("sdstrim() works when all chars match", sdslen(x) == 0) sdsfree(x); x = sdsnew(" x "); sdstrim(x," "); test_cond("sdstrim() works when a single char remains", sdslen(x) == 1 && x[0] == 'x') sdsfree(x); x = sdsnew("xxciaoyyy"); sdstrim(x,"xy"); test_cond("sdstrim() correctly trims characters", sdslen(x) == 4 && memcmp(x,"ciao\0",5) == 0) y = sdsdup(x); sdsrange(y,1,1); test_cond("sdsrange(...,1,1)", sdslen(y) == 1 && memcmp(y,"i\0",2) == 0) sdsfree(y); y = sdsdup(x); sdsrange(y,1,-1); test_cond("sdsrange(...,1,-1)", sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0) sdsfree(y); y = sdsdup(x); sdsrange(y,-2,-1); test_cond("sdsrange(...,-2,-1)", sdslen(y) == 2 && memcmp(y,"ao\0",3) == 0) sdsfree(y); y = sdsdup(x); sdsrange(y,2,1); test_cond("sdsrange(...,2,1)", sdslen(y) == 0 && memcmp(y,"\0",1) == 0) sdsfree(y); y = sdsdup(x); sdsrange(y,1,100); test_cond("sdsrange(...,1,100)", sdslen(y) == 3 && memcmp(y,"iao\0",4) == 0) sdsfree(y); y = sdsdup(x); sdsrange(y,100,100); test_cond("sdsrange(...,100,100)", sdslen(y) == 0 && memcmp(y,"\0",1) == 0) sdsfree(y); sdsfree(x); x = sdsnew("foo"); y = sdsnew("foa"); test_cond("sdscmp(foo,foa)", sdscmp(x,y) > 0) sdsfree(y); sdsfree(x); x = sdsnew("bar"); y = sdsnew("bar"); test_cond("sdscmp(bar,bar)", sdscmp(x,y) == 0) sdsfree(y); sdsfree(x); x = sdsnew("aar"); y = sdsnew("bar"); test_cond("sdscmp(bar,bar)", sdscmp(x,y) < 0) sdsfree(y); sdsfree(x); x = sdsnewlen("\a\n\0foo\r",7); y = sdscatrepr(sdsempty(),x,sdslen(x)); test_cond("sdscatrepr(...data...)", memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0) { unsigned int oldfree; char *p; 
int step = 10, j, i; sdsfree(x); sdsfree(y); x = sdsnew("0"); test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0); /* Run the test a few times in order to hit the first two * SDS header types. */ for (i = 0; i < 10; i++) { int oldlen = sdslen(x); x = sdsMakeRoomFor(x,step); int type = x[-1]&SDS_TYPE_MASK; test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen); if (type != SDS_TYPE_5) { test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step); oldfree = sdsavail(x); } p = x+oldlen; for (j = 0; j < step; j++) { p[j] = 'A'+j; } sdsIncrLen(x,step); } test_cond("sdsMakeRoomFor() content", memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0); test_cond("sdsMakeRoomFor() final length",sdslen(x)==101); sdsfree(x); } } test_report() return 0; } #endif #ifdef SDS_TEST_MAIN int main(void) { return sdsTest(); } #endif ================================================ FILE: dep/sds/sds.h ================================================ /* SDSLib 2.0 -- A C dynamic strings library * * Copyright (c) 2006-2015, Salvatore Sanfilippo * Copyright (c) 2015, Oran Agra * Copyright (c) 2015, Redis Labs, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef __SDS_H #define __SDS_H #define SDS_MAX_PREALLOC (1024*1024) #include #include #include typedef char *sds; /* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. 
*/
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};
/* The four headers below share one shape and differ only in the width of
 * the 'len' and 'alloc' fields. 'flags' is always the last byte before
 * 'buf', which is why the accessors can read it as s[-1]. */
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* used */
    uint16_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* used */
    uint32_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

/* Header type tags stored in the low SDS_TYPE_BITS bits of 'flags'. */
#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
/* Declare a local 'sh' pointing at the sdshdr##T header placed right before
 * the string pointer 's'. The expansion already ends with a semicolon. */
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
/* Expression form of the above: pointer to the sdshdr##T header of 's'. */
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
/* Length of a type 5 string: the bits of the flags byte above the type tag. */
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)

/* Return the used length of 's', read from the header selected by the type
 * tag in s[-1]. Returns 0 for an unrecognized tag. */
static inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->len;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->len;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->len;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->len;
    }
    return 0;
}

/* Return the number of bytes still free in 's' (alloc - len). Type 5
 * strings have no 'alloc' field, so 0 is always reported for them. */
static inline size_t sdsavail(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5: {
            return 0;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            return sh->alloc - sh->len;
        }
    }
    return 0;
}

/* Store 'newlen' as the used length of 's'. Only the header is touched: no
 * range check is performed and the buffer contents are left as they are. */
static inline void sdssetlen(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                /* Type 5 packs the length into the flags byte itself. */
                unsigned char *fp = ((unsigned char*)s)-1;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len = newlen;
            break;
    }
}

/* Add 'inc' to the used length of 's'. Only the header is touched: no
 * range check is performed and the buffer contents are left as they are. */
static inline void sdsinclen(sds s, size_t inc) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                /* Recompute the packed length and rewrite the flags byte. */
                unsigned char *fp = ((unsigned char*)s)-1;
                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len += inc;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len += inc;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len += inc;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len += inc;
            break;
    }
}

/* sdsalloc() = sdsavail() + sdslen()
 *
 * Returns the 'alloc' header field; for type 5 strings, which have no such
 * field, the used length is returned instead. */
static inline size_t sdsalloc(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->alloc;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->alloc;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->alloc;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->alloc;
    }
    return 0;
}

/* Store 'newlen' in the 'alloc' header field of 's'; a no-op for type 5. */
static inline void sdssetalloc(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            /* Nothing to do, this type has no total allocation info. */
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->alloc = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->alloc = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->alloc = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->alloc = newlen;
            break;
    }
}

/* Public string API. */
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
void sdsfree(sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
sds sdscatsds(sds s, const sds t);
sds sdscpylen(sds s, const char *t, size_t len);
sds sdscpy(sds s, const char *t);

sds sdscatvprintf(sds s, const char *fmt, va_list ap);
/* With GCC-compatible compilers the format attribute marks 'fmt' as a
 * printf-style format whose arguments start at parameter 3. */
#ifdef __GNUC__
sds sdscatprintf(sds s, const char *fmt, ...)
    __attribute__((format(printf, 2, 3)));
#else
sds sdscatprintf(sds s, const char *fmt, ...);
#endif

sds sdscatfmt(sds s, char const *fmt, ...);
sds sdstrim(sds s, const char *cset);
void sdsrange(sds s, int start, int end);
void sdsupdatelen(sds s);
void sdsclear(sds s);
int sdscmp(const sds s1, const sds s2);
sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count);
void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
int sdsIsNum(sds s);

/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
void *sdsAllocPtr(sds s);

/* Export the allocator used by SDS to the program using SDS.
* Sometimes the program SDS is linked to, may use a different set of * allocators, but may want to allocate or free things that SDS will * respectively free or allocate. */ void *sds_malloc(size_t size); void *sds_realloc(void *ptr, size_t size); void sds_free(void *ptr); #ifdef REDIS_TEST int sdsTest(int argc, char *argv[]); #endif #endif ================================================ FILE: dep/sds/sdsalloc.h ================================================ /* SDSLib 2.0 -- A C dynamic strings library * * Copyright (c) 2006-2015, Salvatore Sanfilippo * Copyright (c) 2015, Redis Labs, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* SDS allocator selection. * * This file is used in order to change the SDS allocator at compile time. * Just define the following defines to what you want to use. Also add * the include of your alternate allocator if needed (not needed in order * to use the default libc allocator). */ #include #define s_malloc dalloc #define s_realloc drealloc #define s_free dfree ================================================ FILE: dep/util/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CFLAGS = -Wall -Wshadow AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value noinst_LIBRARIES = libdutil.a noinst_HEADERS = dspecialconfig.h dutil.h dlog.h libdutil_a_SOURCES = \ dspecialconfig.h \ dutil.c dutil.h \ dlog.c dlog.h ================================================ FILE: dep/util/dlog.c ================================================ #include #include #include #include #include #include #include #include #include #include static struct logger logger; int log_init(int level, char *name) { struct logger *l = &logger; l->level = MAX(LOG_EMERG, MIN(level, LOG_PVERB)); l->name = name; if (name == NULL || !strlen(name)) { l->fd = STDERR_FILENO; } else { l->fd = open(name, O_WRONLY | O_APPEND | O_CREAT, 0644); if (l->fd < 0) { log_stderr("opening log file '%s' failed: %s", name, strerror(errno)); return -1; } } return 0; } void log_deinit(void) { struct logger *l = &logger; if (l->fd < 0 || l->fd == STDERR_FILENO) { 
return; } close(l->fd); } void log_reopen(void) { struct logger *l = &logger; if (l->fd != STDERR_FILENO) { close(l->fd); l->fd = open(l->name, O_WRONLY | O_APPEND | O_CREAT, 0644); if (l->fd < 0) { log_stderr_safe("reopening log file '%s' failed, ignored: %s", l->name, strerror(errno)); } } } void log_level_up(void) { struct logger *l = &logger; if (l->level < LOG_PVERB) { l->level++; log_safe("up log level to %d", l->level); } } void log_level_down(void) { struct logger *l = &logger; if (l->level > LOG_EMERG) { l->level--; log_safe("down log level to %d", l->level); } } void log_level_set(int level) { struct logger *l = &logger; l->level = MAX(LOG_EMERG, MIN(level, LOG_PVERB)); loga("set log level to %d", l->level); } void log_stacktrace(void) { struct logger *l = &logger; if (l->fd < 0) { return; } dstacktrace_fd(l->fd); } int log_loggable(int level) { struct logger *l = &logger; if (level > l->level) { return 0; } return 1; } void _log(const char *file, int line, int level, int panic, const char *fmt, ...) { struct logger *l = &logger; int len, size, errno_save; char buf[LOG_MAX_LEN]; va_list args; ssize_t n; struct timeval tv; if (l->fd < 0) { return; } errno_save = errno; len = 0; /* length of output buffer */ size = LOG_MAX_LEN; /* size of output buffer */ gettimeofday(&tv, NULL); buf[len++] = '['; len += dstrftime(buf + len, size - len, "%Y-%m-%d %H:%M:%S.", localtime(&tv.tv_sec)); len += dscnprintf(buf + len, size - len, "%03ld", tv.tv_usec/1000); len += dscnprintf(buf + len, size - len, "] %s:%d ", file, line); va_start(args, fmt); len += dvscnprintf(buf + len, size - len, fmt, args); va_end(args); buf[len++] = '\n'; n = write(l->fd, buf, len); if (n < 0) { l->nerror++; } errno = errno_save; if (panic) { abort(); } } void _log_stderr(const char *fmt, ...) 
{ struct logger *l = &logger; int len, size, errno_save; char buf[4 * LOG_MAX_LEN]; va_list args; ssize_t n; errno_save = errno; len = 0; /* length of output buffer */ size = 4 * LOG_MAX_LEN; /* size of output buffer */ va_start(args, fmt); len += dvscnprintf(buf, size, fmt, args); va_end(args); buf[len++] = '\n'; n = write(STDERR_FILENO, buf, len); if (n < 0) { l->nerror++; } errno = errno_save; } void _log_stdout(const char *fmt, ...) { struct logger *l = &logger; int len, size, errno_save; char buf[4 * LOG_MAX_LEN]; va_list args; ssize_t n; errno_save = errno; len = 0; /* length of output buffer */ size = 4 * LOG_MAX_LEN; /* size of output buffer */ va_start(args, fmt); len += dvscnprintf(buf, size, fmt, args); va_end(args); buf[len++] = '\n'; n = write(STDOUT_FILENO, buf, len); if (n < 0) { l->nerror++; } errno = errno_save; } /* * Hexadecimal dump in the canonical hex + ascii display * See -C option in man hexdump */ void _log_hexdump(const char *file, int line, char *data, int datalen, const char *fmt, ...) { struct logger *l = &logger; char buf[8 * LOG_MAX_LEN]; int i, off, len, size, errno_save; ssize_t n; if (l->fd < 0) { return; } /* log hexdump */ errno_save = errno; off = 0; /* data offset */ len = 0; /* length of output buffer */ size = 8 * LOG_MAX_LEN; /* size of output buffer */ while (datalen != 0 && (len < size - 1)) { char *save, *str; unsigned char c; int savelen; len += dscnprintf(buf + len, size - len, "%08x ", off); save = data; savelen = datalen; for (i = 0; datalen != 0 && i < 16; data++, datalen--, i++) { c = (unsigned char)(*data); str = (i == 7) ? " " : " "; len += dscnprintf(buf + len, size - len, "%02x%s", c, str); } for ( ; i < 16; i++) { str = (i == 7) ? " " : " "; len += dscnprintf(buf + len, size - len, " %s", str); } data = save; datalen = savelen; len += dscnprintf(buf + len, size - len, " |"); for (i = 0; datalen != 0 && i < 16; data++, datalen--, i++) { c = (unsigned char)(isprint(*data) ? 
*data : '.'); len += dscnprintf(buf + len, size - len, "%c", c); } len += dscnprintf(buf + len, size - len, "|\n"); off += 16; } n = write(l->fd, buf, len); if (n < 0) { l->nerror++; } if (len >= size - 1) { n = write(l->fd, "\n", 1); if (n < 0) { l->nerror++; } } errno = errno_save; } void _log_safe(const char *fmt, ...) { struct logger *l = &logger; int len, size, errno_save; char buf[LOG_MAX_LEN]; va_list args; ssize_t n; if (l->fd < 0) { return; } errno_save = errno; len = 0; /* length of output buffer */ size = LOG_MAX_LEN; /* size of output buffer */ len += dsafe_snprintf(buf + len, size - len, "[.......................] "); va_start(args, fmt); len += dsafe_vsnprintf(buf + len, size - len, fmt, args); va_end(args); buf[len++] = '\n'; n = write(l->fd, buf, len); if (n < 0) { l->nerror++; } errno = errno_save; } void _log_stderr_safe(const char *fmt, ...) { struct logger *l = &logger; int len, size, errno_save; char buf[LOG_MAX_LEN]; va_list args; ssize_t n; errno_save = errno; len = 0; /* length of output buffer */ size = LOG_MAX_LEN; /* size of output buffer */ len += dsafe_snprintf(buf + len, size - len, "[.......................] 
"); va_start(args, fmt); len += dsafe_vsnprintf(buf + len, size - len, fmt, args); va_end(args); buf[len++] = '\n'; n = write(STDERR_FILENO, buf, len); if (n < 0) { l->nerror++; } errno = errno_save; } void log_write_len(char *str, size_t len) { struct logger *l = &logger; int errno_save; ssize_t n; if (l->fd < 0) { return; } errno_save = errno; n = write(l->fd, str, len); if (n < 0) { l->nerror++; } errno = errno_save; } ================================================ FILE: dep/util/dlog.h ================================================ #ifndef _DLOG_H_ #define _DLOG_H_ #ifdef HAVE_CONFIG_H # include #endif struct logger { char *name; /* log file name */ int level; /* log level */ int fd; /* log file descriptor */ int nerror; /* # log error */ }; #define LOG_EMERG 0 /* system in unusable */ #define LOG_ALERT 1 /* action must be taken immediately */ #define LOG_CRIT 2 /* critical conditions */ #define LOG_ERR 3 /* error conditions */ #define LOG_WARN 4 /* warning conditions */ #define LOG_NOTICE 5 /* normal but significant condition (default) */ #define LOG_INFO 6 /* informational */ #define LOG_DEBUG 7 /* debug messages */ #define LOG_VERB 8 /* verbose messages */ #define LOG_VVERB 9 /* verbose messages on crack */ #define LOG_VVVERB 10 /* verbose messages on ganga */ #define LOG_PVERB 11 /* periodic verbose messages on crack */ #define LOG_MAX_LEN 256 /* max length of log message */ /* * log_stderr - log to stderr * loga - log always * loga_hexdump - log hexdump always * log_error - error log messages * log_warn - warning log messages * log_panic - log messages followed by a panic * ... * log_debug - debug log messages based on a log level * log_hexdump - hexadump -C of a log buffer */ #ifdef HAVE_DEBUG_LOG #define log_debug(_level, ...) do { \ if (log_loggable(_level) != 0) { \ _log(__FILE__, __LINE__, _level, 0, __VA_ARGS__); \ } \ } while (0) #else #define log_debug(_level, ...) #endif #define log_hexdump(_level, _data, _datalen, ...) 
do { \ if (log_loggable(_level) != 0) { \ _log(__FILE__, __LINE__, _level, 0, __VA_ARGS__); \ _log_hexdump(__FILE__, __LINE__, (char *)(_data), (int)(_datalen), \ __VA_ARGS__); \ } \ } while (0) #define log_stderr(...) do { \ _log_stderr(__VA_ARGS__); \ } while (0) #define log_stdout(...) do { \ _log_stdout(__VA_ARGS__); \ } while (0) #define log_safe(...) do { \ _log_safe(__VA_ARGS__); \ } while (0) #define log_stderr_safe(...) do { \ _log_stderr_safe(__VA_ARGS__); \ } while (0) #define loga(...) do { \ _log(__FILE__, __LINE__, LOG_EMERG, 0, __VA_ARGS__); \ } while (0) #define loga_hexdump(_data, _datalen, ...) do { \ _log(__FILE__, __LINE__, LOG_EMERG, 0, __VA_ARGS__); \ _log_hexdump(__FILE__, __LINE__, (char *)(_data), (int)(_datalen), \ __VA_ARGS__); \ } while (0) \ #define log_error(...) do { \ if (log_loggable(LOG_ERR) != 0) { \ _log(__FILE__, __LINE__, LOG_ERR, 0, __VA_ARGS__); \ } \ } while (0) #define log_warn(...) do { \ if (log_loggable(LOG_WARN) != 0) { \ _log(__FILE__, __LINE__, LOG_WARN, 0, __VA_ARGS__); \ } \ } while (0) #define log_notice(...) do { \ if (log_loggable(LOG_NOTICE) != 0) { \ _log(__FILE__, __LINE__, LOG_NOTICE, 0, __VA_ARGS__); \ } \ } while (0) #define log_panic(...) 
do { \ if (log_loggable(LOG_EMERG) != 0) { \ _log(__FILE__, __LINE__, LOG_EMERG, 1, __VA_ARGS__); \ } \ } while (0) int log_init(int level, char *filename); void log_deinit(void); void log_level_up(void); void log_level_down(void); void log_level_set(int level); void log_stacktrace(void); void log_reopen(void); int log_loggable(int level); void _log(const char *file, int line, int level, int panic, const char *fmt, ...); void _log_stderr(const char *fmt, ...); void _log_stdout(const char *fmt, ...); void _log_safe(const char *fmt, ...); void _log_stderr_safe(const char *fmt, ...); void _log_hexdump(const char *file, int line, char *data, int datalen, const char *fmt, ...); void log_write_len(char * str, size_t len); #endif ================================================ FILE: dep/util/dspecialconfig.h ================================================ #ifndef _DSPECIALCONFIG_H_ #define _DSPECIALCONFIG_H_ #ifdef __APPLE__ #include #endif #ifdef __linux__ #include #include #endif #if (__i386 || __amd64 || __powerpc__) && __GNUC__ #define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #if defined(__clang__) #define HAVE_ATOMIC #endif #if (defined(__GLIBC__) && defined(__GLIBC_PREREQ)) #if (GNUC_VERSION >= 40100 && __GLIBC_PREREQ(2, 6)) #define HAVE_ATOMIC #endif #endif #endif #if defined(__sun) #if defined(__GNUC__) #include #undef isnan #define isnan(x) \ __extension__({ __typeof (x) __x_a = (x); \ __builtin_expect(__x_a != __x_a, 0); }) #undef isfinite #define isfinite(x) \ __extension__ ({ __typeof (x) __x_f = (x); \ __builtin_expect(!isnan(__x_f - __x_f), 1); }) #undef isinf #define isinf(x) \ __extension__ ({ __typeof (x) __x_i = (x); \ __builtin_expect(!isnan(__x_i) && !isfinite(__x_i), 0); }) #define u_int uint #define u_int32_t uint32_t #endif /* __GNUC__ */ #endif /* __sun */ /* Test for proc filesystem */ #ifdef __linux__ #define HAVE_PROC_STAT 1 #define HAVE_PROC_MAPS 1 #define HAVE_PROC_SMAPS 1 #define HAVE_PROC_SOMAXCONN 1 
#endif /* Test for task_info() */ #if defined(__APPLE__) #define HAVE_TASKINFO 1 #endif #endif ================================================ FILE: dep/util/dutil.c ================================================ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_CONFIG_H # include #endif #ifdef HAVE_BACKTRACE # include #endif #include #include /* GCC version >= 4.7 */ #if defined(__ATOMIC_RELAXED) /* GCC version >= 4.1 */ #elif defined(HAVE_ATOMIC) #else pthread_mutex_t atomic_locker = PTHREAD_MUTEX_INITIALIZER; #endif void dassert(const char *cond, const char *file, int line, int panic) { log_error("assert '%s' failed @ (%s, %d)", cond, file, line); if (panic) { dstacktrace(1); abort(); } } void dstacktrace(int skip_count) { #ifdef HAVE_BACKTRACE void *stack[64]; char **symbols; int size, i, j; size = backtrace(stack, 64); symbols = backtrace_symbols(stack, size); if (symbols == NULL) { return; } skip_count++; /* skip the current frame also */ for (i = skip_count, j = 0; i < size; i++, j++) { loga("[%d] %s", j, symbols[i]); } free(symbols); #endif } void dstacktrace_fd(int fd) { #ifdef HAVE_BACKTRACE void *stack[64]; int size; size = backtrace(stack, 64); backtrace_symbols_fd(stack, size, fd); #endif } int _dvscnprintf(char *buf, size_t size, const char *fmt, va_list args) { int n; n = vsnprintf(buf, size, fmt, args); /* * The return value is the number of characters which would be written * into buf not including the trailing '\0'. If size is == 0 the * function returns 0. * * On error, the function also returns 0. This is to allow idiom such * as len += _vscnprintf(...) * * See: http://lwn.net/Articles/69419/ */ if (n <= 0) { return 0; } if (n < (int) size) { return n; } return (int)(size - 1); } int _dscnprintf(char *buf, size_t size, const char *fmt, ...) 
{
    va_list args;
    int n;

    va_start(args, fmt);
    n = _dvscnprintf(buf, size, fmt, args);
    va_end(args);

    return n;
}

/*
 * Write VAL in base _BASE backwards, starting at BUF (which receives the
 * terminating '\0'), and return a pointer to the first digit.
 */
static char *
_safe_utoa(int _base, uint64_t val, char *buf)
{
    char hex[] = "0123456789abcdef";
    uint32_t base = (uint32_t) _base;

    *buf-- = 0;
    do {
        *buf-- = hex[val % base];
    } while ((val /= base) != 0);

    return buf + 1;
}

/*
 * Signed counterpart of _safe_utoa(). Base 10 negatives get a leading
 * '-'. Base 16 negatives are rendered as 16 two's complement hex digits.
 */
static char *
_safe_itoa(int base, int64_t val, char *buf)
{
    char hex[] = "0123456789abcdef";
    char *orig_buf = buf;
    const int32_t is_neg = (val < 0);
    const uint64_t ubase = (uint64_t) base;
    /*
     * The magnitude is taken in unsigned arithmetic: negating INT64_MIN
     * as an int64_t overflows.
     */
    uint64_t uval = is_neg ? (uint64_t) 0 - (uint64_t) val : (uint64_t) val;

    *buf-- = 0;

    if (is_neg && base == 16) {
        int ix;

        /* emit (|val| - 1) over 16 zero-filled digits, inverted below */
        uval -= 1;
        for (ix = 0; ix < 16; ++ix)
            buf[-ix] = '0';
    }

    do {
        *buf-- = hex[uval % ubase];
    } while ((uval /= ubase) != 0);

    if (is_neg && base == 10) {
        *buf-- = '-';
    }

    if (is_neg && base == 16) {
        int ix;

        /* invert every digit: ~(|val| - 1) is val in two's complement */
        buf = orig_buf - 1;
        for (ix = 0; ix < 16; ++ix, --buf) {
            /* *INDENT-OFF* */
            switch (*buf) {
            case '0': *buf = 'f'; break;
            case '1': *buf = 'e'; break;
            case '2': *buf = 'd'; break;
            case '3': *buf = 'c'; break;
            case '4': *buf = 'b'; break;
            case '5': *buf = 'a'; break;
            case '6': *buf = '9'; break;
            case '7': *buf = '8'; break;
            case '8': *buf = '7'; break;
            case '9': *buf = '6'; break;
            case 'a': *buf = '5'; break;
            case 'b': *buf = '4'; break;
            case 'c': *buf = '3'; break;
            case 'd': *buf = '2'; break;
            case 'e': *buf = '1'; break;
            case 'f': *buf = '0'; break;
            }
            /* *INDENT-ON* */
        }
    }

    return buf + 1;
}

/*
 * Skip an optional 'l' or 'll' length modifier at FMT and return the
 * position after it. *HAVE_LONGLONG is set when the argument is 64 bits
 * wide: always for 'll', and for 'l' only where long is as wide as
 * long long.
 */
static const char *
_safe_check_longlong(const char *fmt, int *have_longlong)
{
    *have_longlong = 0;
    if (*fmt == 'l') {
        fmt++;
        if (*fmt != 'l') {
            *have_longlong = (sizeof(long) == sizeof(long long));
        } else {
            fmt++;
            *have_longlong = 1;
        }
    }

    return fmt;
}

/*
 * Limited vsnprintf: handles %d %i %u %x %p %s with optional 'l'/'ll',
 * no width or precision. Unknown conversions produce no output. At most
 * SIZE - 1 characters are stored, followed by a terminating '\0'.
 * Returns the number of characters stored, not counting the terminator;
 * 0 when SIZE is 0 (nothing is written in that case).
 */
int
_safe_vsnprintf(char *to, size_t size, const char *format, va_list ap)
{
    char *start = to;
    char *end;

    /* with no room even for the terminator, start + size - 1 is invalid */
    if (size == 0) {
        return 0;
    }
    end = start + size - 1;

    for (; *format; ++format) {
        int have_longlong = 0;

        if (*format != '%') {
            if (to == end) { /* end of buffer */
                break;
            }
            *to++ = *format; /* copy ordinary char */
            continue;
        }
        ++format; /* skip '%' */

        format = _safe_check_longlong(format, &have_longlong);

        /*
         * A format ending in "%" or "%l" leaves us on the terminator;
         * stop here, or the loop increment would step past it.
         */
        if (*format == '\0') {
            break;
        }

        switch (*format) {
        case 'd':
        case 'i':
        case 'u':
        case 'x':
        case 'p':
        {
            int64_t ival = 0;
            uint64_t uval = 0;

            if (*format == 'p')
                have_longlong = (sizeof(void *) == sizeof(uint64_t));
            if (have_longlong) {
                if (*format == 'u') {
                    uval = va_arg(ap, uint64_t);
                } else {
                    ival = va_arg(ap, int64_t);
                }
            } else {
                if (*format == 'u') {
                    uval = va_arg(ap, uint32_t);
                } else {
                    ival = va_arg(ap, int32_t);
                }
            }

            {
                char buff[22];
                const int base = (*format == 'x' || *format == 'p') ? 16 : 10;

                /* *INDENT-OFF* */
                char *val_as_str = (*format == 'u') ?
                    _safe_utoa(base, uval, &buff[sizeof(buff) - 1]) :
                    _safe_itoa(base, ival, &buff[sizeof(buff) - 1]);
                /* *INDENT-ON* */

                /* Strip off "ffffffff" if we have 'x' format without 'll' */
                if (*format == 'x' && !have_longlong && ival < 0) {
                    val_as_str += 8;
                }

                while (*val_as_str && to < end) {
                    *to++ = *val_as_str++;
                }
                continue;
            }
        }

        case 's':
        {
            const char *val = va_arg(ap, char *);

            if (!val) {
                val = "(null)";
            }
            while (*val && to < end) {
                *to++ = *val++;
            }
            continue;
        }

        default:
            break;
        }
    }

    *to = 0;

    return (int)(to - start);
}

/*
 * Variadic front end of _safe_vsnprintf().
 */
int
_safe_snprintf(char *to, size_t n, const char *fmt, ...)
{
    int result;
    va_list args;

    va_start(args, fmt);
    result = _safe_vsnprintf(to, n, fmt, args);
    va_end(args);

    return result;
}

/*
 * Return the current time in microseconds since Epoch
 */
long long
dusec_now(void)
{
    struct timeval now;
    int64_t usec;
    int status;

    status = gettimeofday(&now, NULL);
    if (status < 0) {
        log_error("gettimeofday failed: %s", strerror(errno));
        return -1;
    }

    usec = (int64_t)now.tv_sec * 1000000LL + (int64_t)now.tv_usec;

    return usec;
}

/*
 * Return the current time in milliseconds since Epoch
 */
long long
dmsec_now(void)
{
    return dusec_now() / 1000LL;
}

/*
 * Return the current time in seconds since Epoch
 */
long long
dsec_now(void)
{
    return dusec_now() / 1000000LL;
}

/* Glob-style pattern matching.
*/
/*
 * Match STRING (STRINGLEN bytes) against PATTERN (PATTERNLEN bytes).
 * Supported syntax: '*', '?', '[...]' classes with '^' negation, 'a-z'
 * ranges and '\' escapes, and '\' to quote the next pattern character.
 * NOCASE non-zero compares through tolower().
 * Returns 1 on a match and 0 otherwise.
 *
 * Neither buffer is read beyond its stated length, so the arguments need
 * not be NUL-terminated.
 */
int
string_match_len(const char *pattern, int patternLen,
                 const char *string, int stringLen, int nocase)
{
    while (patternLen) {
        switch (pattern[0]) {
        case '*':
            /* collapse a run of '*' without looking past the pattern */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            while (stringLen) {
                if (string_match_len(pattern+1, patternLen-1,
                                     string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
            break;
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* a class consumes exactly one character of the string */
            if (stringLen == 0)
                return 0; /* no match */

            pattern++;
            patternLen--;
            not = patternLen > 0 && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* unterminated class: step back onto the last byte */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    /* an escape needs a following byte to quote */
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((int)pattern[0]) == tolower((int)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            /* a literal cannot match when the string is exhausted */
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((int)pattern[0]) != tolower((int)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* only trailing '*' may remain for the match to succeed */
            while (patternLen > 0 && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}

int
string_match(const char *pattern, const char *string, int nocase)
{
    return
string_match_len(pattern,strlen(pattern),string,strlen(string),nocase); } ================================================ FILE: dep/util/dutil.h ================================================ #ifndef _DUTIL_H_ #define _DUTIL_H_ #include #include #define UNUSED(x) (void)(x) #define LF (uint8_t) 10 #define CR (uint8_t) 13 #define CRLF "\x0d\x0a" #define CRLF_LEN (sizeof("\x0d\x0a") - 1) #define NELEMS(a) ((sizeof(a)) / sizeof((a)[0])) #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define SQUARE(d) ((d) * (d)) #define VAR(s, s2, n) (((n) < 2) ? 0.0 : ((s2) - SQUARE(s)/(n)) / ((n) - 1)) #define STDDEV(s, s2, n) (((n) < 2) ? 0.0 : sqrt(VAR((s), (s2), (n)))) /* * Wrappers for defining custom assert based on whether macro * HAVE_ASSERT_PANIC or HAVE_ASSERT_LOG was defined at the moment * ASSERT was called. With HAVE_ASSERT_PANIC a failed assertion calls * dassert() with panic set to 1; with HAVE_ASSERT_LOG it calls dassert() * with panic set to 0; with neither, ASSERT and NOT_REACHED expand to * nothing. */ #ifdef HAVE_ASSERT_PANIC #define ASSERT(_x) do { \ if (!(_x)) { \ dassert(#_x, __FILE__, __LINE__, 1); \ } \ } while (0) #define NOT_REACHED() ASSERT(0) #elif HAVE_ASSERT_LOG #define ASSERT(_x) do { \ if (!(_x)) { \ dassert(#_x, __FILE__, __LINE__, 0); \ } \ } while (0) #define NOT_REACHED() ASSERT(0) #else #define ASSERT(_x) #define NOT_REACHED() #endif void dassert(const char *cond, const char *file, int line, int panic); void dstacktrace(int skip_count); void dstacktrace_fd(int fd); int _dscnprintf(char *buf, size_t size, const char *fmt, ...); int _dvscnprintf(char *buf, size_t size, const char *fmt, va_list args); long long dusec_now(void); long long dmsec_now(void); long long dsec_now(void); /* * A (very) limited version of snprintf * @param to Destination buffer * @param n Size of destination buffer * @param fmt printf() style format string * @returns Number of bytes written, excluding the terminating '\0' * Supports 'd' 'i' 'u' 'x' 'p' 's' conversion * Supports 'l' and 'll' modifiers for integral types * Does not support any width/precision * Implemented with simplicity, and async-signal-safety in mind */ int
_safe_vsnprintf(char *to, size_t size, const char *format, va_list ap); int _safe_snprintf(char *to, size_t n, const char *fmt, ...); #define dsafe_snprintf(_s, _n, ...) \ _safe_snprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) #define dsafe_vsnprintf(_s, _n, _f, _a) \ _safe_vsnprintf((char *)(_s), (size_t)(_n), _f, _a) /* * snprintf(s, n, ...) will write at most n - 1 of the characters printed into * the output string; the nth character then gets the terminating `\0'; if * the return value is greater than or equal to the n argument, the string * was too short and some of the printed characters were discarded; the output * is always null-terminated. * * Note that, the return value of snprintf() is always the number of characters * that would be printed into the output string, assuming n were limited not * including the trailing `\0' used to end output. * * scnprintf(s, n, ...) is same as snprintf() except, it returns the number * of characters printed into the output string not including the trailing '\0' */ #define dsnprintf(_s, _n, ...) \ snprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) #define dscnprintf(_s, _n, ...) 
\ _dscnprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) #define dvsnprintf(_s, _n, _f, _a) \ vsnprintf((char *)(_s), (size_t)(_n), _f, _a) #define dvscnprintf(_s, _n, _f, _a) \ _dvscnprintf((char *)(_s), (size_t)(_n), _f, _a) #define dstrftime(_s, _n, fmt, tm) \ (int)strftime((char *)(_s), (size_t)(_n), fmt, tm) int string_match_len(const char *pattern, int patternLen, const char *string, int stringLen, int nocase); int string_match(const char *pattern, const char *string, int nocase); /* Atomic API */ /* GCC version >= 4.7 */ #if defined(__ATOMIC_RELAXED) #define atomic_add(_value, _n) __atomic_add_fetch(&_value, (_n), __ATOMIC_RELAXED) #define atomic_sub(_value, _n) __atomic_sub_fetch(&_value, (_n), __ATOMIC_RELAXED) #define atomic_set(_value, _n) __atomic_store_n(&_value, (_n), __ATOMIC_RELAXED) #define atomic_get(_value, _v) do { \ __atomic_load(&_value, _v, __ATOMIC_RELAXED); \ } while(0) #define ATOMIC_LOCK_TYPE "__ATOMIC_RELAXED" /* GCC version >= 4.1 */ #elif defined(HAVE_ATOMIC) #define atomic_add(_value, _n) __sync_add_and_fetch(&_value, (_n)) #define atomic_sub(_value, _n) __sync_sub_and_fetch(&_value, (_n)) #define atomic_set(_value, _n) __sync_lock_test_and_set(&_value, (_n)) #define atomic_get(_value, _v) do { \ (*_v) = __sync_add_and_fetch(&_value, 0); \ } while(0) #define ATOMIC_LOCK_TYPE "HAVE_ATOMIC" #else extern pthread_mutex_t atomic_locker; #define atomic_add(_value, _n) do { \ pthread_mutex_lock(&atomic_locker); \ _value += (_n); \ pthread_mutex_unlock(&atomic_locker); \ } while(0) #define atomic_sub(_value, _n) do { \ pthread_mutex_lock(&atomic_locker); \ _value -= (_n); \ pthread_mutex_unlock(&atomic_locker); \ } while(0) #define atomic_set(_value, _n) do { \ pthread_mutex_lock(&atomic_locker); \ _value = (_n); \ pthread_mutex_unlock(&atomic_locker); \ } while(0) #define atomic_get(_value, _v) do { \ pthread_mutex_lock(&atomic_locker); \ (*_v) = _value; \ pthread_mutex_unlock(&atomic_locker); \ } while(0) #define ATOMIC_LOCK_TYPE 
"pthread_mutex_lock" #endif #endif ================================================ FILE: m4/.gitignore ================================================ # Ignore everything * # Except me !.gitignore ================================================ FILE: notes/c-styleguide.txt ================================================ - No literal tabs. Expand tabs to 4 spaces. - Indentation is 4 spaces. - No more than 3 levels of indentation, otherwise you should think about refactoring your code. - Use one statement per line. - Make sure that your editor does not leave space at the end of the line. - snake_case for variable, function and file names. - Use your own judgment when naming variables and functions. Be as Spartan as possible. Eg: Using name like this_variable_is_a_temporary_counter will usually be frowned upon. - Don’t use local variables or parameters that shadow global identifiers. GCC’s ‘-Wshadow’ option can help you to detect this problem. - Avoid using int, char, short, long. Instead use int8_t uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t, which are available in . However, when interfacing with system calls and libraries you cannot get away from using int and char. - Use bool for boolean variables. You have to include - Avoid using a bool as type for struct member names. Instead use unsigned 1-bit bit field. Eg: struct foo { unsigned is_bar:1; }; - Always use size_t type when dealing with sizes of objects or memory ranges. - Your code should be 64-bit and 32-bit friendly. Bear in mind problems of printing, comparisons, and structure alignment. You have to include to get generic format specifier macros for printing. - 80 column line limit. - If you have to wrap a long statement (> 80 column), put the operator at the end of the line and indent the next line at the same column as the arguments in the previous column. 
Eg: while (cnt < 20 && this_variable_name_is_too_long && ep != NULL) { z = a + really + long + statement + that + needs + three + lines + gets + indented + on + the + same + column + as + the + previous + column } and: int a = function(param_a, param_b, param_c, param_d, param_e, param_f, param_g, param_h, param_i, param_j, param_k, param_l); - Always use braces for all conditional blocks (if, switch, for, while, do). This holds good even for single statement conditional blocks. Eg: if (cond) { stmt; } - Placement of braces for non-function statement blocks - put opening brace last on the line and closing brace first. Eg: if (x is true) { we do y } - Placement of brace for functions - put the opening brace at the beginning of the next line and closing brace first. This is useful because several tools look for opening brace in column one to find beginning of C functions. Eg: int function(int x) { body of the function } - Closing brace is empty on a line of its own, except in cases where it is followed by a continuation of the same statement, i.e. a "while" in a do-statement or an "else" in an if-statement, like this: do { body of do-loop } while (condition); and, if (x == y) { .. } else if (x > y) { ... } else { .... } - Column align switch keyword and the corresponding case/default keyword. Eg: switch (alphabet) { case 'a': case 'b': printf("I am a or b\n"); break; default: break; } - Forever loops are done with for, and not while. Eg: for (;;) { stmt; } - Don't use a space after a function name. - Do not needlessly surround the return expression with parentheses. - Use space after keywords. Exceptions are sizeof, typeof, alignof and __attribute__, which look like functions. - Do not add spaces around (inside) parenthesized expressions. s = sizeof( sizeof(*p)) ); /* bad example */ s = sizeof(sizeof(*p)); /* good example */ - Casts should not be followed by space. 
Eg: int q = *(int *)&p - There is no need to type cast when assigning a void pointer to a non-void pointer, or vice versa. - Avoid using goto statements. However, there is an exception to this rule: a single goto label within a function, together with one or more goto statements, comes in handy when a function exits from multiple locations and some common work such as cleanup has to be done. Eg: int fun(void) { int result = 0; char *buffer; buffer = malloc(1024); if (buffer == NULL) { return -1; } if (condition1) { while (loop1) { ... } result = 1; goto out; } ... out: free(buffer); return result; } - When declaring pointer data, use '*' adjacent to the data name and not adjacent to the type name. Eg: int function(int *p) { char *p; } - Use one space around (on each side of) most binary and ternary operators, such as any of these: = + - < > * / % | & ^ <= >= == != ? : but no space after unary operators: & * + - ~ ! sizeof typeof alignof __attribute__ defined no space before the postfix increment & decrement unary operators: ++ -- and no space around the '.' and "->" structure member operators. - 0 and NULL; use 0 for integers, 0.0 for doubles, NULL for pointers, and '\0' for chars. - Test pointers against NULL. E.g., use: if (p == NULL) not: !(p) - Do not use ! for tests unless it is a boolean. E.g., use: if (*p == '\0') not: if (!*p) - Don't use assignments inside if or while-conditions. E.g., use: struct foo *foo; foo = malloc(sizeof(*foo)); if (foo == NULL) { return -1; } not: struct foo *foo; if ((foo = malloc(sizeof(*foo))) == NULL) { return -1; } - Don't ever use typedef for structure types. Typedefs are problematic because they do not properly hide their underlying type; for example you need to know if the typedef is the structure itself or a pointer to the structure. In addition they must be declared exactly once, whereas an incomplete structure type can be mentioned as many times as necessary.
Typedefs are difficult to use in stand-alone header files: the header that defines the typedef must be included before the header that uses it, or by the header that uses it (which causes namespace pollution), or there must be a back-door mechanism for obtaining the typedef. - The only exception for using a typedef is when you are defining a type for a function pointer or a type for an enum. Eg: typedef void (*foo_handler_t)(int, void *); or: typedef enum types { TYPE_1, TYPE_2 } types_t; - Use just one variable declaration per line when variables are part of a struct. This leaves you room for a small comment on each item, explaining its use. Declarations should also be aligned. Eg, use: struct foo { int *foo_a; /* comment for foo_a */ int foo_b; /* comment for foo_b */ unsigned foo_c:1; /* comment for foo_c */ }; and not: struct foo { int *foo_a, foo_b; unsigned foo_c:1; }; - For variable declarations outside a struct, either collect all the declarations of the same type on a single line, or use one variable per line if the variable's purpose needs to be commented. Eg: char *a, *b, c; or: char *a, *b; char c; /* comments for c */ - Avoid magic numbers because no-one (including the author) has a clue what they mean after a month. - Function definitions should start the name of the function in column one. This is useful because it makes searching for function definitions fairly trivial. Eg: static char * concat(char *s1, char *s2) { body of the function } - Functions and variables local to a file should be static. - Separate two successive functions with one blank line. - Include parameter names with their datatypes in function declarations. Eg: void function(int param); - Functions should be short and sweet, and do just one thing. They should fit on one or two screenfuls of text (80 x 24 screen size), and do one thing and do that well. The maximum length of a function is inversely proportional to the complexity and indentation level of that function.
So, if you have a conceptually simple function that is just one long (but simple) case-statement, where you have to do lots of small things for a lot of different cases, it's OK to have a longer function. Another measure of the function is the number of local variables. They shouldn't exceed 5-10, or you're doing something wrong. Re-think the function, and split it into smaller pieces. A human brain can generally easily keep track of about 7 different things, anything more and it gets confused. You know you're brilliant, but maybe you'd like to understand what you did 2 weeks from now. - Use const for function parameters passed by reference, if the passed pointer has no side effect. - C style comments only. Don't use // for single line comments. Instead use /* ... */ style. - For multi-line comments use the following style: /* * This is the preferred style for multi-line * comments in the Linux kernel source code. * Please use it consistently. * * Description: A column of asterisks on the left side, * with beginning and ending almost-blank lines. */ - To comment out a block of code spanning several lines, use the preprocessor directive "#if 0 ... #endif" - Please write a brief comment at the start of each source file, with the file name and a line or two about the overall purpose of the file. - All major functions should have comments describing what they do at the head of the function. Avoid putting comments in the function body unless absolutely needed. If possible, add a comment on what sorts of arguments the function gets, and what the possible values of arguments mean and what they are used for and the significance of return value if there is one. It is not necessary to duplicate in words the meaning of the C argument declarations, if a C type is being used in its customary fashion.
If there is anything nonstandard about its use (such as an argument of type char * which is really the address of the second character of a string, not the first), or any possible values that would not work the way one would expect (such as, that strings containing newlines are not guaranteed to work), be sure to say so. Eg: /* * Try to acquire a physical address lock while a pmap is locked. If we * fail to trylock we unlock and lock the pmap directly and cache the * locked pa in *locked. The caller should then restart their loop in case * the virtual to physical mapping has changed. * * Returns 0 on success and -1 on failure. */ int vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) { ... - The comment on a function is much clearer if you use the argument names to speak about the argument values. The variable name itself should be lower case, but write it in upper case when you are speaking about the value rather than the variable itself. Thus, “the inode number NODE_NUM” rather than “an inode”. - Every struct definition should have an accompanying comment that describes what it is for and how it should be used. - Finally, while comments are absolutely important to keep the code readable, remember that the best code is self-documenting. Giving sensible names to types and variables is much better than using obscure names that you must then explain through comments. - Recommend using UPPERCASE for macro names. However, sometimes using lowercase for macro names makes sense when macros masquerade as well-known function calls. Eg, it makes sense to write the wrapper for the standard free() function in lowercase to keep the readability consistent: #define my_free(_p) do { \ free(_p); \ (_p) = NULL; \ } while (0) - Use enums when defining more than one related constants. All enumeration values are in UPPERCASE. - Avoid macros as much as possible and use inline functions, enums and const variables wherever you can. 
- For macros encapsulating compound statements, right justify the backslashes and enclose it in do { ... } while (0) - For parameterized macros, all the parameters used in the macro body must be surrounded by parentheses. Eg: #define ADD_1(_x) ((_x) + 1) - Use sizeof(varname) instead of sizeof(type) whenever possible. Eg: char *p; p = malloc(sizeof(*p)); /* good example */ p = malloc(sizeof(char)); /* bad example */ - All variables should be declared at the beginning of a scope block {..}. It is even preferred to declare all variables at the beginning of the function so that all the local variable declarations is in one place and we can see the comprehensive list in one glance. - Global structs should be declared at the top of the file in which they are used, or in separate header files if they are used in multiple source files. - Declarations of external functions and functions to appear later in the source file should all go in one place near the beginning of the file, somewhere before the first function definition in the file or else should go in a header file. - Use of extern should be considered as evil, if it is used in header files to reference global variables. - Don’t put extern declarations inside functions. - Usually every *.c file should have an associated *.h file. There are some exceptions to this rule, such as unit tests and small *.c files containing just the main() function. - Every header file in the source code must have preprocessor conditional to prevent the header file from being scanned multiple times and avoiding mutual dependency cycles. Alternatively you can use #pragma once directive, as it avoids name clashes and increases the compile speed. Eg, for a header file named foo.h, the entire contents of the header file must be between the guard macros as follows: #ifndef _FOO_H_ #define _FOO_H_ ... #endif /* _FOO_H_ */ Or, #pragma once #ifndef _FOO_H_ #define _FOO_H_ ... 
#endif /* _FOO_H_ */ - Don't use #include when a forward declaration would suffice. - Functions defined in header files should be static inline. - Don’t make the program ugly just to placate GCC when extra warning options such as ‘-Wconversion’ or ‘-Wundef’ are used. These options can help in finding bugs, but they can also generate so many false alarms that it hurts readability to silence them with unnecessary casts, wrappers, and other complications. - Conditional compilation: when supporting configuration options already known when building your program we prefer using if (... ) over conditional compilation, as in the former case the compiler is able to perform more extensive checking of all possible code paths. Eg, use: if (HAS_FOO) ... else ... instead of: #ifdef HAS_FOO ... #else ... #endif A modern compiler such as GCC will generate exactly the same code in both cases and of course, the former method assumes that HAS_FOO is defined as either 0 or 1. - Finally, rules are rules. Sometimes they are sensible and sometimes not and regardless of your preference, we would like you to follow them. A project is easier to follow if all project contributors follow the style rules so that they can all read and understand everyone's code easily. But remember, like all good rules, there are exceptions where it makes sense not to be too rigid on the grounds of common sense and consistency!
================================================ FILE: notes/debug.txt ================================================ - strace strace -o strace.txt -ttT -s 1024 -p `pgrep nutcracker` - libyaml (yaml-0.1.4) - yaml tokens: 0 YAML_NO_TOKEN, 1 YAML_STREAM_START_TOKEN, 2 YAML_STREAM_END_TOKEN, 3 YAML_VERSION_DIRECTIVE_TOKEN, 4 YAML_TAG_DIRECTIVE_TOKEN, 5 YAML_DOCUMENT_START_TOKEN, 6 YAML_DOCUMENT_END_TOKEN, 7 YAML_BLOCK_SEQUENCE_START_TOKEN, 8 YAML_BLOCK_MAPPING_START_TOKEN, 9 YAML_BLOCK_END_TOKEN, 10 YAML_FLOW_SEQUENCE_START_TOKEN, 11 YAML_FLOW_SEQUENCE_END_TOKEN, 12 YAML_FLOW_MAPPING_START_TOKEN, 13 YAML_FLOW_MAPPING_END_TOKEN, 14 YAML_BLOCK_ENTRY_TOKEN, 15 YAML_FLOW_ENTRY_TOKEN, 16 YAML_KEY_TOKEN, 17 YAML_VALUE_TOKEN, 18 YAML_ALIAS_TOKEN, 19 YAML_ANCHOR_TOKEN, 20 YAML_TAG_TOKEN, 21 YAML_SCALAR_TOKEN - yaml events 0 YAML_NO_EVENT, 1 YAML_STREAM_START_EVENT, 2 YAML_STREAM_END_EVENT, 3 YAML_DOCUMENT_START_EVENT, 4 YAML_DOCUMENT_END_EVENT, 5 YAML_ALIAS_EVENT, 6 YAML_SCALAR_EVENT, 7 YAML_SEQUENCE_START_EVENT, 8 YAML_SEQUENCE_END_EVENT, 9 YAML_MAPPING_START_EVENT, 10 YAML_MAPPING_END_EVENT - sys/queue.h queue.h is a generic linked list library adapted from BSD. It has three macro knobs that are useful for debugging: - QUEUE_MACRO_SCRUB nullifies links (next and prev pointers) of deleted elements and catches cases where we are attempting to do operations on an element that has already been unlinked. - QUEUE_MACRO_TRACE keeps track of __FILE__ and __LINE__ of last two updates to the list data structure. - QUEUE_MACRO_ASSERT verifies the sanity of list data structure on every operation. 
- valgrind valgrind --tool=memcheck --leak-check=yes - Core dump ulimit -c unlimited - Generate ENOMEM to test "Out of Memory" ulimit -m # limit maximum memory size ulimit -v # limit virtual memory - get nutcracker stats printf "" | socat - TCP:localhost:22222 | tee stats.txt printf "" | nc localhost 22222 | python -mjson.tool - Signalling and Logging SIGTTIN - To raise the log level SIGTTOU - To lower the log level SIGHUP - To reopen log file - Error codes: http://www.cs.utah.edu/dept/old/texinfo/glibc-manual-0.02/library_2.html /usr/include/asm-generic/errno-base.h /usr/include/asm-generic/errno.h - epoll (linux) union epoll_data { void *ptr; int fd; uint32_t u32; uint64_t u64; }; struct epoll_event { uint32_t events; /* epoll events */ struct epoll_data data; /* user data variable */ }; /* events */ EPOLLIN = 0x001, EPOLLPRI = 0x002, EPOLLOUT = 0x004, EPOLLERR = 0x008, EPOLLHUP = 0x010, EPOLLRDNORM = 0x040, EPOLLRDBAND = 0x080, EPOLLWRNORM = 0x100, EPOLLWRBAND = 0x200, EPOLLMSG = 0x400, EPOLLRDHUP = 0x2000, EPOLLONESHOT = (1 << 30), EPOLLET = (1 << 31) /* opcodes */ EPOLL_CTL_ADD = 1 /* add a file descriptor to the interface */ EPOLL_CTL_DEL = 2 /* remove a file descriptor from the interface */ EPOLL_CTL_MOD = 3 /* change file descriptor epoll_event structure */ - kqueue (bsd) struct kevent { uintptr_t ident; /* identifier for this event */ int16_t filter; /* filter for event */ uint16_t flags; /* general flags */ uint32_t fflags; /* filter-specific flags */ intptr_t data; /* filter-specific data */ void *udata; /* opaque user data identifier */ }; /* flags / events */ EV_ADD = 0x0001 /* action - add event to kq (implies enable) */ EV_DELETE = 0x0002 /* action - delete event from kq */ EV_ENABLE = 0x0004 /* action - enable event */ EV_DISABLE = 0x0008 /* action - disable event (not reported) */ EV_RECEIPT = 0x0040 /* action - force EV_ERROR on success, data == 0 */ EV_ONESHOT = 0x0010 /* flags - only report one occurrence */ EV_CLEAR = 0x0020 /* flags - clear event
state after reporting */ EV_DISPATCH = 0x0080 /* flags - disable event after reporting */ EV_SYSFLAGS = 0xF000 /* flags - reserved by system */ EV_FLAG0 = 0x1000 /* flags - filter-specific flag */ EV_FLAG1 = 0x2000 /* flags - filter-specific flag */ EV_EOF = 0x8000 /* returned values - EOF detected */ EV_ERROR = 0x4000 /* returned values - error, data contains errno */ /* filters */ EVFILT_READ (-1) /* readable */ EVFILT_WRITE (-2) /* writable */ EVFILT_AIO (-3) /* attached to aio requests */ EVFILT_VNODE (-4) /* attached to vnodes */ EVFILT_PROC (-5) /* attached to struct proc */ EVFILT_SIGNAL (-6) /* attached to struct proc */ EVFILT_TIMER (-7) /* timers */ EVFILT_MACHPORT (-8) /* mach portsets */ EVFILT_FS (-9) /* filesystem events */ EVFILT_USER (-10) /* user events */ EVFILT_VM (-12) /* virtual memory events */ EV_CLEAR behaves like EPOLLET because it resets the event after it is returned; without this flag, the event would be repeatedly returned. - poll (unix) POLLIN 0x001 /* there is data to read */ POLLPRI 0x002 /* there is urgent data to read */ POLLOUT 0x004 /* writing now will not block */ POLLRDNORM 0x040 /* normal data may be read */ POLLRDBAND 0x080 /* priority data may be read */ POLLWRNORM 0x100 /* writing now will not block */ POLLWRBAND 0x200 /* priority data may be written */ POLLMSG 0x400 POLLREMOVE 0x1000 POLLRDHUP 0x2000 POLLERR 0x008 /* error condition */ POLLHUP 0x010 /* hung up */ POLLNVAL 0x020 /* invalid polling request */ - event ports (solaris) typedef struct port_event { int portev_events; /* event data is source specific */ ushort_t portev_source; /* event source */ ushort_t portev_pad; /* port internal use */ uintptr_t portev_object; /* source specific object */ void *portev_user; /* user cookie */ } port_event_t; /* port sources */ PORT_SOURCE_AIO 1 PORT_SOURCE_TIMER 2 PORT_SOURCE_USER 3 PORT_SOURCE_FD 4 PORT_SOURCE_ALERT 5 PORT_SOURCE_MQ 6 PORT_SOURCE_FILE 7 ================================================ FILE: notes/socket.txt 
================================================ - int listen(int sockfd, int backlog); Linux: The backlog argument defines the maximum length to which the queue of pending connections for sockfd may grow. If a connection request arrives when the queue is full, the client may receive an error with an indication of ECONNREFUSED or, if the underlying protocol supports retransmission, the request may be ignored so that a later reattempt at connection succeeds. backlog specifies the queue length for completely established sockets waiting to be accepted, instead of the number of incomplete connection requests. The maximum length of the queue for incomplete sockets can be set using /proc/sys/net/ipv4/tcp_max_syn_backlog. If the backlog argument is greater than the value in /proc/sys/net/core/somaxconn, then it is silently truncated to that value; the default value in this file is 128. In kernels before 2.4.25, this limit was a hard coded value, SOMAXCONN, with the value 128. BSD: The backlog argument defines the maximum length the queue of pending connections may grow to. The real maximum queue length will be 1.5 times more than the value specified in the backlog argument. A subsequent listen() system call on the listening socket allows the caller to change the maximum queue length using a new backlog argument. If a connection request arrives with the queue full the client may receive an error with an indication of ECONNREFUSED, or, in the case of TCP, the connection will be silently dropped. The listen() system call appeared in 4.2BSD. The ability to configure the maximum backlog at run-time, and to use a negative backlog to request the maximum allowable value, was introduced in FreeBSD 2.2. 
- SO_LINGER (linger) socket option This option specifies what should happen when the socket of a type that promises reliable delivery still has untransmitted messages when it is closed struct linger { int l_onoff; /* nonzero to linger on close */ int l_linger; /* time to linger (in secs) */ }; l_onoff = 0 (default), then l_linger value is ignored and close returns immediately. But if there is any data still remaining in the socket send buffer, the system will try to deliver the data to the peer l_onoff = nonzero, then close blocks until data is transmitted or the l_linger timeout period expires a) l_linger = 0, TCP aborts connection, discards any data still remaining in the socket send buffer and sends RST to peer. This avoids the TCP's TIME_WAIT state b) l_linger = nonzero, then kernel will linger when socket is closed. If there is any pending data in the socket send buffer, the kernel waits until all the data is sent and acknowledged by peer TCP, or the linger time expires If a socket is set as nonblocking, it will not wait for close to complete even if linger time is nonzero - TIME_WAIT state The end that performs active close i.e. the end that sends the first FIN goes into TIME_WAIT state. After a FIN packet is sent to the peer and after that peer's FIN/ACK arrives and is ACKed, we go into a TIME_WAIT state. The duration that the end point remains in this state is 2 x MSL (maximum segment lifetime). The reason that the duration of the TIME_WAIT state is 2 x MSL is because the maximum amount of time a packet can wander around a network is assumed to be MSL seconds. The factor of 2 is for the round-trip. The recommended value for MSL is 120 seconds, but Berkeley derived implementations normally use 30 seconds instead. This means a TIME_WAIT delay is between 1 and 4 minutes.
For Linux, the TIME_WAIT state duration is 1 minute (net/tcp.h): #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ TIME_WAIT state on client, combined with limited number of ephemeral ports available for TCP connections severely limits the rate at which new connections to the server can be created. On Linux, by default ephemeral ports are in the range of 32768 to 61000: $ cat /proc/sys/net/ipv4/ip_local_port_range 32768 61000 So with a TIME_WAIT state duration of 1 minute, the maximum sustained rate for any client is ~470 new connections per second - TCP keepalive TCP keepalive packet (TCP packet with no data and the ACK flag turned on) is used to assert that connection is still up and running. This is useful because if the remote peer goes away without closing their connection, the keepalive probe will detect this and notice that the connection is broken even if there is no traffic on it. Imagine, the following scenario: You have a valid TCP connection established between two endpoints A and B. B terminates abnormally (think kernel panic or unplugging of network cable) without sending anything over the network to notify A that connection is broken. A, from its side, is ready to receive data, and has no idea that B has gone away. Now B comes back up again, and while A knows about a connection with B and still thinks that it is active, B has no such idea. A tries to send data to B over a dead connection, and B replies with an RST packet, causing A to finally close the connection. So, without a keepalive probe A would never close the connection if it never sent data over it. - There are four socket functions that pass a socket address structure from the process to the kernel - bind, connect, sendmsg and sendto. These functions are also responsible for passing the length of the sockaddr that they are passing (socklen_t).
There are five socket functions that pass a socket from the kernel to the process - accept, recvfrom, recvmsg, getpeername, getsockname. The kernel is also responsible for returning the length of the sockaddr struct that it returns back to the userspace Different sockaddr structs: 1. sockaddr_in 2. sockaddr_in6 3. sockaddr_un Special types of in_addr_t /* Address to accept any incoming messages */ #define INADDR_ANY ((in_addr_t) 0x00000000) /* Address to send to all hosts */ #define INADDR_BROADCAST ((in_addr_t) 0xffffffff) /* Address indicating an error return */ #define INADDR_NONE ((in_addr_t) 0xffffffff) ================================================ FILE: scripts/.gitignore ================================================ *.pyc *.out *.log ================================================ FILE: src/Makefile.am ================================================ MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = if !OS_SOLARIS AM_CPPFLAGS += -D_GNU_SOURCE endif AM_CPPFLAGS += -I $(top_srcdir)/dep/dhashkit AM_CPPFLAGS += -I $(top_srcdir)/dep/ae AM_CPPFLAGS += -I $(top_srcdir)/dep/util AM_CPPFLAGS += -I $(top_srcdir)/dep/jemalloc-4.2.0/include AM_CPPFLAGS += -I $(top_srcdir)/dep/dmalloc AM_CPPFLAGS += -I $(top_srcdir)/dep/sds AM_CPPFLAGS += -I $(top_srcdir)/dep/darray AM_CPPFLAGS += -I $(top_srcdir)/dep/dlist AM_CFLAGS = AM_CFLAGS += -fno-strict-aliasing AM_CFLAGS += -Wall -Wshadow AM_CFLAGS += -Wpointer-arith AM_CFLAGS += -Winline AM_CFLAGS += -Wunused-function -Wunused-variable -Wunused-value AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value AM_CFLAGS += -Wconversion -Wsign-compare AM_CFLAGS += -Wstrict-prototypes -Wmissing-prototypes -Wredundant-decls -Wmissing-declarations AM_LDFLAGS = AM_LDFLAGS += -lm -lpthread -rdynamic if !OS_DARWIN AM_LDFLAGS += -lrt endif if OS_SOLARIS AM_LDFLAGS += -lnsl -lsocket endif if OS_FREEBSD AM_LDFLAGS += -lexecinfo endif sbin_PROGRAMS = vire vire_SOURCES = \ vr_aof.c vr_aof.h \ vr_block.c vr_block.h \ vr_client.c vr_client.h \ 
vr_command.c vr_command.h \ vr_conf.c vr_conf.h \ vr_connection.c vr_connection.h \ vr_core.c vr_core.h \ vr_db.c vr_db.h \ vr_dict.c vr_dict.h \ vr_eventloop.c vr_eventloop.h \ vr_intset.c vr_intset.h \ vr_listen.c vr_listen.h \ vr_lzf.h vr_lzfP.h \ vr_lzf_c.c vr_lzf_d.c \ vr_master.c vr_master.h \ vr_multi.c vr_multi.h \ vr_notify.c vr_notify.h \ vr_object.c vr_object.h \ vr_pubsub.c vr_pubsub.h \ vr_quicklist.c vr_quicklist.h \ vr_rbtree.c vr_rbtree.h \ vr_rdb.c vr_rdb.h \ vr_replication.c vr_replication.h \ vr_scripting.c vr_scripting.h \ vr_server.c vr_server.h \ vr_signal.c vr_signal.h \ vr_slowlog.c vr_slowlog.h \ vr_specialconfig.h \ vr_stats.c vr_stats.h \ vr_thread.c vr_thread.h \ vr_t_hash.c vr_t_hash.h \ vr_t_list.c vr_t_list.h \ vr_t_set.c vr_t_set.h \ vr_t_string.c vr_t_string.h \ vr_t_zset.c vr_t_zset.h \ vr_util.c vr_util.h \ vr_worker.c vr_worker.h \ vr_backend.c vr_backend.h \ vr_ziplist.c vr_ziplist.h \ vr_zipmap.c vr_zipmap.h \ vr_bitops.c vr_bitops.h \ vr_hyperloglog.c vr_hyperloglog.h \ vr.c vire_LDADD = $(top_builddir)/dep/util/libdutil.a vire_LDADD += $(top_builddir)/dep/ae/libae.a vire_LDADD += $(top_builddir)/dep/sds/libsds.a vire_LDADD += $(top_builddir)/dep/darray/libdarray.a vire_LDADD += $(top_builddir)/dep/dlist/libdlist.a vire_LDADD += $(top_builddir)/dep/dhashkit/libdhashkit.a vire_LDADD += $(top_builddir)/dep/jemalloc-4.2.0/lib/libjemalloc.a vire_LDADD += $(top_builddir)/dep/dmalloc/libdmalloc.a ================================================ FILE: src/vr.c ================================================ #include #include #include #include #include #include #include #include #include #include #include #define VR_CONF_PATH "conf/vire.conf" #define VR_LOG_DEFAULT LOG_NOTICE #define VR_LOG_MIN LOG_EMERG #define VR_LOG_MAX LOG_PVERB #define VR_LOG_PATH NULL #define VR_PORT 8889 #define VR_ADDR "0.0.0.0" #define VR_INTERVAL (30 * 1000) /* in msec */ #define VR_PID_FILE NULL #define VR_THREAD_NUM_DEFAULT 
(sysconf(_SC_NPROCESSORS_ONLN)>6?6:sysconf(_SC_NPROCESSORS_ONLN)) static int show_help; static int show_version; static int test_conf; static int daemonize; static struct option long_options[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, { "test-conf", no_argument, NULL, 't' }, { "daemonize", no_argument, NULL, 'd' }, { "verbose", required_argument, NULL, 'v' }, { "output", required_argument, NULL, 'o' }, { "conf-file", required_argument, NULL, 'c' }, { "pid-file", required_argument, NULL, 'p' }, { "thread-num", required_argument, NULL, 'T' }, { NULL, 0, NULL, 0 } }; static char short_options[] = "hVtdv:o:c:p:T:"; static rstatus_t vr_daemonize(int dump_core) { rstatus_t status; pid_t pid, sid; int fd; pid = fork(); switch (pid) { case -1: log_error("fork() failed: %s", strerror(errno)); return VR_ERROR; case 0: break; default: /* parent terminates */ _exit(0); } /* 1st child continues and becomes the session leader */ sid = setsid(); if (sid < 0) { log_error("setsid() failed: %s", strerror(errno)); return VR_ERROR; } if (signal(SIGHUP, SIG_IGN) == SIG_ERR) { log_error("signal(SIGHUP, SIG_IGN) failed: %s", strerror(errno)); return VR_ERROR; } pid = fork(); switch (pid) { case -1: log_error("fork() failed: %s", strerror(errno)); return VR_ERROR; case 0: break; default: /* 1st child terminates */ _exit(0); } /* 2nd child continues */ /* change working directory */ if (dump_core == 0) { status = chdir("/"); if (status < 0) { log_error("chdir(\"/\") failed: %s", strerror(errno)); return VR_ERROR; } } /* clear file mode creation mask */ umask(0); /* redirect stdin, stdout and stderr to "/dev/null" */ fd = open("/dev/null", O_RDWR); if (fd < 0) { log_error("open(\"/dev/null\") failed: %s", strerror(errno)); return VR_ERROR; } status = dup2(fd, STDIN_FILENO); if (status < 0) { log_error("dup2(%d, STDIN) failed: %s", fd, strerror(errno)); close(fd); return VR_ERROR; } status = dup2(fd, STDOUT_FILENO); if (status < 0) { log_error("dup2(%d, 
STDOUT) failed: %s", fd, strerror(errno)); close(fd); return VR_ERROR; } status = dup2(fd, STDERR_FILENO); if (status < 0) { log_error("dup2(%d, STDERR) failed: %s", fd, strerror(errno)); close(fd); return VR_ERROR; } if (fd > STDERR_FILENO) { status = close(fd); if (status < 0) { log_error("close(%d) failed: %s", fd, strerror(errno)); return VR_ERROR; } } return VR_OK; } static void vr_print_run(struct instance *nci) { int status; struct utsname name; status = uname(&name); if (nci->log_filename) { char *ascii_logo = " _._ \n" " _.-``__ ''-._ \n" " _.-`` `. *_. ''-._ Vire %s %s bit\n" " .-`` .-```. ```\-/ _.,_ ''-._ \n" " ( | | .-` `, ) Running in %s mode\n" " |`-._`-...-` __...-.``-._;'` _.-'| Port: %d\n" " | `-._ `._ / _.-' | PID: %ld\n" " `-._ `-._ `-./ _.-' _.-' OS: %s %s %s\n" " |`-._`-._ `-.__.-' _.-'_.-'| \n" " | `-._`-._ _.-'_.-' | https://github.com/vipshop/vire\n" " `-._ `-._`-.__.-'_.-' _.-' \n" " |`-._`-._ `-.__.-' _.-'_.-'| \n" " | `-._`-._ _.-'_.-' | \n" " `-._ `-._`-.__.-'_.-' _.-' \n" " `-._ `-.__.-' _.-' \n" " `-._ _.-' \n" " `-.__.-' \n\n"; char *buf = dalloc(1024*16); snprintf(buf,1024*16,ascii_logo, VR_VERSION_STRING, (sizeof(long) == 8) ? "64" : "32", "standalone", server.port, (long) nci->pid, status < 0 ? " ":name.sysname, status < 0 ? " ":name.release, status < 0 ? " ":name.machine); log_write_len(buf, strlen(buf)); dfree(buf); }else { char buf[256]; snprintf(buf,256,"Vire %s, %s bit, %s mode, port %d, pid %ld, built for %s %s %s ready to run.\n", VR_VERSION_STRING, (sizeof(long) == 8) ? "64" : "32", "standalone", server.port, (long) nci->pid, status < 0 ? " ":name.sysname, status < 0 ? " ":name.release, status < 0 ? 
" ":name.machine); log_write_len(buf, strlen(buf)); } } static void vr_print_done(void) { loga("done, rabbit done"); } static void vr_show_usage(void) { log_stderr( "Usage: vire [-?hVdt] [-v verbosity level] [-o output file]" CRLF " [-c conf file] [-p pid file]" CRLF " [-T worker threads number]" CRLF ""); log_stderr( "Options:" CRLF " -h, --help : this help" CRLF " -V, --version : show version and exit" CRLF " -t, --test-conf : test configuration for syntax errors and exit" CRLF " -d, --daemonize : run as a daemon"); log_stderr( " -v, --verbose=N : set logging level (default: %d, min: %d, max: %d)" CRLF " -o, --output=S : set logging file (default: %s)" CRLF " -c, --conf-file=S : set configuration file (default: %s)" CRLF " -p, --pid-file=S : set pid file (default: %s)" CRLF " -T, --thread_num=N : set the worker threads number (default: %d)" CRLF "", VR_LOG_DEFAULT, VR_LOG_MIN, VR_LOG_MAX, VR_LOG_PATH != NULL ? VR_LOG_PATH : "stderr", VR_CONF_PATH, VR_PID_FILE != NULL ? VR_PID_FILE : "off", VR_THREAD_NUM_DEFAULT); } static rstatus_t vr_create_pidfile(struct instance *nci) { char pid[VR_UINTMAX_MAXLEN]; int fd, pid_len; ssize_t n; fd = open(nci->pid_filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); if (fd < 0) { log_error("opening pid file '%s' failed: %s", nci->pid_filename, strerror(errno)); return VR_ERROR; } nci->pidfile = 1; pid_len = dsnprintf(pid, VR_UINTMAX_MAXLEN, "%d", nci->pid); n = vr_write(fd, pid, pid_len); if (n < 0) { log_error("write to pid file '%s' failed: %s", nci->pid_filename, strerror(errno)); return VR_ERROR; } close(fd); return VR_OK; } static void vr_remove_pidfile(struct instance *nci) { int status; status = unlink(nci->pid_filename); if (status < 0) { log_error("unlink of pid file '%s' failed, ignored: %s", nci->pid_filename, strerror(errno)); } } static void vr_set_default_options(struct instance *nci) { int status; nci->log_level = VR_LOG_DEFAULT; nci->log_filename = VR_LOG_PATH; nci->conf_filename = VR_CONF_PATH; status = 
vr_gethostname(nci->hostname, VR_MAXHOSTNAMELEN); if (status < 0) { log_warn("gethostname failed, ignored: %s", strerror(errno)); dsnprintf(nci->hostname, VR_MAXHOSTNAMELEN, "unknown"); } nci->hostname[VR_MAXHOSTNAMELEN - 1] = '\0'; nci->pid = (pid_t)-1; nci->pid_filename = NULL; nci->pidfile = 0; nci->thread_num = (int)VR_THREAD_NUM_DEFAULT; } static rstatus_t vr_get_options(int argc, char **argv, struct instance *nci) { int c, value; opterr = 0; for (;;) { c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { /* no more options */ break; } switch (c) { case 'h': show_version = 1; show_help = 1; break; case 'V': show_version = 1; break; case 't': test_conf = 1; break; case 'd': daemonize = 1; break; case 'v': value = vr_atoi(optarg, strlen(optarg)); if (value < 0) { log_stderr("vire: option -v requires a number"); return VR_ERROR; } nci->log_level = value; break; case 'o': nci->log_filename = optarg; break; case 'c': nci->conf_filename = optarg; break; case 'p': nci->pid_filename = optarg; break; case 'T': value = vr_atoi(optarg, strlen(optarg)); if (value < 0) { log_stderr("vire: option -T requires a number"); return VR_ERROR; } nci->thread_num = value; break; case '?': switch (optopt) { case 'o': case 'c': case 'p': log_stderr("vire: option -%c requires a file name", optopt); break; case 'v': case 'T': log_stderr("vire: option -%c requires a number", optopt); break; default: log_stderr("vire: invalid option -- '%c'", optopt); break; } return VR_ERROR; default: log_stderr("vire: invalid option -- '%c'", optopt); return VR_ERROR; } } return VR_OK; } /* * Returns true if configuration file has a valid syntax, otherwise * returns false */ static bool vr_test_conf(struct instance *nci, int test) { vr_conf *cf; cf = conf_create(nci->conf_filename); if (cf == NULL) { if (test) log_stderr("vire: configuration file '%s' syntax is invalid", nci->conf_filename); return false; } conf_destroy(cf); if (test) log_stderr("vire: configuration file '%s' 
syntax is ok", nci->conf_filename); return true; } static int vr_pre_run(struct instance *nci) { int ret; ret = log_init(nci->log_level, nci->log_filename); if (ret != VR_OK) { return ret; } log_debug(LOG_VERB, "Vire used logfile: %s", nci->conf_filename); if (!vr_test_conf(nci, false)) { log_error("conf file %s is error", nci->conf_filename); return VR_ERROR; } if (daemonize) { ret = vr_daemonize(1); if (ret != VR_OK) { return ret; } } nci->pid = getpid(); ret = signal_init(); if (ret != VR_OK) { return ret; } if (nci->pid_filename) { ret = vr_create_pidfile(nci); if (ret != VR_OK) { return VR_ERROR; } } ret = init_server(nci); if (ret != VR_OK) { return VR_ERROR; } vr_print_run(nci); return VR_OK; } static void vr_post_run(struct instance *nci) { /* deinit the threads */ workers_deinit(); backends_deinit(); master_deinit(); if (nci->pidfile) { vr_remove_pidfile(nci); } signal_deinit(); vr_print_done(); log_deinit(); } static void vr_run(struct instance *nci) { if (nci->thread_num <= 0) { log_error("number of work threads must be greater than 0"); return; } else if (nci->thread_num > 64) { log_warn("WARNING: Setting a high number of worker threads is not recommended." 
" Set this value to the number of cores in your machine or less."); } /* run the threads */ master_run(); workers_run(); backends_run(); /* wait for the threads finish */ workers_wait(); backends_wait(); } int main(int argc, char **argv) { rstatus_t status; struct instance nci; vr_set_default_options(&nci); status = vr_get_options(argc, argv, &nci); if (status != VR_OK) { vr_show_usage(); exit(1); } if (show_version) { log_stderr("This is vire-%s" CRLF, VR_VERSION_STRING); if (show_help) { vr_show_usage(); } exit(0); } if (test_conf) { if (!vr_test_conf(&nci, true)) { exit(1); } exit(0); } status = vr_pre_run(&nci); if (status != VR_OK) { vr_post_run(&nci); exit(1); } server.executable = getAbsolutePath(argv[0]); vr_run(&nci); vr_post_run(&nci); return VR_OK; } ================================================ FILE: src/vr_aof.c ================================================ #include /* Return the current size of the AOF rewrite buffer. */ unsigned long aofRewriteBufferSize(void) { dlistNode *ln; dlistIter li; unsigned long size = 0; dlistRewind(server.aof_rewrite_buf_blocks,&li); while((ln = dlistNext(&li))) { aofrwblock *block = dlistNodeValue(ln); size += block->used; } return size; } /* Create the sds representation of an PEXPIREAT command, using * 'seconds' as time to live and 'cmd' to understand what command * we are translating into a PEXPIREAT. * * This command is used in order to translate EXPIRE and PEXPIRE commands * into PEXPIREAT command so that we retain precision in the append only * file, and the time is always absolute and not relative. 
*/ sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, robj *seconds) { long long when; robj *argv[3]; /* Make sure we can use strtoll */ seconds = getDecodedObject(seconds); when = strtoll(seconds->ptr,NULL,10); /* Convert argument into milliseconds for EXPIRE, SETEX, EXPIREAT */ if (cmd->proc == expireCommand || cmd->proc == setexCommand || cmd->proc == expireatCommand) { when *= 1000; } /* Convert into absolute time for EXPIRE, PEXPIRE, SETEX, PSETEX */ if (cmd->proc == expireCommand || cmd->proc == pexpireCommand || cmd->proc == setexCommand || cmd->proc == psetexCommand) { when += vr_msec_now(); } decrRefCount(seconds); argv[0] = createStringObject("PEXPIREAT",9); argv[1] = key; argv[2] = createStringObjectFromLongLong(when); buf = catAppendOnlyGenericCommand(buf, 3, argv); decrRefCount(argv[0]); decrRefCount(argv[2]); return buf; } sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) { char buf[32]; int len, j; robj *o; buf[0] = '*'; len = 1+ll2string(buf+1,sizeof(buf)-1,argc); buf[len++] = '\r'; buf[len++] = '\n'; dst = sdscatlen(dst,buf,len); for (j = 0; j < argc; j++) { o = getDecodedObject(argv[j]); buf[0] = '$'; len = 1+ll2string(buf+1,sizeof(buf)-1,sdslen(o->ptr)); buf[len++] = '\r'; buf[len++] = '\n'; dst = sdscatlen(dst,buf,len); dst = sdscatlen(dst,o->ptr,sdslen(o->ptr)); dst = sdscatlen(dst,"\r\n",2); decrRefCount(o); } return dst; } /* Event handler used to send data to the child process doing the AOF * rewrite. We send pieces of our AOF differences buffer so that the final * write when the child finishes the rewrite will be small. */ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) { dlistNode *ln; aofrwblock *block; ssize_t nwritten; UNUSED(el); UNUSED(fd); UNUSED(privdata); UNUSED(mask); while(1) { ln = dlistFirst(server.aof_rewrite_buf_blocks); block = ln ? 
ln->value : NULL; if (server.aof_stop_sending_diff || !block) { aeDeleteFileEvent(server.el,server.aof_pipe_write_data_to_child, AE_WRITABLE); return; } if (block->used > 0) { nwritten = vr_write(server.aof_pipe_write_data_to_child, block->buf,block->used); if (nwritten <= 0) return; memmove(block->buf,block->buf+nwritten,block->used-nwritten); block->used -= nwritten; } if (block->used == 0) dlistDelNode(server.aof_rewrite_buf_blocks,ln); } } /* Append data to the AOF rewrite buffer, allocating new blocks if needed. */ void aofRewriteBufferAppend(unsigned char *s, unsigned long len) { dlistNode *ln = dlistLast(server.aof_rewrite_buf_blocks); aofrwblock *block = ln ? ln->value : NULL; while(len) { /* If we already got at least an allocated block, try appending * at least some piece into it. */ if (block) { unsigned long thislen = (block->free < len) ? block->free : len; if (thislen) { /* The current block is not already full. */ memcpy(block->buf+block->used, s, thislen); block->used += thislen; block->free -= thislen; s += thislen; len -= thislen; } } if (len) { /* First block to allocate, or need another block. */ int numblocks; block = dalloc(sizeof(*block)); block->free = AOF_RW_BUF_BLOCK_SIZE; block->used = 0; dlistAddNodeTail(server.aof_rewrite_buf_blocks,block); /* Log every time we cross more 10 or 100 blocks, respectively * as a notice or warning. */ numblocks = dlistLength(server.aof_rewrite_buf_blocks); if (((numblocks+1) % 10) == 0) { int level = ((numblocks+1) % 100) == 0 ? LOG_WARN : LOG_NOTICE; log_debug(level, "Background AOF buffer size: %lu MB", aofRewriteBufferSize()/(1024*1024)); } } } /* Install a file event to send data to the rewrite child if there is * not one already. 
/* Serialize the command CMD (with arguments ARGV/ARGC, executed against
 * database DICTID) in the AOF wire format and hand it to the AOF
 * machinery.
 *
 * A SELECT is emitted first when DICTID differs from the database of the
 * previously appended command. Expire-style commands are rewritten as
 * PEXPIREAT, and SETEX/PSETEX as SET followed by PEXPIREAT. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds out = sdsempty();

    /* Switch database in the AOF stream if the target DB changed. */
    if (dictid != server.aof_selected_db) {
        char dbnum[64];

        snprintf(dbnum,sizeof(dbnum),"%d",dictid);
        out = sdscatprintf(out,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(dbnum),dbnum);
        server.aof_selected_db = dictid;
    }

    if (cmd->proc == expireCommand || cmd->proc == pexpireCommand ||
        cmd->proc == expireatCommand) {
        /* EXPIRE/PEXPIRE/EXPIREAT become PEXPIREAT */
        out = catAppendOnlyExpireAtCommand(out,cmd,argv[1],argv[2]);
    } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
        /* SETEX/PSETEX become SET followed by PEXPIREAT */
        robj *setargv[3];

        setargv[0] = createStringObject("SET",3);
        setargv[1] = argv[1];
        setargv[2] = argv[3];
        out = catAppendOnlyGenericCommand(out,3,setargv);
        decrRefCount(setargv[0]);
        out = catAppendOnlyExpireAtCommand(out,cmd,argv[1],argv[2]);
    } else {
        /* Everything else is written out unchanged. */
        out = catAppendOnlyGenericCommand(out,argc,argv);
    }

    /* With AOF enabled the serialized command is queued in
     * server.aof_buf. */
    if (server.aof_state == AOF_ON) {
        server.aof_buf = sdscatlen(server.aof_buf,out,sdslen(out));
    }

    /* While a rewrite child exists the same bytes are also accumulated
     * in the rewrite buffer, so the differences can be appended to the
     * new file once the child is done. */
    if (server.aof_child_pid != -1) {
        aofRewriteBufferAppend((unsigned char*)out,sdslen(out));
    }

    sdsfree(out);
}
*/ static int num_backend_threads; struct darray backends; static void *backend_thread_run(void *args); int vr_backend_init(vr_backend *backend) { rstatus_t status; int threads_num; if (backend == NULL) { return VR_ERROR; } backend->id = 0; backend->current_db = 0; backend->timelimit_exit = 0; backend->last_fast_cycle = 0; backend->resize_db = 0; backend->rehash_db = 0; vr_eventloop_init(&backend->vel, 10); backend->vel.thread.fun_run = backend_thread_run; backend->vel.thread.data = backend; return VR_OK; } void vr_backend_deinit(vr_backend *backend) { if (backend == NULL) { return; } vr_eventloop_deinit(&backend->vel); } static int backend_cron(struct aeEventLoop *eventLoop, long long id, void *clientData) { vr_worker *backend = clientData; vr_eventloop *vel = &backend->vel; size_t stat_used_memory, stats_peak_memory; UNUSED(eventLoop); UNUSED(id); UNUSED(clientData); ASSERT(eventLoop == vel->el); vel->unixtime = time(NULL); vel->mstime = vr_msec_now(); /* Record the max memory used since the server was started. */ stat_used_memory = dalloc_used_memory(); update_stats_get(vel->stats, peak_memory, &stats_peak_memory); if (stat_used_memory > stats_peak_memory) { update_stats_set(vel->stats, peak_memory, stat_used_memory); } databasesCron(backend); /* Update the config cache */ run_with_period(1000, vel->cronloops) { conf_cache_update(&vel->cc); } vel->cronloops ++; return 1000/vel->hz; } static int setup_backend(vr_backend *backend) { /* Create the serverCron() time event, that's our main way to process * background operations. 
*/ if(aeCreateTimeEvent(backend->vel.el, 1, backend_cron, backend, NULL) == AE_ERR) { serverPanic("Can't create the serverCron time event."); return VR_ERROR; } return VR_OK; } static void * backend_thread_run(void *args) { vr_worker *backend = args; /* vire worker run */ aeMain(backend->vel.el); return NULL; } int backends_init(uint32_t backend_count) { rstatus_t status; uint32_t idx; vr_backend *backend; darray_init(&backends, backend_count, sizeof(vr_backend)); for (idx = 0; idx < backend_count; idx ++) { backend = darray_push(&backends); vr_backend_init(backend); backend->id = idx; status = setup_backend(backend); if (status != VR_OK) { exit(1); } } num_backend_threads = (int)darray_n(&backends); return VR_OK; } int backends_run(void) { uint32_t i, thread_count; vr_backend *backend; thread_count = (uint32_t)num_backend_threads; for (i = 0; i < thread_count; i ++) { backend = darray_get(&backends, i); vr_thread_start(&backend->vel.thread); } return VR_OK; } int backends_wait(void) { uint32_t i, thread_count; vr_backend *backend; thread_count = (uint32_t)num_backend_threads; for (i = 0; i < thread_count; i ++) { backend = darray_get(&backends, i); pthread_join(backend->vel.thread.thread_id, NULL); } return VR_OK; } void backends_deinit(void) { vr_backend *backend; while(darray_n(&backends)) { backend = darray_pop(&backends); vr_backend_deinit(backend); } } ================================================ FILE: src/vr_backend.h ================================================ #ifndef _VR_BACKEND_H_ #define _VR_BACKEND_H_ typedef struct vr_backend { int id; vr_eventloop vel; /* Some global state in order to continue the work incrementally * across calls for activeExpireCycle() to expire some keys. */ unsigned int current_db; /* Last DB tested. */ int timelimit_exit; /* Time limit hit in previous call? */ long long last_fast_cycle; /* When last fast cycle ran. 
*/

    /* We use global counters so if we stop the computation at a given
     * DB we'll be able to start from the successive in the next
     * cron loop iteration for databasesCron() to resize and rehash db. */
    unsigned int resize_db;
    unsigned int rehash_db;
}vr_backend;

extern struct darray backends;

int backends_init(uint32_t backend_count);
int backends_run(void);
int backends_wait(void);
void backends_deinit(void);

#endif

================================================
FILE: src/vr_bitops.c
================================================
#include

/* -----------------------------------------------------------------------------
 * Helpers and low level bit functions.
 * -------------------------------------------------------------------------- */

/* Count number of bits set in the binary array pointed by 's' and long
 * 'count' bytes. The implementation of this function is required to
 * work with a input string length up to 512 MB.
 *
 * The buffer is processed in three phases: single bytes until the pointer
 * is 4-byte aligned, then groups of seven 32-bit words, then the remaining
 * bytes one at a time. Single bytes are counted through a 256-entry table. */
size_t redisPopcount(void *s, long count) {
    size_t bits = 0;
    unsigned char *p = s;
    uint32_t *p4;
    /* bitsinbyte[b] is the number of set bits in the byte value b. */
    static const unsigned char bitsinbyte[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8};

    /* Count initial bytes not aligned to 32 bit. */
    while((unsigned long)p & 3 && count) {
        bits += bitsinbyte[*p++];
        count--;
    }

    /* Count bits 28 bytes at a time */
    p4 = (uint32_t*)p;
    while(count>=28) {
        uint32_t aux1, aux2, aux3, aux4, aux5, aux6, aux7;

        aux1 = *p4++;
        aux2 = *p4++;
        aux3 = *p4++;
        aux4 = *p4++;
        aux5 = *p4++;
        aux6 = *p4++;
        aux7 = *p4++;
        count -= 28;

        /* For every word: first fold adjacent bits into 2-bit sums, then
         * adjacent 2-bit sums into 4-bit sums. */
        aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
        aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
        aux2 = aux2 - ((aux2 >> 1) & 0x55555555);
        aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333);
        aux3 = aux3 - ((aux3 >> 1) & 0x55555555);
        aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
        aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
        aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
        aux5 = aux5 - ((aux5 >> 1) & 0x55555555);
        aux5 = (aux5 & 0x33333333) + ((aux5 >> 2) & 0x33333333);
        aux6 = aux6 - ((aux6 >> 1) & 0x55555555);
        aux6 = (aux6 & 0x33333333) + ((aux6 >> 2) & 0x33333333);
        aux7 = aux7 - ((aux7 >> 1) & 0x55555555);
        aux7 = (aux7 & 0x33333333) + ((aux7 >> 2) & 0x33333333);
        /* Per-byte sums of the seven words are added together; multiplying
         * by 0x01010101 accumulates the four byte lanes into the top byte,
         * which the final shift extracts. */
        bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) +
                    ((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) +
                    ((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) +
                    ((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) +
                    ((aux5 + (aux5 >> 4)) & 0x0F0F0F0F) +
                    ((aux6 + (aux6 >> 4)) & 0x0F0F0F0F) +
                    ((aux7 + (aux7 >> 4)) & 0x0F0F0F0F))* 0x01010101) >> 24;
    }
    /* Count the remaining bytes. */
    p = (unsigned char*)p4;
    while(count--) bits += bitsinbyte[*p++];
    return bits;
}

/* Return the position of the first bit set to one (if 'bit' is 1) or
 * zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes.
 *
 * The function is guaranteed to return a value >= 0 if 'bit' is 0 since if
 * no zero bit is found, it returns count*8 assuming the string is zero
 * padded on the right. However if 'bit' is 1 it is possible that there is
 * not a single set bit in the bitmap. In this special case -1 is returned.
*/
long redisBitpos(void *s, unsigned long count, int bit) {
    const unsigned long wordsize = sizeof(unsigned long);
    /* Values that contain no bit of the kind we are searching for. */
    const unsigned long skipbyte = bit ? 0 : UCHAR_MAX;
    const unsigned long skipword = bit ? 0 : ULONG_MAX;
    unsigned char *bytep = (unsigned char*) s;
    unsigned long *wordp;
    unsigned long window = 0, probe, idx;
    long pos = 0; /* Bit position reported to the caller. */

    /* Phase 1: walk single bytes while the address is not a multiple of
     * sizeof(unsigned long), stopping at the first interesting byte. */
    for (; ((unsigned long)bytep & (wordsize-1)) && count;
         bytep++, count--, pos += 8)
    {
        if (*bytep != skipbyte) break;
    }

    /* Phase 2: step over whole words made only of uninteresting bits. Long
     * runs of equal bits are skipped far faster than bit by bit. */
    wordp = (unsigned long*) bytep;
    for (; count >= wordsize;
         wordp++, count -= wordsize, pos += wordsize*8)
    {
        if (*wordp != skipword) break;
    }

    /* Phase 3: assemble the next bytes into 'window', first byte in the
     * most significant position, so that bit 0 of the string is the MSB.
     * When fewer than a word of bytes is left the low part stays zero,
     * i.e. the string is treated as zero padded on the right. */
    bytep = (unsigned char*) wordp;
    for (idx = 0; idx < wordsize; idx++) {
        window <<= 8;
        if (count == 0) continue;
        window |= *bytep++;
        count--;
    }

    /* Looking for a one in an all-zero tail: there is none. The opposite
     * case cannot happen because of the zero padding assumption. */
    if (bit == 1 && window == 0) return -1;

    /* Scan the window from the most significant bit down. The probe mask
     * is built without assuming the width of unsigned long. */
    for (probe = ~(ULONG_MAX >> 1); probe != 0; probe >>= 1, pos++) {
        if (((probe & window) != 0) == bit) return pos;
    }

    /* Not reachable unless the algorithm is broken: the no-match case was
     * handled before the scan. */
    serverPanic("End of redisBitpos() reached.");
    return 0; /* Just to avoid warnings. */
}

/* The following set.*Bitfield and get.*Bitfield functions implement setting
 * and getting arbitrary size (up to 64 bits) signed and unsigned integers
 * at arbitrary positions into a bitmap.
 *
 * The representation considers the bitmap as having the bit number 0 to be
 * the most significant bit of the first byte, and so forth, so for example
 * setting a 5 bits unsigned integer to value 23 at offset 7 into a bitmap
 * previously set to all zeroes, will produce the following representation:
 *
 * +--------+--------+
 * |00000001|01110000|
 * +--------+--------+
 *
 * When offsets and integer sizes are aligned to bytes boundaries, this is the
 * same as big endian, however when such alignment does not exist, its important
 * to also understand how the bits inside a byte are ordered.
 *
 * Note that this format follows the same convention as SETBIT and related
 * commands.
*/

/* Store the 'bits' low-order bits of 'value' into the bitmap 'p', starting
 * at bit position 'offset' (bit 0 is the most significant bit of p[0]).
 * The most significant of the 'bits' bits is written first. The caller must
 * make sure the bitmap is large enough. */
void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value) {
    uint64_t byte, bit, byteval, bitval, j;

    for (j = 0; j < bits; j++) {
        bitval = (value & ((uint64_t)1<<(bits-1-j))) != 0;
        byte = offset >> 3;
        bit = 7 - (offset & 0x7);
        byteval = p[byte];
        byteval &= ~(1 << bit);
        byteval |= bitval << bit;
        p[byte] = byteval & 0xff;
        offset++;
    }
}

/* Signed variant of setUnsignedBitfield(): 'value' is mapped to its two's
 * complement bit pattern and the low 'bits' bits of it are stored. */
void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value) {
    uint64_t uv;

    if (value >= 0)
        uv = value;
    else
        uv = UINT64_MAX + value + 1;
    setUnsignedBitfield(p,offset,bits,uv);
}

/* Read 'bits' bits from the bitmap 'p' starting at bit position 'offset'
 * and return them as an unsigned integer (first bit read is the most
 * significant one). */
uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
    uint64_t byte, bit, byteval, bitval, j, value = 0;

    for (j = 0; j < bits; j++) {
        byte = offset >> 3;
        bit = 7 - (offset & 0x7);
        byteval = p[byte];
        bitval = (byteval >> bit) & 1;
        value = (value<<1) | bitval;
        offset++;
    }
    return value;
}

/* Read 'bits' bits from the bitmap 'p' starting at bit position 'offset'
 * and return them interpreted as a two's complement signed integer. */
int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
    uint64_t raw = getUnsignedBitfield(p,offset,bits);

    /* If the top significant bit is 1, propagate it to all the higher bits
     * for two complement representation of signed integers.
     *
     * This is only needed (and only defined) for fields narrower than 64
     * bits: a full 64 bit field already carries its sign bit in place, and
     * shifting a 64 bit value by 64 is undefined behavior. A zero width
     * field has no sign bit at all. */
    if (bits > 0 && bits < 64 && (raw & ((uint64_t)1 << (bits-1))))
        raw |= ((uint64_t)-1) << bits;
    return (int64_t)raw;
}

/* The following two functions detect overflow of a value in the context
 * of storing it as an unsigned or signed integer with the specified
 * number of bits. The functions both take the value and a possible increment.
 * If no overflow could happen and the value+increment fit inside the limits,
 * then zero is returned, otherwise in case of overflow, 1 is returned,
 * otherwise in case of underflow, -1 is returned.
 *
 * When non-zero is returned (oferflow or underflow), if not NULL, *limit is
 * set to the value the operation should result when an overflow happens,
 * depending on the specified overflow semantics:
 *
 * For BFOVERFLOW_SAT if 1 is returned, *limit it is set maximum value that
 * you can store in that integer.
when -1 is returned, *limit is set to the * minimum value that an integer of that size can represent. * * For BFOVERFLOW_WRAP *limit is set by performing the operation in order to * "wrap" around towards zero for unsigned integers, or towards the most * negative number that is possible to represent for signed integers. */ #define BFOVERFLOW_WRAP 0 #define BFOVERFLOW_SAT 1 #define BFOVERFLOW_FAIL 2 /* Used by the BITFIELD command implementation. */ int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit) { uint64_t max = (bits == 64) ? UINT64_MAX : (((uint64_t)1< max || (incr > 0 && incr > maxincr)) { if (limit) { if (owtype == BFOVERFLOW_WRAP) { goto handle_wrap; } else if (owtype == BFOVERFLOW_SAT) { *limit = max; } } return 1; } else if (incr < 0 && incr < minincr) { if (limit) { if (owtype == BFOVERFLOW_WRAP) { goto handle_wrap; } else if (owtype == BFOVERFLOW_SAT) { *limit = 0; } } return -1; } return 0; handle_wrap: { uint64_t mask = ((int64_t)-1) << bits; uint64_t res = value+incr; res &= ~mask; *limit = res; } return 1; } int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int owtype, int64_t *limit) { int64_t max = (bits == 64) ? INT64_MAX : (((int64_t)1<<(bits-1))-1); int64_t min = (-max)-1; /* Note that maxincr and minincr could overflow, but we use the values * only after checking 'value' range, so when we use it no overflow * happens. 
*/ int64_t maxincr = max-value; int64_t minincr = min-value; if (value > max || (bits != 64 && incr > maxincr) || (value >= 0 && incr > 0 && incr > maxincr)) { if (limit) { if (owtype == BFOVERFLOW_WRAP) { goto handle_wrap; } else if (owtype == BFOVERFLOW_SAT) { *limit = max; } } return 1; } else if (value < min || (bits != 64 && incr < minincr) || (value < 0 && incr < 0 && incr < minincr)) { if (limit) { if (owtype == BFOVERFLOW_WRAP) { goto handle_wrap; } else if (owtype == BFOVERFLOW_SAT) { *limit = min; } } return -1; } return 0; handle_wrap: { uint64_t mask = ((int64_t)-1) << bits; uint64_t msb = (uint64_t)1 << (bits-1); uint64_t a = value, b = incr, c; c = a+b; /* Perform addition as unsigned so that's defined. */ /* If the sign bit is set, propagate to all the higher order * bits, to cap the negative value. If it's clear, mask to * the positive integer limit. */ if (c & msb) { c |= mask; } else { c &= ~mask; } *limit = c; } return 1; } /* Debugging function. Just show bits in the specified bitmap. Not used * but here for not having to rewrite it when debugging is needed. */ void printBits(unsigned char *p, unsigned long count) { unsigned long j, i, byte; for (j = 0; j < count; j++) { byte = p[j]; for (i = 0x80; i > 0; i /= 2) printf("%c", (byte & i) ? '1' : '0'); printf("|"); } printf("\n"); } /* ----------------------------------------------------------------------------- * Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP. * -------------------------------------------------------------------------- */ #define BITOP_AND 0 #define BITOP_OR 1 #define BITOP_XOR 2 #define BITOP_NOT 3 #define BITFIELDOP_GET 0 #define BITFIELDOP_SET 1 #define BITFIELDOP_INCRBY 2 /* This helper function used by GETBIT / SETBIT parses the bit offset argument * making sure an error is returned if it is negative or if it overflows * Redis 512 MB limit for the string value. 
* * If the 'hash' argument is true, and 'bits is positive, then the command * will also parse bit offsets prefixed by "#". In such a case the offset * is multiplied by 'bits'. This is useful for the BITFIELD command. */ int getBitOffsetFromArgument(client *c, robj *o, size_t *offset, int hash, int bits) { long long loffset; char *err = "bit offset is not an integer or out of range"; char *p = o->ptr; size_t plen = sdslen(p); int usehash = 0; /* Handle # form. */ if (p[0] == '#' && hash && bits > 0) usehash = 1; if (string2ll(p+usehash,plen-usehash,&loffset) == 0) { addReplyError(c,err); return VR_ERROR; } /* Adjust the offset by 'bits' for # form. */ if (usehash) loffset *= bits; /* Limit offset to 512MB in bytes */ if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024)) { addReplyError(c,err); return VR_ERROR; } *offset = (size_t)loffset; return VR_OK; } /* This helper function for BITFIELD parses a bitfield type in the form * where sign is 'u' or 'i' for unsigned and signed, and * the bits is a value between 1 and 64. However 64 bits unsigned integers * are reported as an error because of current limitations of Redis protocol * to return unsigned integer values greater than INT64_MAX. * * On error VR_ERROR is returned and an error is sent to the client. */ int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) { char *p = o->ptr; char *err = "Invalid bitfield type. Use something like i16 u8. Note that u64 is not supported but i64 is."; long long llbits; if (p[0] == 'i') { *sign = 1; } else if (p[0] == 'u') { *sign = 0; } else { addReplyError(c,err); return VR_ERROR; } if ((string2ll(p+1,strlen(p+1),&llbits)) == 0 || llbits < 1 || (*sign == 1 && llbits > 64) || (*sign == 0 && llbits > 63)) { addReplyError(c,err); return VR_ERROR; } *bits = llbits; return VR_OK; } /* This is an helper function for commands implementations that need to write * bits to a string object. 
The command creates or pad with zeroes the string * so that the 'maxbit' bit can be addressed. The object is finally * returned. Otherwise if the key holds a wrong type NULL is returned and * an error is sent to the client. */ robj *lookupStringForBitCommand(client *c, size_t maxbit, int *expired) { size_t byte = maxbit >> 3; robj *o = lookupKeyWrite(c->db,c->argv[1],expired); if (o == NULL) { o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1)); dbAdd(c->db,c->argv[1],o); } else { if (checkType(c,o,OBJ_STRING)) return NULL; o = dbUnshareStringValue(c->db,c->argv[1],o); o->ptr = sdsgrowzero(o->ptr,byte+1); } return o; } /* SETBIT key offset bitvalue */ void setbitCommand(client *c) { robj *o; char *err = "bit is not an integer or out of range"; size_t bitoffset; ssize_t byte, bit; int byteval, bitval; long on; int expired = 0; if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != VR_OK) return; if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != VR_OK) return; /* Bits can only be set or cleared... */ if (on & ~1) { addReplyError(c,err); return; } fetchInternalDbByKey(c, c->argv[1]); lockDbWrite(c->db); if ((o = lookupStringForBitCommand(c,bitoffset,&expired)) == NULL) { unlockDb(c->db); if (expired) update_stats_add(c->vel->stats,expiredkeys,1); return; } /* Get current values */ byte = bitoffset >> 3; byteval = ((uint8_t*)o->ptr)[byte]; bit = 7 - (bitoffset & 0x7); bitval = byteval & (1 << bit); /* Update byte with new bit value and return original value */ byteval &= ~(1 << bit); byteval |= ((on & 0x1) << bit); ((uint8_t*)o->ptr)[byte] = byteval; signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); c->vel->dirty++; addReply(c, bitval ? 
shared.cone : shared.czero); unlockDb(c->db); if (expired) update_stats_add(c->vel->stats,expiredkeys,1); } /* GETBIT key offset */ void getbitCommand(client *c) { robj *o; char llbuf[32]; size_t bitoffset; size_t byte, bit; size_t bitval = 0; if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != VR_OK) return; fetchInternalDbByKey(c, c->argv[1]); lockDbRead(c->db); if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL) { unlockDb(c->db); update_stats_add(c->vel->stats, keyspace_misses, 1); return; } else if (checkType(c,o,OBJ_STRING)) { unlockDb(c->db); update_stats_add(c->vel->stats, keyspace_hits, 1); return; } byte = bitoffset >> 3; bit = 7 - (bitoffset & 0x7); if (sdsEncodedObject(o)) { if (byte < sdslen(o->ptr)) bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit); } else { if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)o->ptr)) bitval = llbuf[byte] & (1 << bit); } addReply(c, bitval ? shared.cone : shared.czero); unlockDb(c->db); update_stats_add(c->vel->stats, keyspace_hits, 1); } /* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN */ void bitopCommand(client *c) { char *opname = c->argv[1]->ptr; robj *o, *targetkey = c->argv[2]; unsigned long op, j, numkeys; robj **objects; /* Array of source objects. */ unsigned char **src; /* Array of source strings pointers. */ unsigned long *len, maxlen = 0; /* Array of length of src strings, and max len. */ unsigned long minlen = 0; /* Min len among the input keys. */ unsigned char *res = NULL; /* Resulting string. */ /* Parse the operation name. 
*/
    /* NOTE(review): SETBIT/GETBIT/BITCOUNT/BITPOS in this file call
     * fetchInternalDbByKey() and lockDb*() before using c->db and update
     * c->vel->dirty; this command does neither and bumps server.dirty.
     * Confirm that this is intended. */
    if ((opname[0] == 'a' || opname[0] == 'A') && !strcasecmp(opname,"and"))
        op = BITOP_AND;
    else if((opname[0] == 'o' || opname[0] == 'O') && !strcasecmp(opname,"or"))
        op = BITOP_OR;
    else if((opname[0] == 'x' || opname[0] == 'X') && !strcasecmp(opname,"xor"))
        op = BITOP_XOR;
    else if((opname[0] == 'n' || opname[0] == 'N') && !strcasecmp(opname,"not"))
        op = BITOP_NOT;
    else {
        addReply(c,shared.syntaxerr);
        return;
    }

    /* Sanity check: NOT accepts only a single key argument. */
    if (op == BITOP_NOT && c->argc != 4) {
        addReplyError(c,"BITOP NOT must be called with a single source key.");
        return;
    }

    /* Lookup keys, and store pointers to the string objects into an array. */
    numkeys = c->argc - 3;
    src = dalloc(sizeof(unsigned char*) * numkeys);
    len = dalloc(sizeof(long) * numkeys);
    objects = dalloc(sizeof(robj*) * numkeys);
    for (j = 0; j < numkeys; j++) {
        o = lookupKeyRead(c->db,c->argv[j+3]);
        /* Handle non-existing keys as empty strings. */
        if (o == NULL) {
            objects[j] = NULL;
            src[j] = NULL;
            len[j] = 0;
            minlen = 0;
            continue;
        }
        /* Return an error if one of the keys is not a string. */
        if (checkType(c,o,OBJ_STRING)) {
            unsigned long i;
            /* Drop the references taken on the objects decoded so far. */
            for (i = 0; i < j; i++) {
                if (objects[i])
                    decrRefCount(objects[i]);
            }
            dfree(src);
            dfree(len);
            dfree(objects);
            return;
        }
        objects[j] = getDecodedObject(o);
        src[j] = objects[j]->ptr;
        len[j] = sdslen(objects[j]->ptr);
        if (len[j] > maxlen) maxlen = len[j];
        if (j == 0 || len[j] < minlen) minlen = len[j];
    }

    /* Compute the bit operation, if at least one string is not empty. */
    if (maxlen) {
        res = (unsigned char*) sdsnewlen(NULL,maxlen);
        unsigned char output, byte;
        unsigned long i;

        /* Fast path: as far as we have data for all the input bitmaps we
         * can take a fast path that performs much better than the
         * vanilla algorithm. */
        j = 0;
        if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
            unsigned long *lp[16];
            unsigned long *lres = (unsigned long*) res;

            /* Note: sds pointer is always aligned to 8 byte boundary. */
            memcpy(lp,src,sizeof(unsigned long*)*numkeys);
            /* The result starts as a copy of the first source; the other
             * sources are then combined into it four words at a time. */
            memcpy(res,src[0],minlen);

            /* Different branches per different operations for speed (sorry). */
            if (op == BITOP_AND) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] &= lp[i][0];
                        lres[1] &= lp[i][1];
                        lres[2] &= lp[i][2];
                        lres[3] &= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_OR) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] |= lp[i][0];
                        lres[1] |= lp[i][1];
                        lres[2] |= lp[i][2];
                        lres[3] |= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_XOR) {
                while(minlen >= sizeof(unsigned long)*4) {
                    for (i = 1; i < numkeys; i++) {
                        lres[0] ^= lp[i][0];
                        lres[1] ^= lp[i][1];
                        lres[2] ^= lp[i][2];
                        lres[3] ^= lp[i][3];
                        lp[i]+=4;
                    }
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            } else if (op == BITOP_NOT) {
                while(minlen >= sizeof(unsigned long)*4) {
                    lres[0] = ~lres[0];
                    lres[1] = ~lres[1];
                    lres[2] = ~lres[2];
                    lres[3] = ~lres[3];
                    lres+=4;
                    j += sizeof(unsigned long)*4;
                    minlen -= sizeof(unsigned long)*4;
                }
            }
        }

        /* j is set to the next byte to process by the previous loop. */
        for (; j < maxlen; j++) {
            /* Sources shorter than j contribute a zero byte. */
            output = (len[0] <= j) ? 0 : src[0][j];
            if (op == BITOP_NOT) output = ~output;
            for (i = 1; i < numkeys; i++) {
                byte = (len[i] <= j) ? 0 : src[i][j];
                switch(op) {
                case BITOP_AND: output &= byte; break;
                case BITOP_OR:  output |= byte; break;
                case BITOP_XOR: output ^= byte; break;
                }
            }
            res[j] = output;
        }
    }
    for (j = 0; j < numkeys; j++) {
        if (objects[j])
            decrRefCount(objects[j]);
    }
    dfree(src);
    dfree(len);
    dfree(objects);

    /* Store the computed value into the target key */
    if (maxlen) {
        o = createObject(OBJ_STRING,res);
        setKey(c->db,targetkey,o,NULL);
        notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id);
        decrRefCount(o);
    } else if (dbDelete(c->db,targetkey)) {
        /* Every source was empty or missing: the target key is deleted. */
        signalModifiedKey(c->db,targetkey);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id);
    }
    server.dirty++;
    addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
}

/* BITCOUNT key [start end]
 *
 * Replies with the number of set bits of the string at key, optionally
 * restricted to the byte range start..end (negative indexes count from the
 * end of the string). */
void bitcountCommand(client *c) {
    robj *o;
    long start, end, strlen;
    unsigned char *p;
    char llbuf[32];

    fetchInternalDbByKey(c, c->argv[1]);
    lockDbRead(c->db);
    /* Lookup, check for type, and return 0 for non existing keys. */
    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL) {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_misses, 1);
        return;
    } else if (checkType(c,o,OBJ_STRING)) {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_hits, 1);
        return;
    }

    /* Set the 'p' pointer to the string, that can be just a stack allocated
     * array if our string was integer encoded. */
    if (o->encoding == OBJ_ENCODING_INT) {
        p = (unsigned char*) llbuf;
        strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
    } else {
        p = (unsigned char*) o->ptr;
        strlen = sdslen(o->ptr);
    }

    /* Parse start/end range if any.
*/
    if (c->argc == 4) {
        if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != VR_OK) {
            unlockDb(c->db);
            update_stats_add(c->vel->stats, keyspace_hits, 1);
            return;
        }
        if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != VR_OK) {
            unlockDb(c->db);
            update_stats_add(c->vel->stats, keyspace_hits, 1);
            return;
        }
        /* Convert negative indexes */
        if (start < 0) start = strlen+start;
        if (end < 0) end = strlen+end;
        /* Clamp what is still out of range. */
        if (start < 0) start = 0;
        if (end < 0) end = 0;
        if (end >= strlen) end = strlen-1;
    } else if (c->argc == 2) {
        /* The whole string. */
        start = 0;
        end = strlen-1;
    } else {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_hits, 1);
        /* Syntax error. */
        addReply(c,shared.syntaxerr);
        return;
    }

    /* Precondition: end >= 0 && end < strlen, so the only condition where
     * zero can be returned is: start > end. */
    if (start > end) {
        addReply(c,shared.czero);
    } else {
        long bytes = end-start+1;

        addReplyLongLong(c,redisPopcount(p+start,bytes));
    }

    unlockDb(c->db);
    update_stats_add(c->vel->stats, keyspace_hits, 1);
}

/* BITPOS key bit [start [end]]
 *
 * Replies with the position of the first bit equal to 'bit' (0 or 1) in the
 * string at key, optionally restricted to the byte range start..end, or
 * with -1 when no such bit exists in the examined range. */
void bitposCommand(client *c) {
    robj *o;
    long bit, start, end, strlen;
    unsigned char *p;
    char llbuf[32];
    int end_given = 0; /* Set when the caller passed an explicit 'end'. */

    /* Parse the bit argument to understand what we are looking for, set
     * or clear bits. */
    if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != VR_OK)
        return;
    if (bit != 0 && bit != 1) {
        addReplyError(c, "The bit argument must be 1 or 0.");
        return;
    }

    fetchInternalDbByKey(c, c->argv[1]);
    lockDbRead(c->db);
    /* If the key does not exist, from our point of view it is an infinite
     * array of 0 bits. If the user is looking for the fist clear bit return 0,
     * If the user is looking for the first set bit, return -1. */
    if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_misses, 1);
        addReplyLongLong(c, bit ? -1 : 0);
        return;
    }
    if (checkType(c,o,OBJ_STRING)) {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_hits, 1);
        return;
    }

    /* Set the 'p' pointer to the string, that can be just a stack allocated
     * array if our string was integer encoded. */
    if (o->encoding == OBJ_ENCODING_INT) {
        p = (unsigned char*) llbuf;
        strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
    } else {
        p = (unsigned char*) o->ptr;
        strlen = sdslen(o->ptr);
    }

    /* Parse start/end range if any. */
    if (c->argc == 4 || c->argc == 5) {
        if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != VR_OK) {
            unlockDb(c->db);
            update_stats_add(c->vel->stats, keyspace_hits, 1);
            return;
        }
        if (c->argc == 5) {
            if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != VR_OK) {
                unlockDb(c->db);
                update_stats_add(c->vel->stats, keyspace_hits, 1);
                return;
            }
            end_given = 1;
        } else {
            end = strlen-1;
        }
        /* Convert negative indexes */
        if (start < 0) start = strlen+start;
        if (end < 0) end = strlen+end;
        /* Clamp what is still out of range. */
        if (start < 0) start = 0;
        if (end < 0) end = 0;
        if (end >= strlen) end = strlen-1;
    } else if (c->argc == 3) {
        /* The whole string. */
        start = 0;
        end = strlen-1;
    } else {
        unlockDb(c->db);
        update_stats_add(c->vel->stats, keyspace_hits, 1);
        /* Syntax error. */
        addReply(c,shared.syntaxerr);
        return;
    }

    /* For empty ranges (start > end) we return -1 as an empty range does
     * not contain a 0 nor a 1. */
    if (start > end) {
        addReplyLongLong(c, -1);
    } else {
        long bytes = end-start+1;
        long pos = redisBitpos(p+start,bytes,bit);

        /* If we are looking for clear bits, and the user specified an exact
         * range with start-end, we can't consider the right of the range as
         * zero padded (as we do when no explicit end is given).
         *
         * So if redisBitpos() returns the first bit outside the range,
         * we return -1 to the caller, to mean, in the specified range there
         * is not a single "0" bit. */
        if (end_given && bit == 0 && pos == bytes*8) {
            unlockDb(c->db);
            update_stats_add(c->vel->stats, keyspace_hits, 1);
            addReplyLongLong(c,-1);
            return;
        }
        if (pos != -1) pos += start*8; /* Adjust for the bytes we skipped. */
        addReplyLongLong(c,pos);
    }

    unlockDb(c->db);
    update_stats_add(c->vel->stats, keyspace_hits, 1);
}

/* BITFIELD key subcommmand-1 arg ... subcommand-2 arg ... subcommand-N ...
 *
 * Supported subcommands:
 *
 * GET <type> <offset>
 * SET <type> <offset> <value>
 * INCRBY <type> <offset> <increment>
 * OVERFLOW [WRAP|SAT|FAIL]
 */

/* One parsed GET/SET/INCRBY sub-command of BITFIELD. */
struct bitfieldOp {
    uint64_t offset;    /* Bitfield offset. */
    int64_t i64;        /* Increment amount (INCRBY) or SET value */
    int opcode;         /* Operation id. */
    int owtype;         /* Overflow type to use. */
    int bits;           /* Integer bitfield bits width. */
    int sign;           /* True if signed, otherwise unsigned op. */
};

void bitfieldCommand(client *c) {
    robj *o;
    size_t bitoffset;
    int j, numops = 0, changes = 0;
    struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
    int owtype = BFOVERFLOW_WRAP; /* Overflow type. */

    for (j = 2; j < c->argc; j++) {
        int remargs = c->argc-j-1; /* Remaining args other than current. */
        char *subcmd = c->argv[j]->ptr; /* Current command name. */
        int opcode; /* Current operation code. */
        long long i64 = 0;  /* Signed SET value. */
        int sign = 0; /* Signed or unsigned type? */
        int bits = 0; /* Bitfield width in bits.
*/ if (!strcasecmp(subcmd,"get") && remargs >= 2) opcode = BITFIELDOP_GET; else if (!strcasecmp(subcmd,"set") && remargs >= 3) opcode = BITFIELDOP_SET; else if (!strcasecmp(subcmd,"incrby") && remargs >= 3) opcode = BITFIELDOP_INCRBY; else if (!strcasecmp(subcmd,"overflow") && remargs >= 1) { char *owtypename = c->argv[j+1]->ptr; j++; if (!strcasecmp(owtypename,"wrap")) owtype = BFOVERFLOW_WRAP; else if (!strcasecmp(owtypename,"sat")) owtype = BFOVERFLOW_SAT; else if (!strcasecmp(owtypename,"fail")) owtype = BFOVERFLOW_FAIL; else { addReplyError(c,"Invalid OVERFLOW type specified"); dfree(ops); return; } continue; } else { addReply(c,shared.syntaxerr); dfree(ops); return; } /* Get the type and offset arguments, common to all the ops. */ if (getBitfieldTypeFromArgument(c,c->argv[j+1],&sign,&bits) != VR_OK) { dfree(ops); return; } if (getBitOffsetFromArgument(c,c->argv[j+2],&bitoffset,1,bits) != VR_OK){ dfree(ops); return; } /* INCRBY and SET require another argument. */ if (opcode != BITFIELDOP_GET) { if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != VR_OK){ dfree(ops); return; } } /* Populate the array of operations we'll process. */ ops = drealloc(ops,sizeof(*ops)*(numops+1)); ops[numops].offset = bitoffset; ops[numops].i64 = i64; ops[numops].opcode = opcode; ops[numops].owtype = owtype; ops[numops].bits = bits; ops[numops].sign = sign; numops++; j += 3 - (opcode == BITFIELDOP_GET); } addReplyMultiBulkLen(c,numops); /* Actually process the operations. */ for (j = 0; j < numops; j++) { struct bitfieldOp *thisop = ops+j; /* Execute the operation. */ if (thisop->opcode == BITFIELDOP_SET || thisop->opcode == BITFIELDOP_INCRBY) { /* SET and INCRBY: We handle both with the same code path * for simplicity. SET return value is the previous value so * we need fetch & store as well. */ /* Lookup by making room up to the farest bit reached by * this operation. 
*/ if ((o = lookupStringForBitCommand(c, thisop->offset + (thisop->bits-1), NULL)) == NULL) return; /* We need two different but very similar code paths for signed * and unsigned operations, since the set of functions to get/set * the integers and the used variables types are different. */ if (thisop->sign) { int64_t oldval, newval, wrapped, retval; int overflow; oldval = getSignedBitfield(o->ptr,thisop->offset, thisop->bits); if (thisop->opcode == BITFIELDOP_INCRBY) { newval = oldval + thisop->i64; overflow = checkSignedBitfieldOverflow(oldval, thisop->i64,thisop->bits,thisop->owtype,&wrapped); if (overflow) newval = wrapped; retval = newval; } else { newval = thisop->i64; overflow = checkSignedBitfieldOverflow(newval, 0,thisop->bits,thisop->owtype,&wrapped); if (overflow) newval = wrapped; retval = oldval; } /* On overflow of type is "FAIL", don't write and return * NULL to signal the condition. */ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) { addReplyLongLong(c,retval); setSignedBitfield(o->ptr,thisop->offset, thisop->bits,newval); } else { addReply(c,shared.nullbulk); } } else { uint64_t oldval, newval, wrapped, retval; int overflow; oldval = getUnsignedBitfield(o->ptr,thisop->offset, thisop->bits); if (thisop->opcode == BITFIELDOP_INCRBY) { newval = oldval + thisop->i64; overflow = checkUnsignedBitfieldOverflow(oldval, thisop->i64,thisop->bits,thisop->owtype,&wrapped); if (overflow) newval = wrapped; retval = newval; } else { newval = thisop->i64; overflow = checkUnsignedBitfieldOverflow(newval, 0,thisop->bits,thisop->owtype,&wrapped); if (overflow) newval = wrapped; retval = oldval; } /* On overflow of type is "FAIL", don't write and return * NULL to signal the condition. 
*/ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) { addReplyLongLong(c,retval); setUnsignedBitfield(o->ptr,thisop->offset, thisop->bits,newval); } else { addReply(c,shared.nullbulk); } } changes++; } else { /* GET */ o = lookupKeyRead(c->db,c->argv[1]); size_t olen = (o == NULL) ? 0 : sdslen(o->ptr); unsigned char buf[9]; /* For GET we use a trick: before executing the operation * copy up to 9 bytes to a local buffer, so that we can easily * execute up to 64 bit operations that are at actual string * object boundaries. */ memset(buf,0,9); unsigned char *src = o ? o->ptr : NULL; int i; size_t byte = thisop->offset >> 3; for (i = 0; i < 9; i++) { if (src == NULL || i+byte >= olen) break; buf[i] = src[i+byte]; } /* Now operate on the copied buffer which is guaranteed * to be zero-padded. */ if (thisop->sign) { int64_t val = getSignedBitfield(buf,thisop->offset-(byte*8), thisop->bits); addReplyLongLong(c,val); } else { uint64_t val = getUnsignedBitfield(buf,thisop->offset-(byte*8), thisop->bits); addReplyLongLong(c,val); } } } if (changes) { signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); server.dirty += changes; } dfree(ops); } ================================================ FILE: src/vr_bitops.h ================================================ #ifndef _VR_BITOPS_H_ #define _VR_BITOPS_H_ size_t redisPopcount(void *s, long count); long redisBitpos(void *s, unsigned long count, int bit); void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value); void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value); uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits); int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits); int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit); int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int 
owtype, int64_t *limit); void printBits(unsigned char *p, unsigned long count); int getBitOffsetFromArgument(struct client *c, robj *o, size_t *offset, int hash, int bits); int getBitfieldTypeFromArgument(struct client *c, robj *o, int *sign, int *bits); robj *lookupStringForBitCommand(struct client *c, size_t maxbit, int *expired); void setbitCommand(struct client *c); void getbitCommand(struct client *c); void bitopCommand(struct client *c); void bitcountCommand(struct client *c); void bitposCommand(struct client *c); void bitfieldCommand(client *c); #endif ================================================ FILE: src/vr_block.c ================================================ #include /* Unblock a client calling the right function depending on the kind * of operation the client is blocking for. */ void unblockClient(client *c) { if (c->btype == BLOCKED_LIST) { unblockClientWaitingData(c); } else if (c->btype == BLOCKED_WAIT) { unblockClientWaitingReplicas(c); } else { serverPanic("Unknown btype in unblockClient()."); } /* Clear the flags, and put the client in the unblocked list so that * we'll process new commands in its query buffer ASAP. */ c->flags &= ~CLIENT_BLOCKED; c->btype = BLOCKED_NONE; c->vel->bpop_blocked_clients--; /* The client may already be into the unblocked list because of a previous * blocking operation, don't add back it into the list multiple times. */ if (!(c->flags & CLIENT_UNBLOCKED)) { c->flags |= CLIENT_UNBLOCKED; dlistAddNodeTail(c->vel->unblocked_clients,c); } } /* Get a timeout value from an object and store it into 'timeout'. * The final timeout is always stored as milliseconds as a time where the * timeout will expire, however the parsing is performed according to * the 'unit' that can be seconds or milliseconds. * * Note that if the timeout is zero (usually from the point of view of * commands API this means no timeout) the value stored into 'timeout' * is zero. 
*/ int getTimeoutFromObjectOrReply(client *c, robj *object, long long *timeout, int unit) { long long tval; if (getLongLongFromObjectOrReply(c,object,&tval, "timeout is not an integer or out of range") != VR_OK) return VR_ERROR; if (tval < 0) { addReplyError(c,"timeout is negative"); return VR_ERROR; } if (tval > 0) { if (unit == UNIT_SECONDS) tval *= 1000; tval += vr_msec_now(); } *timeout = tval; return VR_OK; } /* Block a client for the specific operation type. Once the CLIENT_BLOCKED * flag is set client query buffer is not longer processed, but accumulated, * and will be processed when the client is unblocked. */ void blockClient(client *c, int btype) { c->flags |= CLIENT_BLOCKED; c->btype = btype; c->vel->bpop_blocked_clients++; } ================================================ FILE: src/vr_block.h ================================================ #ifndef _VR_BLOCK_H_ #define _VR_BLOCK_H_ /* This structure holds the blocking operation state for a client. * The fields used depend on client->btype. */ typedef struct blockingState { /* Generic fields. */ long long timeout; /* Blocking operation timeout. If UNIX current time * is > timeout then the operation timed out. */ /* BLOCKED_LIST */ dict *keys; /* The keys we are waiting to terminate a blocking * operation such as BLPOP. Otherwise NULL. */ robj *target; /* The key that should receive the element, * for BRPOPLPUSH. */ /* BLOCKED_WAIT */ int numreplicas; /* Number of replicas we are waiting for ACK. */ long long reploffset; /* Replication offset to reach. 
*/ } blockingState; void blockClient(struct client *c, int btype); void unblockClient(struct client *c); int getTimeoutFromObjectOrReply(struct client *c, robj *object, long long *timeout, int unit); #endif ================================================ FILE: src/vr_client.c ================================================ #include int ncurr_cconn = 0; /* current # client connections */ static void setProtocolError(client *c, int pos); /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute * the client output buffer size. */ size_t sdsZmallocSize(sds s) { void *sh = sdsAllocPtr(s); return dmalloc_size(sh); } void *dupClientReplyValue(void *o) { return o; } void freeClientReplyValue(void *o) { freeObject(o); } int listMatchObjects(void *a, void *b) { return equalStringObjects(a,b); } client *createClient(vr_eventloop *vel, struct conn *conn) { client *c = dalloc(sizeof(client)); /* passing -1 as fd it is possible to create a non connected client. * This is useful since all the commands needs to be executed * in the context of a client. When commands are executed in other * contexts (for instance a Lua script) we need a non connected client. 
*/ if (conn->sd != -1) { vr_set_nonblocking(conn->sd); vr_set_tcpnodelay(conn->sd); if (server.tcpkeepalive) vr_set_tcpkeepalive(conn->sd,server.tcpkeepalive,0,0); if (aeCreateFileEvent(vel->el,conn->sd,AE_READABLE, readQueryFromClient, c) == AE_ERR) { log_error("Unrecoverable error creating client ipfd file event."); dfree(c); return NULL; } } selectDb(c,0); c->id = vel->next_client_id++; c->conn = conn; c->vel = vel; c->scanid = -1; c->name = NULL; c->bufpos = 0; c->querybuf = sdsempty(); c->querybuf_peak = 0; c->reqtype = 0; c->argc = 0; c->argv = NULL; c->cmd = c->lastcmd = NULL; c->multibulklen = 0; c->bulklen = -1; c->sentlen = 0; c->flags = 0; c->ctime = c->lastinteraction = vel->unixtime; c->authenticated = 0; c->replstate = REPL_STATE_NONE; c->repl_put_online_on_ack = 0; c->reploff = 0; c->repl_ack_off = 0; c->repl_ack_time = 0; c->slave_listening_port = 0; c->slave_capa = SLAVE_CAPA_NONE; c->reply = dlistCreate(); c->reply_bytes = 0; c->obuf_soft_limit_reached_time = 0; dlistSetFreeMethod(c->reply,freeClientReplyValue); dlistSetDupMethod(c->reply,dupClientReplyValue); c->btype = BLOCKED_NONE; c->bpop.timeout = 0; c->bpop.keys = dictCreate(&setDictType,NULL); c->bpop.target = NULL; c->bpop.numreplicas = 0; c->bpop.reploffset = 0; c->woff = 0; c->watched_keys = dlistCreate(); c->pubsub_channels = dictCreate(&setDictType,NULL); c->pubsub_patterns = dlistCreate(); c->peerid = NULL; c->curidx = -1; c->taridx = -1; c->steps = 0; c->cache = NULL; dlistSetFreeMethod(c->pubsub_patterns,decrRefCountVoid); dlistSetMatchMethod(c->pubsub_patterns,listMatchObjects); if (conn->sd != -1) dlistAddNodeTail(vel->clients,c); initClientMultiState(c); return c; } /* This function is called every time we are going to transmit new data * to the client. 
The behavior is the following: * * If the client should receive new data (normal clients will) the function * returns VR_OK, and make sure to install the write handler in our event * loop so that when the socket is writable new data gets written. * * If the client should not receive new data, because it is a fake client * (used to load AOF in memory), a master or because the setup of the write * handler failed, the function returns VR_ERROR. * * The function may return VR_OK without actually installing the write * event handler in the following cases: * * 1) The event handler should already be installed since the output buffer * already contained something. * 2) The client is a slave but not yet online, so we want to just accumulate * writes in the buffer but not actually sending them yet. * * Typically gets called every time a reply is built, before adding more * data to the clients output buffers. If the function returns VR_ERROR no * data should be appended to the output buffers. */ int prepareClientToWrite(client *c) { /* If it's the Lua client we always return ok without installing any * handler since there is no socket at all. */ if (c->flags & CLIENT_LUA) return VR_OK; /* CLIENT REPLY OFF / SKIP handling: don't send replies. */ if (c->flags & (CLIENT_REPLY_OFF|CLIENT_REPLY_SKIP)) return VR_ERROR; /* Masters don't receive replies, unless CLIENT_MASTER_FORCE_REPLY flag * is set. */ if ((c->flags & CLIENT_MASTER) && !(c->flags & CLIENT_MASTER_FORCE_REPLY)) return VR_ERROR; if (c->conn->sd <= 0) return VR_ERROR; /* Fake client for AOF loading. */ /* Schedule the client to write the output buffers to the socket only * if not already done (there were no pending writes already and the client * was yet not flagged), and, for slaves, if the slave can actually * receive writes at this stage. 
*/ if (!clientHasPendingReplies(c) && !(c->flags & CLIENT_PENDING_WRITE) && (c->replstate == REPL_STATE_NONE || (c->replstate == SLAVE_STATE_ONLINE && !c->repl_put_online_on_ack))) { /* Here instead of installing the write handler, we just flag the * client and put it into a list of clients that have something * to write to the socket. This way before re-entering the event * loop, we can try to directly write to the client sockets avoiding * a system call. We'll only really install the write handler if * we'll not be able to write the whole reply at once. */ c->flags |= CLIENT_PENDING_WRITE; dlistAddNodeHead(c->vel->clients_pending_write,c); } /* Authorize the caller to queue in the output buffer of this client. */ return VR_OK; } /* Create a duplicate of the last object in the reply list when * it is not exclusively owned by the reply list. */ robj *dupLastObjectIfNeeded(dlist *reply) { robj *new, *cur; dlistNode *ln; ASSERT(dlistLength(reply) > 0); ln = dlistLast(reply); cur = dlistNodeValue(ln); if (cur->constant) { new = dupStringObject(cur); dlistNodeValue(ln) = new; } return dlistNodeValue(ln); } /* ----------------------------------------------------------------------------- * Low level functions to add more data to output buffers. * -------------------------------------------------------------------------- */ int _addReplyToBuffer(client *c, const char *s, size_t len) { size_t available = sizeof(c->buf)-c->bufpos; if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return VR_OK; /* If there already are entries in the reply list, we cannot * add anything more to the static buffer. */ if (dlistLength(c->reply) > 0) return VR_ERROR; /* Check that the buffer has enough space available for this string. 
*/ if (len > available) return VR_ERROR; memcpy(c->buf+c->bufpos,s,len); c->bufpos+=len; return VR_OK; } void _addReplyObjectToList(client *c, robj *o) { robj *tail, *obj; if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return; if (dlistLength(c->reply) == 0) { if (o->constant) obj = o; else obj = dupStringObject(o); dlistAddNodeTail(c->reply,obj); c->reply_bytes += getStringObjectSdsUsedMemory(obj); } else { tail = dlistNodeValue(dlistLast(c->reply)); /* Append to this object when possible. */ if (tail->ptr != NULL && tail->encoding == OBJ_ENCODING_RAW && sdslen(tail->ptr)+sdslen(o->ptr) <= PROTO_REPLY_CHUNK_BYTES) { c->reply_bytes -= sdsZmallocSize(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); c->reply_bytes += sdsZmallocSize(tail->ptr); } else { if (o->constant) obj = o; else obj = dupStringObject(o); dlistAddNodeTail(c->reply,obj); c->reply_bytes += getStringObjectSdsUsedMemory(obj); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* This method takes responsibility over the sds. When it is no longer * needed it will be free'd, otherwise it ends up in a robj. */ void _addReplySdsToList(client *c, sds s) { robj *tail; if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { sdsfree(s); return; } if (dlistLength(c->reply) == 0) { dlistAddNodeTail(c->reply,createObject(OBJ_STRING,s)); c->reply_bytes += sdsZmallocSize(s); } else { tail = dlistNodeValue(dlistLast(c->reply)); /* Append to this object when possible. 
*/ if (tail->ptr != NULL && tail->encoding == OBJ_ENCODING_RAW && sdslen(tail->ptr)+sdslen(s) <= PROTO_REPLY_CHUNK_BYTES) { c->reply_bytes -= sdsZmallocSize(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); c->reply_bytes += sdsZmallocSize(tail->ptr); sdsfree(s); } else { dlistAddNodeTail(c->reply,createObject(OBJ_STRING,s)); c->reply_bytes += sdsZmallocSize(s); } } asyncCloseClientOnOutputBufferLimitReached(c); } void _addReplyStringToList(client *c, const char *s, size_t len) { robj *tail; if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return; if (dlistLength(c->reply) == 0) { robj *o = createStringObject(s,len); dlistAddNodeTail(c->reply,o); c->reply_bytes += getStringObjectSdsUsedMemory(o); } else { tail = dlistNodeValue(dlistLast(c->reply)); /* Append to this object when possible. */ if (tail->ptr != NULL && tail->encoding == OBJ_ENCODING_RAW && sdslen(tail->ptr)+len <= PROTO_REPLY_CHUNK_BYTES) { c->reply_bytes -= sdsZmallocSize(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); c->reply_bytes += sdsZmallocSize(tail->ptr); } else { robj *o = createStringObject(s,len); dlistAddNodeTail(c->reply,o); c->reply_bytes += getStringObjectSdsUsedMemory(o); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* ----------------------------------------------------------------------------- * Higher level functions to queue data on the client output buffer. * The following functions are the ones that commands implementations will call. * -------------------------------------------------------------------------- */ void addReply(client *c, robj *obj) { if (prepareClientToWrite(c) != VR_OK) return; /* This is an important place where we can avoid copy-on-write * when there is a saving child running, avoiding touching the * refcount field of the object if it's not needed. 
* * If the encoding is RAW and there is room in the static buffer * we'll be able to send the object to the client without * messing with its page. */ if (sdsEncodedObject(obj)) { if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != VR_OK) _addReplyObjectToList(c,obj); } else if (obj->encoding == OBJ_ENCODING_INT) { robj *obj_new; /* Optimization: if there is room in the static buffer for 32 bytes * (more than the max chars a 64 bit integer can take as string) we * avoid decoding the object and go for the lower level approach. */ if (dlistLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) { char buf[32]; int len; len = ll2string(buf,sizeof(buf),(long)obj->ptr); if (_addReplyToBuffer(c,buf,len) == VR_OK) return; /* else... continue with the normal code path, but should never * happen actually since we verified there is room. */ } obj_new = getDecodedObject(obj); if (_addReplyToBuffer(c,obj_new->ptr,sdslen(obj_new->ptr)) != VR_OK) _addReplyObjectToList(c,obj_new); if (obj_new != obj) freeObject(obj_new); } else { serverPanic("Wrong obj->encoding in addReply()"); } } void addReplySds(client *c, sds s) { if (prepareClientToWrite(c) != VR_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; } if (_addReplyToBuffer(c,s,sdslen(s)) == VR_OK) { sdsfree(s); } else { /* This method free's the sds when it is no longer needed. */ _addReplySdsToList(c,s); } } void addReplyString(client *c, const char *s, size_t len) { if (prepareClientToWrite(c) != VR_OK) return; if (_addReplyToBuffer(c,s,len) != VR_OK) _addReplyStringToList(c,s,len); } void addReplyErrorLength(client *c, const char *s, size_t len) { addReplyString(c,"-ERR ",5); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyError(client *c, const char *err) { addReplyErrorLength(c,err,strlen(err)); } void addReplyErrorFormat(client *c, const char *fmt, ...) 
{ size_t l, j; va_list ap; va_start(ap,fmt); sds s = sdscatvprintf(sdsempty(),fmt,ap); va_end(ap); /* Make sure there are no newlines in the string, otherwise invalid protocol * is emitted. */ l = sdslen(s); for (j = 0; j < l; j++) { if (s[j] == '\r' || s[j] == '\n') s[j] = ' '; } addReplyErrorLength(c,s,sdslen(s)); sdsfree(s); } void addReplyStatusLength(client *c, const char *s, size_t len) { addReplyString(c,"+",1); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyStatus(client *c, const char *status) { addReplyStatusLength(c,status,strlen(status)); } void addReplyStatusFormat(client *c, const char *fmt, ...) { va_list ap; va_start(ap,fmt); sds s = sdscatvprintf(sdsempty(),fmt,ap); va_end(ap); addReplyStatusLength(c,s,sdslen(s)); sdsfree(s); } /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(client *c) { /* Note that we install the write event here even if the object is not * ready to be sent, since we are sure that before returning to the * event loop setDeferredMultiBulkLength() will be called. */ if (prepareClientToWrite(c) != VR_OK) return NULL; dlistAddNodeTail(c->reply,createObject(OBJ_STRING,NULL)); return dlistLast(c->reply); } /* Populate the length object and try gluing it to the next chunk. */ void setDeferredMultiBulkLength(client *c, void *node, long length) { dlistNode *ln = (dlistNode*)node; robj *len, *next; /* Abort when *node is NULL (see addDeferredMultiBulkLength). */ if (node == NULL) return; len = dlistNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); len->encoding = OBJ_ENCODING_RAW; /* in case it was an EMBSTR. 
*/ c->reply_bytes += sdsZmallocSize(len->ptr); if (ln->next != NULL) { next = dlistNodeValue(ln->next); /* Only glue when the next node is non-NULL (an sds in this case) */ if (next->ptr != NULL) { c->reply_bytes -= sdsZmallocSize(len->ptr); c->reply_bytes -= getStringObjectSdsUsedMemory(next); len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); c->reply_bytes += sdsZmallocSize(len->ptr); dlistDelNode(c->reply,ln->next); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* Add a double as a bulk reply */ void addReplyDouble(client *c, double d) { char dbuf[128], sbuf[128]; int dlen, slen; if (isinf(d)) { /* Libc in odd systems (Hi Solaris!) will format infinite in a * different way, so better to handle it in an explicit way. */ addReplyBulkCString(c, d > 0 ? "inf" : "-inf"); } else { dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d); slen = snprintf(sbuf,sizeof(sbuf),"$%d\r\n%s\r\n",dlen,dbuf); addReplyString(c,sbuf,slen); } } /* Add a long double as a bulk reply, but uses a human readable formatting * of the double instead of exposing the crude behavior of doubles to the * dear user. */ void addReplyHumanLongDouble(client *c, long double d) { robj *o = createStringObjectFromLongDouble(d,1); addReplyBulk(c,o); decrRefCount(o); } /* Add a long long as integer reply or bulk len / multi bulk count. * Basically this is used to output . */ void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) { char buf[128]; int len; /* Things like $3\r\n or *2\r\n are emitted very often by the protocol * so we have a few shared objects to use if the integer is small * like it is most of the times. 
*/ if (prefix == '*' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) { addReply(c,shared.mbulkhdr[ll]); return; } else if (prefix == '$' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) { addReply(c,shared.bulkhdr[ll]); return; } buf[0] = prefix; len = ll2string(buf+1,sizeof(buf)-1,ll); buf[len+1] = '\r'; buf[len+2] = '\n'; addReplyString(c,buf,len+3); } void addReplyLongLong(client *c, long long ll) { if (ll == 0) addReply(c,shared.czero); else if (ll == 1) addReply(c,shared.cone); else addReplyLongLongWithPrefix(c,ll,':'); } void addReplyMultiBulkLen(client *c, long length) { if (length < OBJ_SHARED_BULKHDR_LEN) addReply(c,shared.mbulkhdr[length]); else addReplyLongLongWithPrefix(c,length,'*'); } /* Create the length prefix of a bulk reply, example: $2234 */ void addReplyBulkLen(client *c, robj *obj) { size_t len; if (sdsEncodedObject(obj)) { len = sdslen(obj->ptr); } else { long n = (long)obj->ptr; /* Compute how many bytes will take this integer as a radix 10 string */ len = 1; if (n < 0) { len++; n = -n; } while((n = n/10) != 0) { len++; } } if (len < OBJ_SHARED_BULKHDR_LEN) addReply(c,shared.bulkhdr[len]); else addReplyLongLongWithPrefix(c,len,'$'); } /* Add a Redis Object as a bulk reply */ void addReplyBulk(client *c, robj *obj) { addReplyBulkLen(c,obj); addReply(c,obj); addReply(c,shared.crlf); } /* Add a C buffer as bulk reply */ void addReplyBulkCBuffer(client *c, const void *p, size_t len) { addReplyLongLongWithPrefix(c,len,'$'); addReplyString(c,p,len); addReply(c,shared.crlf); } /* Add sds to reply (takes ownership of this sds and frees it) */ void addReplyBulkSds(client *c, sds s) { addReplySds(c,sdscatfmt(sdsempty(),"$%u\r\n", (unsigned long)sdslen(s))); addReplySds(c,s); addReply(c,shared.crlf); } /* Add a C nul term string as bulk reply */ void addReplyBulkCString(client *c, const char *s) { if (s == NULL) { addReply(c,shared.nullbulk); } else { addReplyBulkCBuffer(c,s,strlen(s)); } } /* Add a long long as a bulk reply */ void addReplyBulkLongLong(client *c, 
long long ll) {
    char buf[64];
    int len;

    len = ll2string(buf,64,ll);
    addReplyBulkCBuffer(c,buf,len);
}

/* Copy 'src' client output buffers into 'dst' client output buffers.
 * The function takes care of freeing the old output buffers of the
 * destination client.
 *
 * Both the reply list (via dlistDup) and the first src->bufpos bytes of the
 * static buffer are copied, together with bufpos and reply_bytes. */
void copyClientOutputBuffer(client *dst, client *src) {
    dlistRelease(dst->reply);
    dst->reply = dlistDup(src->reply);
    memcpy(dst->buf,src->buf,src->bufpos);
    dst->bufpos = src->bufpos;
    dst->reply_bytes = src->reply_bytes;
}

/* Return true if the specified client has pending reply buffers to write to
 * the socket, that is, when the static buffer or the reply list is not
 * empty. */
int clientHasPendingReplies(client *c) {
    return c->bufpos || dlistLength(c->reply);
}

/* Release every argument object of the current command and reset argc and
 * cmd. The argv array itself is not released here. */
static void freeClientArgv(client *c) {
    int j;
    for (j = 0; j < c->argc; j++)
        freeObject(c->argv[j]);
    c->argc = 0;
    c->cmd = NULL;
}

/* Close all the slaves connections. This is useful in chained replication
 * when we resync with our own master and want to force all our slaves to
 * resync with us as well.
 *
 * NOTE(review): the loop never advances the list itself; it presumably
 * relies on freeClient() removing the client from repl.slaves -- confirm
 * that this holds for every client stored in that list. */
void disconnectSlaves(void) {
    while (dlistLength(repl.slaves)) {
        dlistNode *ln = dlistFirst(repl.slaves);
        freeClient((client*)ln->value);
    }
}

/* Remove the specified client from eventloop lists where the client could
 * be referenced from this eventloop, not including the Pub/Sub channels.
 * This is used when clients jump between workers.
 *
 * c->vel is always reset to NULL. When c->steps >= 1 nothing else is done.
 * Unlike unlinkClient(), the connection is not released: only the file
 * events registered for it are deleted. */
void unlinkClientFromEventloop(client *c) {
    dlistNode *ln;
    vr_eventloop *vel = c->vel;

    c->vel = NULL;

    if (c->steps >= 1)
        return;

    /* If this is marked as current client unset it. */
    if (vel->current_client == c) vel->current_client = NULL;

    /* Certain operations must be done only if the client has an active socket.
     * If the client was already unlinked or if it's a "fake client" the
     * fd is already set to -1. */
    if (c->conn->sd != -1) {
        /* Remove from the list of active clients. */
        ln = dlistSearchKey(vel->clients,c);
        ASSERT(ln != NULL);
        dlistDelNode(vel->clients,ln);

        /* Unregister async I/O handlers (the socket is left open here). */
        aeDeleteFileEvent(vel->el,c->conn->sd,AE_READABLE);
        aeDeleteFileEvent(vel->el,c->conn->sd,AE_WRITABLE);
    }

    /* Remove from the list of pending writes if needed. */
    if (c->flags & CLIENT_PENDING_WRITE) {
        ln = dlistSearchKey(vel->clients_pending_write,c);
        ASSERT(ln != NULL);
        dlistDelNode(vel->clients_pending_write,ln);
        c->flags &= ~CLIENT_PENDING_WRITE;
    }

    /* When client was just unblocked because of a blocking operation,
     * remove it from the list of unblocked clients. */
    if (c->flags & CLIENT_UNBLOCKED) {
        ln = dlistSearchKey(vel->unblocked_clients,c);
        ASSERT(ln != NULL);
        dlistDelNode(vel->unblocked_clients,ln);
        c->flags &= ~CLIENT_UNBLOCKED;
    }
}

/* Attach the client to the event loop 'vel'.
 *
 * The client is pushed on vel->clients, c->vel is set and a readable event
 * is registered for its socket; if the registration fails the client is
 * freed. Then the data already present in the query buffer is processed.
 * After that, if the client is flagged CLIENT_JUMP it is handed over to
 * dispatch_conn_exist() with c->taridx; otherwise, if replies are pending
 * and the client is not flagged CLIENT_PENDING_WRITE, the write handler is
 * installed, scheduling an asynchronous free of the client on failure. */
void linkClientToEventloop(client *c,vr_eventloop *vel) {
    dlistPush(vel->clients,c);
    c->vel = vel;

    if (aeCreateFileEvent(vel->el,c->conn->sd,AE_READABLE,
        readQueryFromClient,c) == AE_ERR) {
        freeClient(c);
        return;
    }

    /* Handle the remaining query buffer */
    processInputBuffer(c);

    if (c->flags&CLIENT_JUMP) {
        dispatch_conn_exist(c,c->taridx);
    } else {
        if (clientHasPendingReplies(c) && !(c->flags&CLIENT_PENDING_WRITE)) {
            if (aeCreateFileEvent(vel->el, c->conn->sd, AE_WRITABLE,
                sendReplyToClient, c) == AE_ERR) {
                freeClientAsync(c);
            }
        }
    }
}

/* Remove the specified client from global lists where the client could
 * be referenced, not including the Pub/Sub channels.
 * This is used by freeClient() and replicationCacheMaster(). */
void unlinkClient(client *c) {
    dlistNode *ln;

    /* If this is marked as current client unset it. */
    if (c->vel->current_client == c) c->vel->current_client = NULL;

    /* Certain operations must be done only if the client has an active socket.
     * If the client was already unlinked or if it's a "fake client" the
     * fd is already set to -1. */
    if (c->conn->sd != -1) {
        /* Remove from the list of active clients. */
        ln = dlistSearchKey(c->vel->clients,c);
        ASSERT(ln != NULL);
        dlistDelNode(c->vel->clients,ln);

        /* Unregister async I/O handlers and close the socket.
*/
        aeDeleteFileEvent(c->vel->el,c->conn->sd,AE_READABLE);
        aeDeleteFileEvent(c->vel->el,c->conn->sd,AE_WRITABLE);
        conn_put(c->conn);
        c->conn = NULL;
    }

    /* Remove from the list of pending writes if needed. */
    if (c->flags & CLIENT_PENDING_WRITE) {
        ln = dlistSearchKey(c->vel->clients_pending_write,c);
        ASSERT(ln != NULL);
        dlistDelNode(c->vel->clients_pending_write,ln);
        c->flags &= ~CLIENT_PENDING_WRITE;
    }

    /* When client was just unblocked because of a blocking operation,
     * remove it from the list of unblocked clients. */
    if (c->flags & CLIENT_UNBLOCKED) {
        ln = dlistSearchKey(c->vel->unblocked_clients,c);
        ASSERT(ln != NULL);
        dlistDelNode(c->vel->unblocked_clients,ln);
        c->flags &= ~CLIENT_UNBLOCKED;
    }
}

/* Release a client and everything it owns.
 *
 * A client flagged CLIENT_MASTER may instead be handed over to
 * replicationCacheMaster() (see the first check below), in which case the
 * function returns without freeing anything. Otherwise the query buffer,
 * the blocking state, the watched keys, the Pub/Sub subscriptions, the
 * reply list and the arguments are released, the client is unlinked from
 * its event loop, the replication bookkeeping is updated and finally the
 * client structure itself is freed. */
void freeClient(client *c) {
    dlistNode *ln;

    /* If it is our master that's being disconnected we should make sure
     * to cache the state to try a partial resynchronization later.
     *
     * Note that before doing this we make sure that the client is not in
     * some unexpected state, by checking its flags. */
    if (repl.role == REPLICATION_ROLE_MASTER && c->flags & CLIENT_MASTER) {
        log_warn("connection with master lost.");
        if (!(c->flags & (CLIENT_CLOSE_AFTER_REPLY|
                          CLIENT_CLOSE_ASAP|
                          CLIENT_BLOCKED|
                          CLIENT_UNBLOCKED)))
        {
            replicationCacheMaster(c);
            return;
        }
    }

    /* Log link disconnection with slave */
    if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR)) {
        log_warn("connection with slave %s lost.",
            replicationGetSlaveName(c));
    }

    /* Free the query buffer */
    sdsfree(c->querybuf);
    c->querybuf = NULL;

    /* Deallocate structures used to block on blocking ops. */
    if (c->flags & CLIENT_BLOCKED) unblockClient(c);
    dictRelease(c->bpop.keys);

    /* UNWATCH all the keys */
    unwatchAllKeys(c);
    dlistRelease(c->watched_keys);

    /* Unsubscribe from all the pubsub channels */
    pubsubUnsubscribeAllChannels(c,0);
    pubsubUnsubscribeAllPatterns(c,0);
    dictRelease(c->pubsub_channels);
    dlistRelease(c->pubsub_patterns);

    /* Free data structures. */
    dlistRelease(c->reply);
    freeClientArgv(c);

    /* Unlink the client: this will close the socket, remove the I/O
     * handlers, and remove references of the client from different
     * places where active clients may be referenced. */
    unlinkClient(c);

    /* Master/slave cleanup Case 1:
     * we lost the connection with a slave. */
    if (c->flags & CLIENT_SLAVE) {
        if (c->replstate == SLAVE_STATE_SEND_BULK) {
            if (c->repldbfd != -1) close(c->repldbfd);
            if (c->replpreamble) sdsfree(c->replpreamble);
        }
        /* Monitors and slaves are tracked in two different lists. */
        dlist *l = (c->flags & CLIENT_MONITOR) ? server.monitors : repl.slaves;
        ln = dlistSearchKey(l,c);
        ASSERT(ln != NULL);
        dlistDelNode(l,ln);
        /* We need to remember the time when we started to have zero
         * attached slaves, as after some time we'll free the replication
         * backlog. */
        if (c->flags & CLIENT_SLAVE && dlistLength(repl.slaves) == 0)
            repl.repl_no_slaves_since = c->vel->unixtime;
        refreshGoodSlavesCount();
    }

    /* Master/slave cleanup Case 2:
     * we lost the connection with the master. */
    if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();

    /* If this client was scheduled for async freeing we need to remove it
     * from the queue. */
    if (c->flags & CLIENT_CLOSE_ASAP) {
        ln = dlistSearchKey(c->vel->clients_to_close,c);
        ASSERT(ln != NULL);
        dlistDelNode(c->vel->clients_to_close,ln);
    }

    /* Release other dynamically allocated client structure fields,
     * and finally release the client structure itself. */
    if (c->name) freeObject(c->name);
    if (c->argv) dfree(c->argv);
    freeClientMultiState(c);
    sdsfree(c->peerid);
    dfree(c);
}

/* Schedule a client to free it at a safe time in the serverCron() function.
 * This function is useful when we need to terminate a client but we are in
 * a context where calling freeClient() is not possible, because the client
 * should be valid for the continuation of the flow of the program.
*/ void freeClientAsync(client *c) { if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return; c->flags |= CLIENT_CLOSE_ASAP; dlistAddNodeTail(c->vel->clients_to_close,c); } void freeClientsInAsyncFreeQueue(vr_eventloop *vel) { while (dlistLength(vel->clients_to_close)) { dlistNode *ln = dlistFirst(vel->clients_to_close); client *c = dlistNodeValue(ln); c->flags &= ~CLIENT_CLOSE_ASAP; freeClient(c); dlistDelNode(vel->clients_to_close,ln); } } /* Write data in output buffers to client. Return VR_OK if the client * is still valid after the call, VR_ERROR if it was freed. */ int writeToClient(int fd, client *c, int handler_installed) { ssize_t nwritten = 0, totwritten = 0; size_t objlen; size_t objmem; robj *o; long long maxmemory; maxmemory = c->vel->cc.maxmemory; while(clientHasPendingReplies(c)) { if (c->bufpos > 0) { nwritten = vr_write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen); if (nwritten <= 0) break; c->sentlen += nwritten; totwritten += nwritten; /* If the buffer was sent, set bufpos to zero to continue with * the remainder of the reply. 
*/ if ((int)c->sentlen == c->bufpos) { c->bufpos = 0; c->sentlen = 0; } } else { o = dlistNodeValue(dlistFirst(c->reply)); objlen = sdslen(o->ptr); objmem = getStringObjectSdsUsedMemory(o); if (objlen == 0) { dlistDelNode(c->reply,dlistFirst(c->reply)); c->reply_bytes -= objmem; continue; } nwritten = vr_write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen); if (nwritten <= 0) break; c->sentlen += nwritten; totwritten += nwritten; /* If we fully sent the object on head go to the next one */ if (c->sentlen == objlen) { dlistDelNode(c->reply,dlistFirst(c->reply)); c->sentlen = 0; c->reply_bytes -= objmem; } } /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world * scenario think about 'KEYS *' against the loopback interface). * * However if we are over the maxmemory limit we ignore that and * just deliver as much data as it is possible to deliver. */ if (totwritten > NET_MAX_WRITES_PER_EVENT && (maxmemory == 0 || dalloc_used_memory() < maxmemory)) break; } if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { log_debug(LOG_VERB, "error writing to client: %s", strerror(errno)); freeClient(c); return VR_ERROR; } } if (totwritten > 0) { update_stats_add(c->vel->stats, net_output_bytes, (long long)totwritten); /* For clients representing masters we don't count sending data * as an interaction, since we always send REPLCONF ACK commands * that take some time to just fill the socket output buffer. * We just rely on data / pings received for timeout detection. */ if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = c->vel->unixtime; } if (!clientHasPendingReplies(c)) { c->sentlen = 0; if (handler_installed) aeDeleteFileEvent(c->vel->el,c->conn->sd,AE_WRITABLE); /* Close connection after entire reply has been sent. 
*/ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { freeClient(c); return VR_ERROR; } } return VR_OK; } /* Write event handler. Just send data to the client. */ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { UNUSED(el); UNUSED(mask); writeToClient(fd,privdata,1); } /* This function is called just before entering the event loop, in the hope * we can just write the replies to the client output buffer without any * need to use a syscall in order to install the writable event handler, * get it called, and so forth. */ int handleClientsWithPendingWrites(vr_eventloop *vel) { dlistIter li; dlistNode *ln; int processed = dlistLength(vel->clients_pending_write); dlistRewind(vel->clients_pending_write,&li); while((ln = dlistNext(&li))) { client *c = dlistNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; dlistDelNode(vel->clients_pending_write,ln); /* Try to write buffers to the client socket. */ if (writeToClient(c->conn->sd,c,0) == VR_ERROR) continue; /* If there is nothing left, do nothing. Otherwise install * the write handler. */ if (clientHasPendingReplies(c) && aeCreateFileEvent(vel->el, c->conn->sd, AE_WRITABLE, sendReplyToClient, c) == AE_ERR) { freeClientAsync(c); } } return processed; } /* resetClient prepare the client to process the next command */ void resetClient(client *c) { redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL; if (c->flags&CLIENT_JUMP) return; freeClientArgv(c); c->reqtype = 0; c->multibulklen = 0; c->bulklen = -1; /* Remove the CLIENT_REPLY_SKIP flag if any so that the reply * to the next command will be sent, but set the flag if the command * we just processed was "CLIENT REPLY SKIP". 
*/ c->flags &= ~CLIENT_REPLY_SKIP; if (c->flags & CLIENT_REPLY_SKIP_NEXT) { c->flags |= CLIENT_REPLY_SKIP; c->flags &= ~CLIENT_REPLY_SKIP_NEXT; } } int processInlineBuffer(client *c) { char *newline; int argc, j; sds *argv, aux; size_t querylen; /* Search for end of line */ newline = strchr(c->querybuf,'\n'); /* Nothing to do without a \r\n */ if (newline == NULL) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big inline request"); setProtocolError(c,0); } return VR_ERROR; } /* Handle the \r\n case. */ if (newline && newline != c->querybuf && *(newline-1) == '\r') newline--; /* Split the input buffer up to the \r\n */ querylen = newline-(c->querybuf); aux = sdsnewlen(c->querybuf,querylen); argv = sdssplitargs(aux,&argc); sdsfree(aux); if (argv == NULL) { addReplyError(c,"Protocol error: unbalanced quotes in request"); setProtocolError(c,0); return VR_ERROR; } /* Newline from slaves can be used to refresh the last ACK time. * This is useful for a slave to ping back while loading a big * RDB file. */ if (querylen == 0 && c->flags & CLIENT_SLAVE) c->repl_ack_time = c->vel->unixtime; /* Leave data after the first line of the query in the buffer */ sdsrange(c->querybuf,querylen+2,-1); /* Setup argv array on client structure */ if (argc) { if (c->argv) dfree(c->argv); c->argv = dalloc(sizeof(robj*)*argc); } /* Create redis objects for all arguments. */ for (c->argc = 0, j = 0; j < argc; j++) { if (sdslen(argv[j])) { c->argv[c->argc] = createObject(OBJ_STRING,argv[j]); c->argc++; } else { sdsfree(argv[j]); } } dfree(argv); return VR_OK; } /* Helper function. Trims query buffer to make the function that processes * multi bulk requests idempotent. 
*/ static void setProtocolError(client *c, int pos) { if (log_loggable(LOG_VERB)) { sds client = catClientInfoString(sdsempty(),c); log_debug(LOG_VERB, "Protocol error from client: %s", client); sdsfree(client); } c->flags |= CLIENT_CLOSE_AFTER_REPLY; sdsrange(c->querybuf,pos,-1); } int processMultibulkBuffer(client *c) { char *newline = NULL; int pos = 0, ok; long long ll; if (c->multibulklen == 0) { /* The client should have been reset */ serverAssertWithInfo(c,NULL,c->argc == 0); /* Multi bulk length cannot be read without a \r\n */ newline = strchr(c->querybuf,'\r'); if (newline == NULL) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big mbulk count string"); setProtocolError(c,0); } return VR_ERROR; } /* Buffer should also contain \n */ if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) return VR_ERROR; /* We know for sure there is a whole line since newline != NULL, * so go ahead and find out the multi bulk length. */ serverAssertWithInfo(c,NULL,c->querybuf[0] == '*'); ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll); if (!ok || ll > 1024*1024) { addReplyError(c,"Protocol error: invalid multibulk length"); setProtocolError(c,pos); return VR_ERROR; } pos = (newline-c->querybuf)+2; if (ll <= 0) { sdsrange(c->querybuf,pos,-1); return VR_OK; } c->multibulklen = ll; /* Setup argv array on client structure */ if (c->argv) dfree(c->argv); c->argv = dalloc(sizeof(robj*)*c->multibulklen); } serverAssertWithInfo(c,NULL,c->multibulklen > 0); while(c->multibulklen) { /* Read bulk length if unknown */ if (c->bulklen == -1) { newline = strchr(c->querybuf+pos,'\r'); if (newline == NULL) { if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) { addReplyError(c, "Protocol error: too big bulk count string"); setProtocolError(c,0); return VR_ERROR; } break; } /* Buffer should also contain \n */ if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) break; if (c->querybuf[pos] != '$') { addReplyErrorFormat(c, 
"Protocol error: expected '$', got '%c'", c->querybuf[pos]); setProtocolError(c,pos); return VR_ERROR; } ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll); if (!ok || ll < 0 || ll > 512*1024*1024) { addReplyError(c,"Protocol error: invalid bulk length"); setProtocolError(c,pos); return VR_ERROR; } pos += newline-(c->querybuf+pos)+2; if (ll >= PROTO_MBULK_BIG_ARG) { size_t qblen; /* If we are going to read a large object from network * try to make it likely that it will start at c->querybuf * boundary so that we can optimize object creation * avoiding a large copy of data. */ sdsrange(c->querybuf,pos,-1); pos = 0; qblen = sdslen(c->querybuf); /* Hint the sds library about the amount of bytes this string is * going to contain. */ if (qblen < (size_t)ll+2) c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2-qblen); } c->bulklen = ll; } /* Read bulk argument */ if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) { /* Not enough data (+2 == trailing \r\n) */ break; } else { /* Optimization: if the buffer contains JUST our bulk element * instead of creating a new object by *copying* the sds we * just use the current sds string. */ if (pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG && (signed) sdslen(c->querybuf) == c->bulklen+2) { c->argv[c->argc++] = createObject(OBJ_STRING,c->querybuf); sdsIncrLen(c->querybuf,-2); /* remove CRLF */ /* Assume that if we saw a fat argument we'll see another one * likely... 
*/ c->querybuf = sdsnewlen(NULL,c->bulklen+2); sdsclear(c->querybuf); pos = 0; } else { c->argv[c->argc++] = createStringObject(c->querybuf+pos,c->bulklen); pos += c->bulklen+2; } c->bulklen = -1; c->multibulklen--; } } /* Trim to pos */ if (pos) sdsrange(c->querybuf,pos,-1); /* We're done when c->multibulk == 0 */ if (c->multibulklen == 0) return VR_OK; /* Still not read to process the command */ return VR_ERROR; } void processInputBuffer(client *c) { c->vel->current_client = c; /* Keep processing while there is something in the input buffer */ while(sdslen(c->querybuf)) { /* Return if clients are paused. */ if (!(c->flags & CLIENT_SLAVE) && clientsArePaused(c->vel)) break; /* Immediately abort if the client is in the middle of something. */ if (c->flags & CLIENT_BLOCKED) break; /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is * written to the client. Make sure to not let the reply grow after * this flag has been set (i.e. don't process more commands). */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) break; /* Determine request type when unknown. */ if (!c->reqtype) { if (c->querybuf[0] == '*') { c->reqtype = PROTO_REQ_MULTIBULK; } else { c->reqtype = PROTO_REQ_INLINE; } } if (c->reqtype == PROTO_REQ_INLINE) { if (processInlineBuffer(c) != VR_OK) break; } else if (c->reqtype == PROTO_REQ_MULTIBULK) { if (processMultibulkBuffer(c) != VR_OK) break; } else { serverPanic("Unknown request type"); } /* Multibulk processing could see a <= 0 length. */ if (c->argc == 0) { resetClient(c); } else { /* Only reset the client when the command was executed. */ if (processCommand(c) == VR_OK) resetClient(c); /* freeMemoryIfNeeded may flush slave output buffers. This may result * into a slave, that may be the active client, to be freed. */ if (c->vel->current_client == NULL) break; /* If this client need to jump to another worker, * break this while loop. When this client jumped finished, * continue handle the remain query buffer. 
*/ if (c->flags&CLIENT_JUMP) break; } } c->vel->current_client = NULL; } void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { client *c = (client*) privdata; int nread, readlen; size_t qblen; UNUSED(el); UNUSED(mask); readlen = PROTO_IOBUF_LEN; /* If this is a multi bulk request, and we are processing a bulk reply * that is large enough, try to maximize the probability that the query * buffer contains exactly the SDS string representing the object, even * at the risk of requiring more read(2) calls. This way the function * processMultiBulkBuffer() can avoid copying buffers to create the * Redis Object representing the argument. */ if (c->reqtype == PROTO_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1 && c->bulklen >= PROTO_MBULK_BIG_ARG) { int remaining = (unsigned)(c->bulklen+2)-sdslen(c->querybuf); if (remaining < readlen) readlen = remaining; } qblen = sdslen(c->querybuf); if (c->querybuf_peak < qblen) c->querybuf_peak = qblen; c->querybuf = sdsMakeRoomFor(c->querybuf, readlen); nread = vr_read(fd, c->querybuf+qblen, readlen); if (nread == -1) { if (errno == EAGAIN) { return; } else { log_debug(LOG_VERB, "reading from client: %s",strerror(errno)); freeClient(c); return; } } else if (nread == 0) { log_debug(LOG_VERB, "client closed connection"); freeClient(c); return; } sdsIncrLen(c->querybuf,nread); c->lastinteraction = c->vel->unixtime; if (c->flags & CLIENT_MASTER) c->reploff += nread; update_stats_add(c->vel->stats, net_input_bytes, nread); if (sdslen(c->querybuf) > server.client_max_querybuf_len) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); log_warn("closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClient(c); return; } processInputBuffer(c); if (c->flags&CLIENT_JUMP) { dispatch_conn_exist(c,c->taridx); } } void getClientsMaxBuffers(vr_eventloop *vel, unsigned long *longest_output_list, 
unsigned long *biggest_input_buffer) { client *c; dlistNode *ln; dlistIter li; unsigned long lol = 0, bib = 0; dlistRewind(vel->clients,&li); while ((ln = dlistNext(&li)) != NULL) { c = dlistNodeValue(ln); if (dlistLength(c->reply) > lol) lol = dlistLength(c->reply); if (sdslen(c->querybuf) > bib) bib = sdslen(c->querybuf); } *longest_output_list = lol; *biggest_input_buffer = bib; } /* A Redis "Peer ID" is a colon separated ip:port pair. * For IPv4 it's in the form x.y.z.k:port, example: "127.0.0.1:1234". * For IPv6 addresses we use [] around the IP part, like in "[::1]:1234". * For Unix sockets we use path:0, like in "/tmp/redis:0". * * A Peer ID always fits inside a buffer of NET_PEER_ID_LEN bytes, including * the null term. * * On failure the function still populates 'peerid' with the "?:0" string * in case you want to relax error checking or need to display something * anyway (see anetPeerToString implementation for more info). */ void genClientPeerId(client *client, char *peerid, size_t peerid_len) { if (client->flags & CLIENT_UNIX_SOCKET) { /* Unix socket client. */ snprintf(peerid,peerid_len,"%s:0",server.unixsocket); } else { /* TCP client. */ vr_net_format_peer(client->conn->sd,peerid,peerid_len); } } /* This function returns the client peer id, by creating and caching it * if client->peerid is NULL, otherwise returning the cached value. * The Peer ID never changes during the life of the client, however it * is expensive to compute. */ char *getClientPeerId(client *c) { char peerid[VR_INET_PEER_ID_LEN]; if (c->peerid == NULL) { genClientPeerId(c,peerid,sizeof(peerid)); c->peerid = sdsnew(peerid); } return c->peerid; } /* Concatenate a string representing the state of a client in an human * readable format, into the sds string 's'. 
*/
sds catClientInfoString(sds s, client *client) {
    char flags[16], events[3];
    char *cursor;
    int ev_mask = 0;
    long long age, idle;
    char *name_str;

    /* One letter per set flag; a lone 'N' when none of them is set. */
    cursor = flags;
    if (client->flags & CLIENT_SLAVE)
        *cursor++ = (client->flags & CLIENT_MONITOR) ? 'O' : 'S';
    if (client->flags & CLIENT_MASTER) *cursor++ = 'M';
    if (client->flags & CLIENT_MULTI) *cursor++ = 'x';
    if (client->flags & CLIENT_BLOCKED) *cursor++ = 'b';
    if (client->flags & CLIENT_DIRTY_CAS) *cursor++ = 'd';
    if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *cursor++ = 'c';
    if (client->flags & CLIENT_UNBLOCKED) *cursor++ = 'u';
    if (client->flags & CLIENT_CLOSE_ASAP) *cursor++ = 'A';
    if (client->flags & CLIENT_UNIX_SOCKET) *cursor++ = 'U';
    if (client->flags & CLIENT_READONLY) *cursor++ = 'r';
    if (cursor == flags) *cursor++ = 'N';
    *cursor = '\0';

    /* Events registered for the socket, if the client has one. */
    if (client->conn->sd != -1)
        ev_mask = aeGetFileEvents(client->vel->el,client->conn->sd);

    cursor = events;
    if (ev_mask & AE_READABLE) *cursor++ = 'r';
    if (ev_mask & AE_WRITABLE) *cursor++ = 'w';
    *cursor = '\0';

    /* Both durations are measured against the cached clock of the
     * event loop the client belongs to. */
    age = (long long)(client->vel->unixtime - client->ctime);
    idle = (long long)(client->vel->unixtime - client->lastinteraction);

    if (client->name)
        name_str = (char*)client->name->ptr;
    else
        name_str = "";

    return sdscatfmt(s,
        "oid=%i id=%U addr=%s fd=%i name=%s age=%I idle=%I flags=%s db=%i sub=%i psub=%i multi=%i qbuf=%U qbuf-free=%U obl=%U oll=%U omem=%U events=%s cmd=%s",
        client->curidx,
        (unsigned long long) client->id,
        getClientPeerId(client),
        client->conn->sd,
        name_str,
        age,
        idle,
        flags,
        client->dictid,
        (int) dictSize(client->pubsub_channels),
        (int) dlistLength(client->pubsub_patterns),
        (client->flags & CLIENT_MULTI) ? client->mstate.count : -1,
        (unsigned long long) sdslen(client->querybuf),
        (unsigned long long) sdsavail(client->querybuf),
        (unsigned long long) client->bufpos,
        (unsigned long long) dlistLength(client->reply),
        (unsigned long long) getClientOutputBufferMemoryUsage(client),
        events,
        client->lastcmd ? client->lastcmd->name : "NULL");
}

/* Build a new sds string holding one catClientInfoString() line, terminated
 * by "\n", for every client attached to 'vel'. The string is pre-sized for
 * 200 bytes per client and returned to the caller, which owns it. */
sds getAllClientsInfoString(vr_eventloop *vel) {
    dlistIter iter;
    dlistNode *node;
    sds out;

    out = sdsnewlen(NULL,200*dlistLength(vel->clients));
    sdsclear(out);

    dlistRewind(vel->clients,&iter);
    for (node = dlistNext(&iter); node != NULL; node = dlistNext(&iter)) {
        out = catClientInfoString(out,dlistNodeValue(node));
        out = sdscatlen(out,"\n",1);
    }
    return out;
}

struct clientkilldata {
    sds addr;
    int type;
    uint64_t id;
    int skipme;
    int killed;
    int close_this_client;
};

void clientCommand(client *c) {
    dlistNode *ln;
    dlistIter li;
    client *client;

    if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) {
        /* CLIENT LIST */
        sds str = c->cache;
        sds o = getAllClientsInfoString(c->vel);
        str = sdscatsds(str?str:sdsempty(),o);
        if (c->steps >= (darray_n(&workers) - 1)) {
            addReplyBulkCBuffer(c,str,sdslen(str));
            c->steps = 0;
            c->taridx = -1;
            sdsfree(str);
            c->cache = NULL;
            c->flags &= ~CLIENT_JUMP;
        } else {
            if (!(c->flags&CLIENT_JUMP)) c->flags |= CLIENT_JUMP;
            c->taridx = worker_get_next_idx(c->curidx);
            c->cache = str;
        }
        sdsfree(o);
        return;
    } else if (!strcasecmp(c->argv[1]->ptr,"kill")) {
        /* CLIENT KILL
         * CLIENT KILL