Repository: grimm-co/killerbeez Branch: trunk Commit: 2f327b0c86a6 Files: 406 Total size: 1.9 MB Directory structure: gitextract_drfsjm0i/ ├── .gitattributes ├── .gitignore ├── .gitlab-ci.yml ├── .gitmodules ├── APLv2 ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── afl_progs/ │ ├── Makefile │ ├── afl-as.c │ ├── afl-as.h │ ├── afl-gcc.c │ ├── afl-showmap.c │ ├── alloc-inl.h │ ├── config.h │ ├── debug.h │ ├── hash.h │ ├── llvm_mode/ │ │ ├── Makefile │ │ ├── README.llvm │ │ ├── afl-clang-fast.c │ │ ├── afl-llvm-pass.so.cc │ │ └── afl-llvm-rt.o.c │ ├── qemu_mode/ │ │ ├── README.qemu │ │ ├── build_qemu_support.sh │ │ └── patches/ │ │ ├── afl-qemu-cpu-inl.h │ │ ├── afl-qemu-translate-inl.h │ │ ├── afl_qemu_optimize_entrypoint.diff │ │ ├── configure.diff │ │ ├── cpu-exec.diff │ │ ├── elfload.diff │ │ ├── memfd.diff │ │ ├── syscall.diff │ │ └── translate-all.diff │ ├── test-instr.c │ └── types.h ├── corpus/ │ ├── CMakeLists.txt │ ├── afl_test/ │ │ ├── Makefile │ │ └── test.c │ ├── cgc/ │ │ ├── REMATCH_2--Mail_Server--Crackaddr/ │ │ │ ├── README.md │ │ │ ├── inputs/ │ │ │ │ ├── ADDRESSBOOK.txt │ │ │ │ ├── LIST.txt │ │ │ │ ├── LISTALL.txt │ │ │ │ ├── POST.txt │ │ │ │ ├── READ.txt │ │ │ │ └── crash.txt │ │ │ ├── lib/ │ │ │ │ ├── cgc_ctype.h │ │ │ │ ├── cgc_libc.h │ │ │ │ ├── cgc_malloc.h │ │ │ │ ├── cgc_math.h │ │ │ │ ├── cgc_prng.h │ │ │ │ ├── cgc_shell.h │ │ │ │ ├── cgc_stdarg.h │ │ │ │ ├── cgc_stdint.h │ │ │ │ ├── cgc_stdio.h │ │ │ │ ├── cgc_stdlib.h │ │ │ │ ├── cgc_string.h │ │ │ │ ├── ctype.c │ │ │ │ ├── libc.c │ │ │ │ ├── malloc.c │ │ │ │ ├── math.c │ │ │ │ ├── prng.c │ │ │ │ ├── shell.c │ │ │ │ ├── stdio.c │ │ │ │ ├── stdlib.c │ │ │ │ └── string.c │ │ │ ├── notes.txt │ │ │ └── src/ │ │ │ ├── cgc_crackaddr.h │ │ │ ├── cgc_sendmail.h │ │ │ ├── crackaddr.c │ │ │ └── sendmail.c │ │ ├── SOLFEDGE/ │ │ │ ├── README.md │ │ │ ├── inputs/ │ │ │ │ ├── crash.txt │ │ │ │ ├── crash2.txt │ │ │ │ └── input.txt │ │ │ ├── lib/ │ │ │ │ ├── cgc_libc.h │ │ │ │ └── libc.c │ │ 
│ ├── notes.txt │ │ │ └── src/ │ │ │ ├── cgc_operation.h │ │ │ ├── cgc_service.h │ │ │ ├── operation.c │ │ │ └── service.c │ │ ├── String_Storage_and_Retrieval/ │ │ │ ├── README.md │ │ │ ├── inputs/ │ │ │ │ ├── crash.txt │ │ │ │ └── input.txt │ │ │ ├── lib/ │ │ │ │ ├── cgc_mymath.h │ │ │ │ ├── cgc_stdarg.h │ │ │ │ ├── cgc_stdint.h │ │ │ │ ├── cgc_stdlib.h │ │ │ │ ├── malloc.c │ │ │ │ ├── mymath.c │ │ │ │ ├── new_printf.c │ │ │ │ └── stdlib.c │ │ │ ├── notes.txt │ │ │ └── src/ │ │ │ ├── bst.c │ │ │ ├── delete_matches.c │ │ │ ├── find_matches.c │ │ │ ├── parse.c │ │ │ ├── service.c │ │ │ └── string_token.c │ │ ├── UTF-late/ │ │ │ ├── README.md │ │ │ ├── inputs/ │ │ │ │ ├── crash.txt │ │ │ │ └── input.txt │ │ │ ├── lib/ │ │ │ │ ├── cgc_libc.h │ │ │ │ ├── cgc_list.h │ │ │ │ ├── cgc_utf8.h │ │ │ │ ├── cgc_vfs.h │ │ │ │ ├── libc.c │ │ │ │ ├── malloc.c │ │ │ │ ├── utf8.c │ │ │ │ └── vfs.c │ │ │ ├── notes.txt │ │ │ └── src/ │ │ │ └── service.c │ │ └── cotton_swab_arithmetic/ │ │ ├── README.md │ │ ├── inputs/ │ │ │ ├── crash.txt │ │ │ ├── crash2.txt │ │ │ └── input.txt │ │ ├── lib/ │ │ │ ├── cgc_libc.h │ │ │ └── libc.c │ │ ├── notes.txt │ │ └── src/ │ │ ├── cgc_service.h │ │ └── service.c │ ├── google/ │ │ ├── README.md │ │ └── vorbis/ │ │ ├── build.sh │ │ ├── decode_fuzzer.cc │ │ ├── decode_fuzzer.exe.stackdump │ │ ├── inputs/ │ │ │ ├── crash-23c2d78e497bf4aebe5859e3092657cb0af4c299 │ │ │ ├── crash-8c5dea6410b0fb0b21ff968a9966a0bd7956405f │ │ │ ├── crash-e86e0482b8d66f924e50e62f5d7cc36a0acb03a7 │ │ │ └── sound.ogg │ │ └── notes.txt │ ├── hang/ │ │ ├── CMakeLists.txt │ │ └── hang.c │ ├── libtest/ │ │ ├── CMakeLists.txt │ │ ├── lib1.c │ │ ├── lib2.c │ │ ├── libs.h │ │ └── test.c │ ├── network/ │ │ ├── CMakeLists.txt │ │ ├── client/ │ │ │ ├── client.cpp │ │ │ ├── client.vcxproj │ │ │ └── client.vcxproj.filters │ │ ├── close.txt │ │ ├── multipart.txt │ │ ├── network.sln │ │ └── server/ │ │ ├── server.cpp │ │ ├── server.vcxproj │ │ └── server.vcxproj.filters │ ├── persist/ │ │ 
├── CMakeLists.txt │ │ └── test.c │ └── test/ │ ├── CMakeLists.txt │ ├── inputs/ │ │ ├── close.txt │ │ ├── crash.txt │ │ ├── input.txt │ │ ├── multipart.txt │ │ └── telnet_multipart.txt │ ├── notes.txt │ └── test.c ├── docs/ │ ├── AFL.md │ ├── BUILD.md │ ├── CI.md │ ├── DynamoRIO.md │ ├── IPT.md │ ├── Server.md │ ├── api/ │ │ ├── Makefile │ │ ├── README.txt │ │ ├── api.tex │ │ ├── api_driver.tex │ │ ├── api_instrumentation.tex │ │ ├── api_mutator.tex │ │ ├── api_structures.tex │ │ ├── coverpage.tex │ │ ├── defines.tex │ │ ├── files/ │ │ │ ├── driver_t.c │ │ │ ├── instrumentation_edge_t.c │ │ │ ├── instrumentation_t.c │ │ │ └── mutator_t.c │ │ ├── helpers.tex │ │ └── packages.tex │ └── paper/ │ ├── Makefile │ ├── abstract.tex │ ├── acknowledgments.tex │ ├── background.tex │ ├── conclusion.tex │ ├── data/ │ │ ├── Makefile │ │ └── picker.gnuplot │ ├── future_work.tex │ ├── implementation.tex │ ├── introduction.tex │ ├── killerbeez.tex │ ├── overview.tex │ ├── packages.tex │ ├── references.tex │ └── related_work.tex ├── driver/ │ ├── CMakeLists.txt │ ├── driver.c │ ├── driver.h │ ├── driver_factory.c │ ├── driver_factory.h │ ├── file_driver.c │ ├── file_driver.h │ ├── network_client_driver.c │ ├── network_client_driver.h │ ├── network_server_driver.c │ ├── network_server_driver.h │ ├── stdin_driver.c │ ├── stdin_driver.h │ ├── wmp_driver.cpp │ └── wmp_driver.h ├── fuzzer/ │ ├── CMakeLists.txt │ └── main.c ├── instrumentation/ │ ├── CMakeLists.txt │ ├── afl_instrumentation.c │ ├── afl_instrumentation.h │ ├── debug_instrumentation.c │ ├── debug_instrumentation.h │ ├── dynamorio_instrumentation.c │ ├── dynamorio_instrumentation.h │ ├── forkserver.c │ ├── forkserver.h │ ├── forkserver_config.h │ ├── forkserver_hooking.c │ ├── forkserver_internal.h │ ├── instrumentation.c │ ├── instrumentation.h │ ├── instrumentation_factory.c │ ├── instrumentation_factory.h │ ├── linux_ipt_instrumentation.c │ ├── linux_ipt_instrumentation.h │ ├── return_code_instrumentation.c │ ├── 
return_code_instrumentation.h │ ├── uthash.h │ ├── winafl_alloc_inl.h │ ├── winafl_config.h │ ├── winafl_debug.h │ ├── winafl_hash.h │ ├── winafl_types.h │ ├── wingui.c │ ├── wingui.h │ ├── xxhash.c │ └── xxhash.h ├── jansson/ │ ├── CMakeLists.txt │ ├── dump.c │ ├── error.c │ ├── hashtable.c │ ├── hashtable.h │ ├── hashtable_seed.c │ ├── jansson.h │ ├── jansson_config.h │ ├── jansson_helper.c │ ├── jansson_helper.h │ ├── jansson_private.h │ ├── jansson_private_config.h │ ├── load.c │ ├── lookup3.h │ ├── memory.c │ ├── pack_unpack.c │ ├── strbuffer.c │ ├── strbuffer.h │ ├── strconv.c │ ├── utf.c │ ├── utf.h │ └── value.c ├── merger/ │ ├── CMakeLists.txt │ └── merger.c ├── mutators/ │ ├── APLv2 │ ├── CMakeLists.txt │ ├── LICENSE │ ├── afl_mutator/ │ │ ├── CMakeLists.txt │ │ ├── afl_mutator.c │ │ └── afl_mutator.h │ ├── arithmetic_mutator/ │ │ ├── CMakeLists.txt │ │ ├── arithmetic_mutator.c │ │ └── arithmetic_mutator.h │ ├── bit_flip_mutator/ │ │ ├── CMakeLists.txt │ │ ├── bit_flip_mutator.c │ │ └── bit_flip_mutator.h │ ├── dictionary_mutator/ │ │ ├── CMakeLists.txt │ │ ├── dictionary_mutator.c │ │ └── dictionary_mutator.h │ ├── havoc_mutator/ │ │ ├── CMakeLists.txt │ │ ├── havoc_mutator.c │ │ └── havoc_mutator.h │ ├── honggfuzz_mutator/ │ │ ├── CMakeLists.txt │ │ ├── honggfuzz_mutator.c │ │ └── honggfuzz_mutator.h │ ├── interesting_value_mutator/ │ │ ├── CMakeLists.txt │ │ ├── interesting_value_mutator.c │ │ └── interesting_value_mutator.h │ ├── multipart_mutator/ │ │ ├── CMakeLists.txt │ │ ├── multipart_mutator.c │ │ └── multipart_mutator.h │ ├── mutator_tester/ │ │ ├── CMakeLists.txt │ │ ├── mutator_tester.c │ │ └── mutator_tester.h │ ├── mutators/ │ │ ├── CMakeLists.txt │ │ ├── afl_config.h │ │ ├── afl_debug.h │ │ ├── afl_helpers.c │ │ ├── afl_helpers.h │ │ ├── afl_types.h │ │ ├── mutators.c │ │ └── mutators.h │ ├── ni_mutator/ │ │ ├── CMakeLists.txt │ │ ├── ni_mutator.c │ │ └── ni_mutator.h │ ├── nop_mutator/ │ │ ├── CMakeLists.txt │ │ ├── nop_mutator.c │ │ └── 
nop_mutator.h │ ├── radamsa_mutator/ │ │ ├── CMakeLists.txt │ │ ├── radamsa_mutator.c │ │ └── radamsa_mutator.h │ ├── splice_mutator/ │ │ ├── CMakeLists.txt │ │ ├── splice_mutator.c │ │ └── splice_mutator.h │ └── zzuf_mutator/ │ ├── CMakeLists.txt │ ├── zzuf_mutator.c │ └── zzuf_mutator.h ├── picker/ │ ├── CMakeLists.txt │ └── main.c ├── python/ │ └── manager/ │ ├── app/ │ │ ├── __init__.py │ │ ├── config.py │ │ └── encoder.py │ ├── controller/ │ │ ├── Config.py │ │ ├── File.py │ │ ├── Hello.py │ │ ├── Job.py │ │ ├── Log.py │ │ ├── Minimize.py │ │ ├── Results.py │ │ ├── Status.py │ │ ├── Target.py │ │ ├── Update.py │ │ └── __init__.py │ ├── lib/ │ │ ├── __init__.py │ │ ├── boinc.py │ │ ├── errors.py │ │ └── fuzzer.py │ ├── model/ │ │ ├── Config.py │ │ ├── FuzzingJob.py │ │ ├── FuzzingResults.py │ │ ├── FuzzingTarget.py │ │ ├── __init__.py │ │ ├── instrumentation_state.py │ │ ├── job_inputs.py │ │ └── tracer_info.py │ ├── requirements.txt │ ├── server.py │ └── tests/ │ ├── job_query_test.py │ ├── minimizer_test.py │ └── seeds.py ├── server/ │ ├── add_target.py │ ├── boinc_submit.py │ ├── killerbeez_assimilator.py │ └── skel/ │ ├── templates/ │ │ ├── windows_x86_64_in │ │ ├── windows_x86_64_out │ │ ├── x86_64-pc-linux-gnu_in │ │ └── x86_64-pc-linux-gnu_out │ ├── windows_x86_64/ │ │ ├── flatten_results.ps1 │ │ ├── job.xml │ │ ├── unpack_killerbeez.ps1 │ │ └── version.xml │ └── x86_64-pc-linux-gnu/ │ ├── flatten_results.sh │ ├── job.xml │ ├── unpack_killerbeez.sh │ └── version.xml ├── tests/ │ ├── build.bat │ ├── smoke_test.sh │ └── test-fuzzer.sh ├── tools/ │ ├── README.md │ ├── release_excludes.txt │ ├── release_vs2017.bat │ ├── release_vs2019.bat │ └── setup_build_env.ps1 ├── tracer/ │ ├── CMakeLists.txt │ └── main.c ├── utils/ │ ├── XGetopt.c │ ├── XGetopt.h │ ├── global_types.h │ ├── mutator_factory.c │ ├── mutator_factory.h │ ├── utils.c │ └── utils.h ├── vagrant/ │ ├── README.md │ ├── ci_runner/ │ │ ├── Dockerfile │ │ ├── Vagrantfile │ │ ├── 
register_docker_runner.sh │ │ ├── runner.ps1 │ │ ├── runner.sh │ │ └── runner_vars.example │ ├── debian/ │ │ ├── buster/ │ │ │ └── Vagrantfile │ │ ├── jessie/ │ │ │ └── Vagrantfile │ │ └── stretch/ │ │ └── Vagrantfile │ ├── dependencies.sh │ ├── fedora/ │ │ ├── 29/ │ │ │ └── Vagrantfile │ │ └── 30/ │ │ └── Vagrantfile │ ├── setup.sh │ └── ubuntu/ │ ├── bionic/ │ │ └── Vagrantfile │ ├── disco/ │ │ └── Vagrantfile │ ├── trusty/ │ │ └── Vagrantfile │ └── xenial/ │ └── Vagrantfile └── winafl/ ├── CMakeLists.txt ├── modules.c ├── modules.h ├── utils.h └── winafl.c ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ ############################################################################### # Set default behavior to automatically normalize line endings. ############################################################################### * text=auto ############################################################################### # Set default behavior for command prompt diff. # # This is needed for earlier builds of msysgit that do not have it on by # default for csharp files. # Note: This is only used by command line ############################################################################### #*.cs diff=csharp ############################################################################### # Set the merge driver for project and solution files # # Merging from the command prompt will add diff markers to the files if there # are conflicts (Merging from VS is not affected by the settings below, in VS # the diff markers are never inserted). Diff markers may cause the following # file extensions to fail to load in VS. An alternative would be to treat # these files as binary and thus will always conflict and require user # intervention with every merge.
To do so, just uncomment the entries below ############################################################################### #*.sln merge=binary #*.csproj merge=binary #*.vbproj merge=binary #*.vcxproj merge=binary #*.vcproj merge=binary #*.dbproj merge=binary #*.fsproj merge=binary #*.lsproj merge=binary #*.wixproj merge=binary #*.modelproj merge=binary #*.sqlproj merge=binary #*.wwaproj merge=binary ############################################################################### # behavior for image files # # image files are treated as binary by default. ############################################################################### #*.jpg binary #*.png binary #*.gif binary ############################################################################### # diff behavior for common document formats # # Convert binary document formats to text before diffing them. This feature # is only available from the command line. Turn it on by uncommenting the # entries below. ############################################################################### #*.doc diff=astextplain #*.DOC diff=astextplain #*.docx diff=astextplain #*.DOCX diff=astextplain #*.dot diff=astextplain #*.DOT diff=astextplain #*.pdf diff=astextplain #*.PDF diff=astextplain #*.rtf diff=astextplain #*.RTF diff=astextplain ================================================ FILE: .gitignore ================================================ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. 
# User-specific files *.suo *.user *.userosscache *.sln.docstates # User-specific files (MonoDevelop/Xamarin Studio) *.userprefs # Build results [Dd]ebug/ [Dd]ebugPublic/ [Rr]elease/ [Rr]eleases/ x64/ x86/ bld/ [Bb]in/ [Oo]bj/ [Ll]og/ # Test program builds corpus/persist/deferred corpus/persist/deferred_nohook corpus/persist/nopersist corpus/persist/persist corpus/persist/persist_hang corpus/libtest/libtest1.so corpus/libtest/libtest2.so corpus/libtest/test corpus/libtest/pie corpus/afl_test/test corpus/afl_test/test32 corpus/afl_test/test-qemu corpus/afl_test/test-fast corpus/afl_test/test-fast-deferred corpus/afl_test/test-fast-persist corpus/afl_test/test-fast-persist-deferred corpus/afl_test/test-fast-persist-hang # Visual Studio 2015 cache/options directory .vs/ # Uncomment if you have tasks that create the project's static files in wwwroot #wwwroot/ # MSTest test Results [Tt]est[Rr]esult*/ [Bb]uild[Ll]og.* # NUNIT *.VisualState.xml TestResult.xml # Build Results of an ATL Project [Dd]ebugPS/ [Rr]eleasePS/ dlldata.c # DNX project.lock.json project.fragment.lock.json artifacts/ *_i.c *_p.c *_i.h *.ilk *.meta *.obj *.pch *.pdb *.pgc *.pgd *.rsp *.sbr *.tlb *.tli *.tlh *.tmp *.tmp_proj *.log *.vspscc *.vssscc .builds *.pidb *.svclog *.scc # Chutzpah Test files _Chutzpah* # Visual C++ cache files ipch/ *.aps *.ncb *.opendb *.opensdf *.sdf *.cachefile *.VC.db *.VC.VC.opendb # Visual Studio profiler *.psess *.vsp *.vspx *.sap # TFS 2012 Local Workspace $tf/ # Guidance Automation Toolkit *.gpState # ReSharper is a .NET coding add-in _ReSharper*/ *.[Rr]e[Ss]harper *.DotSettings.user # JustCode is a .NET coding add-in .JustCode # TeamCity is a build add-in _TeamCity* # DotCover is a Code Coverage Tool *.dotCover # NCrunch _NCrunch_* .*crunch*.local.xml nCrunchTemp_* # MightyMoose *.mm.* AutoTest.Net/ # Web workbench (sass) .sass-cache/ # Installshield output folder [Ee]xpress/ # DocProject is a documentation generator add-in DocProject/buildhelp/ DocProject/Help/*.HxT 
DocProject/Help/*.HxC DocProject/Help/*.hhc DocProject/Help/*.hhk DocProject/Help/*.hhp DocProject/Help/Html2 DocProject/Help/html # Click-Once directory publish/ # Publish Web Output *.[Pp]ublish.xml *.azurePubxml # TODO: Comment the next line if you want to check in your web deploy settings # but database connection strings (with potential passwords) will be unencrypted #*.pubxml *.publishproj # Microsoft Azure Web App publish settings. Comment the next line if you want to # check in your Azure Web App publish settings, but sensitive information contained # in these scripts will be unencrypted PublishScripts/ # NuGet Packages *.nupkg # The packages folder can be ignored because of Package Restore **/packages/* # except build/, which is used as an MSBuild target. !**/packages/build/ # Uncomment if necessary; however, generally it will be regenerated when needed #!**/packages/repositories.config # NuGet v3's project.json files produce more ignorable files *.nuget.props *.nuget.targets # Microsoft Azure Build Output csx/ *.build.csdef # Microsoft Azure Emulator ecf/ rcf/ # Windows Store app package directories and files AppPackages/ BundleArtifacts/ Package.StoreAssociation.xml _pkginfo.txt # Visual Studio cache files # files ending in .cache can be ignored *.[Cc]ache # but keep track of directories ending in .cache !*.[Cc]ache/ # Others ClientBin/ ~$* *~ *.dbmdl *.dbproj.schemaview *.jfm *.pfx *.publishsettings node_modules/ orleans.codegen.cs # Since there are multiple workflows, uncomment next line to ignore bower_components # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) #bower_components/ # RIA/Silverlight projects Generated_Code/ # Backup & report files from converting an old project file # to a newer Visual Studio version.
Backup files are not needed, # because we have git ;-) _UpgradeReport_Files/ Backup*/ UpgradeLog*.XML UpgradeLog*.htm # SQL Server files *.mdf *.ldf # Business Intelligence projects *.rdl.data *.bim.layout *.bim_*.settings # Microsoft Fakes FakesAssemblies/ # GhostDoc plugin setting file *.GhostDoc.xml # Node.js Tools for Visual Studio .ntvs_analysis.dat # Visual Studio 6 build log *.plg # Visual Studio 6 workspace options file *.opt # Visual Studio LightSwitch build output **/*.HTMLClient/GeneratedArtifacts **/*.DesktopClient/GeneratedArtifacts **/*.DesktopClient/ModelManifest.xml **/*.Server/GeneratedArtifacts **/*.Server/ModelManifest.xml _Pvt_Extensions # Paket dependency manager .paket/paket.exe paket-files/ # FAKE - F# Make .fake/ # JetBrains Rider .idea/ *.sln.iml # CodeRush .cr/ # Python Tools for Visual Studio (PTVS) __pycache__/ *.pyc # VIM files *.swp *.swo # LaTeX files *.toc *.aux *.pdf *.out # sqlite database files *.db # CMake directory build/ # AFL program files afl_progs/afl-as afl_progs/afl-clang afl_progs/afl-clang++ afl_progs/afl-g++ afl_progs/afl-gcc afl_progs/afl-qemu-trace afl_progs/afl-showmap afl_progs/as afl_progs/qemu_mode/qemu-2.10.0.tar.xz afl_progs/qemu_mode/qemu-2.10.0/ afl_progs/afl-clang-fast afl_progs/afl-clang-fast++ afl_progs/afl-llvm-pass.so afl_progs/afl-llvm-rt-64.o afl_progs/afl-llvm-rt-32.o afl_progs/afl-llvm-rt.o ================================================ FILE: .gitlab-ci.yml ================================================ stages: - test - release build-windows-vs2017: script: - call tools\release_vs2017.bat variables: GIT_STRATEGY: clone GIT_SUBMODULE_STRATEGY: recursive tags: - windows - vs2017 only: - web artifacts: paths: - release\killerbeez-*.zip expire_in: 1 week stage: release build-windows-vs2019: script: - call tools\release_vs2019.bat variables: GIT_STRATEGY: clone GIT_SUBMODULE_STRATEGY: recursive tags: - windows - vs2019 only: - web artifacts: paths: - release\killerbeez-*.zip expire_in: 1 week stage: 
release release-linux: script: - mkdir build - cd build - cmake .. - make release image: killerbeez-builder:latest variables: GIT_STRATEGY: clone GIT_SUBMODULE_STRATEGY: recursive tags: - ubuntu-16.04 only: - web artifacts: paths: - build/killerbeez-*.zip expire_in: 1 week stage: release # Smoke tests on all platforms .smoketest-linux: &smoketest-linux script: - cd .. - killerbeez/tests/smoke_test.sh variables: GIT_STRATEGY: clone GIT_SUBMODULE_STRATEGY: recursive only: - web stage: test # The build is broken on most of the platforms allow_failure: true smoketest-ubuntu-14.04: <<: *smoketest-linux tags: - ubuntu-14.04 smoketest-ubuntu-16.04: <<: *smoketest-linux tags: - ubuntu-16.04 # This platform is supposed to be working allow_failure: false smoketest-ubuntu-18.04: <<: *smoketest-linux tags: - ubuntu-18.04 smoketest-ubuntu-19.04: <<: *smoketest-linux tags: - ubuntu-19.04 smoketest-fedora-29: <<: *smoketest-linux tags: - fedora-29 smoketest-fedora-30: <<: *smoketest-linux tags: - fedora-30 smoketest-debian-8: <<: *smoketest-linux tags: - debian-8 smoketest-debian-9: <<: *smoketest-linux tags: - debian-9 smoketest-debian-10: <<: *smoketest-linux tags: - debian-10 ================================================ FILE: .gitmodules ================================================ [submodule "server/boinc"] path = server/boinc url = https://github.com/BOINC/boinc.git ================================================ FILE: APLv2 ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (killerbeez) include(ExternalProject) set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -D_DEBUG") # for _DEBUG ifdefs in utils.h set(CMAKE_POSITION_INDEPENDENT_CODE ON) if (UNIX) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wfatal-errors") # quit after first error endif (UNIX) if (APPLE) set(CMAKE_MACOSX_RPATH 1) # https://github.com/liballeg/allegro5/issues/532#issuecomment-170338164 endif (APPLE) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug) # for gcc -g message("WARNING: Building with debug options; performance will be impacted. 
Try cmake -DCMAKE_BUILD_TYPE=Release ..") endif() if (WIN32) # windows/visual studio build convention eg build/X86/Debug SET ( BUILD_DIRECTORY ${CMAKE_SOURCE_DIR}/build/${CMAKE_C_COMPILER_ARCHITECTURE_ID}/${CMAKE_BUILD_TYPE} ) add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_DEPRECATION_DISABLE -D_CRT_NONSTDC_NO_DEPRECATE) else (WIN32) SET ( BUILD_DIRECTORY ${CMAKE_BINARY_DIR} ) endif (WIN32) SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/ ) SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/ ) SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/ ) # Make linux builds relocatable SET(CMAKE_BUILD_WITH_INSTALL_RPATH 1) SET(CMAKE_INSTALL_RPATH "$ORIGIN") # add headers for utils from utils source folder/repo include_directories (jansson/) include_directories (utils/) # IWYU. Pass -DNO_IWYU=1 to disable, -DNO_IWYU= to re-enable (or regenerate cache) if (UNIX AND NOT NO_IWYU) find_program(iwyu_path NAMES include-what-you-use iwyu) if(NOT iwyu_path) # can be installed with sudo apt install iwyu -y message(STATUS "Could not find include-what-you-use, continuing without it") else() message(STATUS "Found include-what-you-use, displaying its suggestions") set(CMAKE_C_INCLUDE_WHAT_YOU_USE ${iwyu_path}) set(CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${iwyu_path}) endif() else() message(STATUS "include-what-you-use disabled") endif() add_subdirectory(jansson/) # The general mutator library with the common functionality add_subdirectory(mutators) add_subdirectory(corpus) # test programs add_subdirectory(fuzzer) # instantiates & coordinates other parts add_subdirectory(driver) # starts program, feeds input, determines when program is done add_subdirectory(instrumentation) # inserts instructions to program to tell whether an input makes the binary take a new path add_subdirectory(merger) # merges instrumentation data between fuzzer nodes add_subdirectory(tracer) # runs through program and records basic block edges if (WIN32) add_subdirectory(picker) 
# picks which libraries of a target program are being used, and worth fuzzing add_subdirectory(winafl) # parts ripped from winafl for dynamorio endif (WIN32) ### RELEASE ZIP CONFIG ### # Choose what to install into the release zip install(DIRECTORY ${BUILD_DIRECTORY}/killerbeez DESTINATION . USE_SOURCE_PERMISSIONS) install(DIRECTORY ${BUILD_DIRECTORY}/mutators DESTINATION . USE_SOURCE_PERMISSIONS) ### BOINC wrapper ### # If on Windows, include prebuilt BOINC wrapper if (WIN32) SET(BOINC_WRAPPER C:/killerbeez/wrapper_26014_windows_x86_64.exe) endif () # If on Linux, we can build our own BOINC wrapper if (UNIX AND (NOT APPLE)) SET(BOINC_WRAPPER ${CMAKE_SOURCE_DIR}/server/boinc/samples/wrapper/wrapper) ExternalProject_Add(boinc-wrapper SOURCE_DIR ${CMAKE_SOURCE_DIR}/server/boinc DOWNLOAD_COMMAND cd ${CMAKE_SOURCE_DIR} && git submodule update --init server/boinc BUILD_IN_SOURCE true CONFIGURE_COMMAND ./_autosetup COMMAND ./configure --disable-server --disable-client --disable-manager --enable-boinczip BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} COMMAND ${CMAKE_MAKE_PROGRAM} -C samples/wrapper INSTALL_COMMAND "" ) endif () install(PROGRAMS ${BOINC_WRAPPER} DESTINATION server/skel) ### radamsa ### set(RADAMSA_URL https://gitlab.com/akihe/radamsa.git) ExternalProject_Add(radamsa GIT_REPOSITORY ${RADAMSA_URL} GIT_TAG develop BUILD_IN_SOURCE true CONFIGURE_COMMAND "" BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} INSTALL_COMMAND "" EXCLUDE_FROM_ALL true ) ExternalProject_Get_Property(radamsa SOURCE_DIR) install(DIRECTORY ${SOURCE_DIR}/bin DESTINATION radamsa USE_SOURCE_PERMISSIONS) install(FILES ${SOURCE_DIR}/LICENCE DESTINATION radamsa) # Set up CPack to generate the release zip SET(CPACK_GENERATOR "ZIP") # TODO: might want some kind of versioning or architecture in this name SET(CPACK_PACKAGE_FILE_NAME "killerbeez-${CMAKE_SYSTEM_NAME}") SET(CPACK_SOURCE_GENERATOR "ZIP") SET(CPACK_SOURCE_IGNORE_FILES "/build/;/server/boinc/") SET(CPACK_SOURCE_INSTALLED_DIRECTORIES 
"${CMAKE_SOURCE_DIR};killerbeez") include (CPack) # Special `release` target to ensure boinc-wrapper is built before package add_custom_target(release ${CMAKE_MAKE_PROGRAM} package) if (UNIX) if (APPLE) # macOS can't build the boinc wrapper right now, maybe we can patch # things up in the future to make this possible, but for now we're # just not building it. add_dependencies(release radamsa) else () add_dependencies(release radamsa boinc-wrapper) endif () endif () ================================================ FILE: LICENSE ================================================ Unless otherwise marked, this license applies to all code in this repository. University of Illinois/NCSA Open Source License (UIUC license) Copyright (c) 2018 Grimm. All rights reserved. Developed by: Software Security Group Grimm https://grimm-co.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. - Neither the names of Grimm, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. ================================================ FILE: Makefile ================================================
# Top-level convenience Makefile: the only real work is building the docs.
#
# `docs` is also the name of the directory the recipe descends into, so the
# target has to be declared phony. Written as `.PHONY=...` this was a plain
# variable assignment, which left `docs` an ordinary file target that make
# considers up to date as soon as the directory exists.
.PHONY: all clean docs

all: docs

docs: # Defer to the Makefile in the docs directory
	$(MAKE) -C docs
================================================ FILE: README.md ================================================ # Killerbeez Killerbeez is a modular fuzzing framework that aims to bring awesome tools together into a standard format. ## Table of Contents * [Motivation](#motivation) * [Getting Started](#getting-started) * [Windows](#windows) * [Linux and Mac](#linux-and-mac) * [Documentation](#documentation) * [Troubleshooting](#troubleshooting) ## Motivation Many fuzzing tools are "research-quality" code, which means they're difficult to incorporate with each other or make changes to short of forking. Killerbeez seeks to reduce the engineering effort required to bring these tools together. By writing things to a common API, we hope to encourage clean interfaces, which should discourage spaghetti code and make writing cross-platform tools easier. ## Getting Started We provide build instructions for Windows and Linux, and binaries for Windows. For instructions building Killerbeez from source, see the [BUILD instructions](docs/BUILD.md). Currently only the standalone client is available, server coming soon! ### Windows #### [Binary Releases](https://github.com/grimm-co/killerbeez/releases) If you don't want to build the project from source, you can try the [binary releases](https://github.com/grimm-co/killerbeez/releases) (though be warned they are likely out of date). They have been tested on the following operating systems.
| Windows Version| 64-Bit | 32-Bit | | -------------- | ------------ | --------------- | | Windows 7 | Not Working [1] | Not Working [1] | | Windows 8 | Working | Experimental [2] | | Windows 8.1 | Working | Experimental [2] | | Windows 10 | Experimental [2] | Experimental [2] | You will also need to install the 2017 Microsoft Visual C++ Redistributable. Please note that if you are running Killerbeez on a 64-bit host, you will need to install both the 64-bit and the 32-bit versions of the redistributable. - [64-Bit Redistributable Download](https://aka.ms/vs/15/release/vc_redist.x64.exe) - [32-Bit Redistributable Download](https://aka.ms/vs/15/release/vc_redist.x86.exe) [1] This is due to a compatibility problem with Windows 7 and DynamoRIO see [this issue](https://github.com/DynamoRIO/dynamorio/issues/2658) for more info. [2] Experimental status means that most of the features are working as expected, and a few are not. #### Quickstart and Examples ##### Fuzzing a simple test program: ``` REM Paste this into cmd.exe. REM Assuming you: set WORKDIR=C:/killerbeez REM Note: if using backslashes, they need to be escaped to be proper JSON. cd %WORKDIR%/killerbeez/build/x64/Debug/killerbeez echo {"path":"%WORKDIR%/killerbeez/corpus/test/test.exe","arguments":"@@"} > driver.json fuzzer.exe -n 9 -s "%WORKDIR%/killerbeez/corpus/test/inputs/close.txt" ^ -d driver.json file debug bit_flip ``` Successful output should look like ``` Wed Aug 8 18:27:08 2018 - INFO - Logging Started Wed Aug 8 18:27:09 2018 - CRITICAL - Found crashes Wed Aug 8 18:27:09 2018 - INFO - Ran 9 iterations in 1 seconds ``` ##### Fuzzing Windows Media Player Download a small video file you would like to use as a seed file (e.g. `youtube-dl --format mp4 --output test.mp4 your-favorite-video`). Be sure to replace the seed file argument `-s` with the path to the video file you just downloaded. 
Note that because `wmplayer.exe` is a 32-bit executable you'll either need to use the 32-bit `fuzzer.exe`, or manually specify the path to the 32-bit `winafl.dll` with the instrumentation's `winafl_dir` option. Additionally, the `-target_offset` argument that is passed to the instrumentation will need to be updated depending on your Windows version. In this case we are just using the entry point of `wmplayer.exe`, below there is a table to use as reference but it is best to verify the entry point of your binary. | WMP Version | Offset | | --------------- | ------ | | 12.0.7601 | 0x176D | | 12.0.9200 | 0x1BAD | | 12.0.9600 | 0x1F00 | | 12.0.17134 | 0x1F20 | ``` echo {"timeout":20} > driver.json echo {"timeout":5000,"coverage_modules":["wmp.DLL"],"target_path":"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe"} > instrumentation.json fuzzer.exe -n 3 -s "C:\Users\user\Desktop\test.mp4" -d driver.json -i instrumentation.json wmp dynamorio nop ``` You may need to modify these parameters to match your environment. In order to speed up fuzzing, it may be useful to enable persistence mode. See [PersistenceMode.md](docs/PersistenceMode.md) for instructions. ### Linux and Mac Once you've built Killerbeez following the [BUILD instructions](docs/BUILD.md#linux-and-mac), you should be ready to change into the right directory and run the fuzzer. Here's an example of running it on a test program from our corpus. ``` # assuming that you're in $WORKDIR/build/killerbeez cd ../build/killerbeez/ echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json ./fuzzer -n 20 -s /bin/bash -d driver.json file return_code honggfuzz ``` If it ran correctly, you should see something like this: ``` Thu Jul 19 09:40:46 2018 - INFO - Logging Started Thu Jul 19 09:40:46 2018 - INFO - Ran 20 iterations in 0 seconds ``` In the example above, we're using the **file** driver, the **return\_code** instrumentation, and the **honggfuzz** mutator module. 
We are only going to do 20 executions and our seed file is /bin/bash, because why not? The -d option is for the driver. We need to give it the path to our executable and the command line arguments, which in our case is just the filename, represented by "@@" here. We don't need to specify any options for the mutator or the instrumentation, so we'll rely on default values instead. To see the options available, you can use the `-h` help flag. Some examples: ``` ./fuzzer -h ./fuzzer -hd ./fuzzer -hi ``` Looking at the results in the "output" directory, we see that it didn't find any crashes, hangs or new paths. At first glance, it might seem like it didn't work. However, we were using the return\_code instrumentation, which does not actually track code coverage, so it cannot determine the execution path, and thus it can't determine if a new path was hit. Instead, it just looks at the return code to determine if the process crashed or not. It's very efficient; however, this is effectively dumb fuzzing. In order to track coverage on Linux, Killerbeez has support for Intel Processor Trace. See [IPT.md](docs/IPT.md) for more details. To see a crash, we can just change our seed file to be close to the file which will cause a crash. It's cheating, but it works well to demonstrate the importance of seed files as well as illustrating what the output of finding a crash looks like. The following commands assume you are still in the directory containing ./fuzzer.
``` # assuming that you're in $WORKDIR/build/killerbeez echo "ABC@" > test1 # ABC@ is one bit different than ABCD, the crashing input echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json ./fuzzer -n 2000 -s ./test1 -d driver.json file return_code honggfuzz ``` This should yield output similar to the following: ``` Thu Jul 19 12:03:11 2018 - INFO - Logging Started Thu Jul 19 12:03:13 2018 - CRITICAL - Found crashes Thu Jul 19 12:03:13 2018 - CRITICAL - Found crashes Thu Jul 19 12:03:19 2018 - CRITICAL - Found crashes Thu Jul 19 12:03:22 2018 - CRITICAL - Found crashes Thu Jul 19 12:03:22 2018 - INFO - Ran 2000 iterations in 11 seconds ``` Looking in the output/crashes folder, we can see the inputs which were found to crash this target and reproduce the crash manually. ``` $ ls output/crashes/ 2B81D0C867F76051FD33D8690AA2AC68 5220E572A6F9DAAF522EF5C5698EAF4C 59F885D0289BE9A83E711C5E7CFCBE4D ED5D34C74E59D16BD6D5B3683DB655C3 $ cat output/crashes/59F885D0289BE9A83E711C5E7CFCBE4D ; echo ABCD $ corpus/test-linux output/crashes/59F885D0289BE9A83E711C5E7CFCBE4D Segmentation fault (core dumped) ``` ## Documentation Documentation of the API can be found in the [docs](docs) folder. It's written in LaTeX which can be used to generate a PDF, HTML, or various other formats. PDFs are also included so the documentation is easy to read for those who do not have a LaTeX typesetting environment set up. ## Troubleshooting Q: The target program doesn't start A: Windows Media Player won't automatically play media the first time it is run. There's a pop-up which requires you to configure some settings. Just run it manually once and you should be good to go after that. Q: I'm getting an error about a pipe timing out A: This is related to the instrumentation and the target taking too long to start up. If running it again doesn't work, try increasing the "timeout" on the -i argument and that should take care of it. ## Still Having a Problem?
Please create an issue on GitHub and we will address it as soon as possible. ## Have questions? Wanna chat? Feel free to join the mailing list! Send a request to join to `killerbeez-join@lists.grimm-co.com` then post your questions to `killerbeez@lists.grimm-co.com`! We've also got #killerbeez on freenode, but it's pretty quiet. ## License This project is licensed under the UIUC License - see the [LICENSE](LICENSE) file for details. Some parts of this project have been included from other software and will be under different licenses, where marked. ================================================ FILE: afl_progs/Makefile ================================================
#
# american fuzzy lop - makefile
# -----------------------------
#
# Originally written and maintained by Michal Zalewski
#
# Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Modified by GRIMM

# Headers every program in this directory depends on.
COMM_HDR    = alloc-inl.h config.h debug.h types.h

# afl-as is built by its own rule and listed separately in `all`.
PROGS       = afl-gcc

PREFIX     ?= /usr/local
BIN_PATH    = $(PREFIX)/bin
HELPER_PATH = $(PREFIX)/lib/afl
DOC_PATH    = $(PREFIX)/share/doc/afl

CFLAGS     ?= -O3 -funroll-loops
CFLAGS     += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
	      -DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
	      -DBIN_PATH=\"$(BIN_PATH)\"

# Exercise the wrapper under the name matching the host compiler.
ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
  TEST_CC   = afl-gcc
else
  TEST_CC   = afl-clang
endif

all: test_x86 $(PROGS) afl-as test_build all_done

ifndef AFL_NO_X86

# The probe declares `int main(void)` explicitly: implicit int is rejected as
# an error (not a warning) by current compilers, and -w does not silence
# errors, so the old `main() { ... }` spelling made this check fail for a
# reason unrelated to x86 support.
test_x86:
	@echo "[*] Checking for the ability to compile x86 code..."
	@echo 'int main(void) { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
	@rm -f .test
	@echo "[+] Everything seems to be working, ready to compile."

else

test_x86:
	@echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."

endif

# The loop creates every alias of the wrapper (afl-g++, afl-clang,
# afl-clang++); the three individual `ln -sf` commands that used to follow it
# re-created exactly the same links and have been dropped.
afl-gcc: afl-gcc.c $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done

afl-as: afl-as.c afl-as.h $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
	ln -sf afl-as as

afl-showmap: afl-showmap.c hash.h $(COMM_HDR) | test_x86
	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

ifndef AFL_NO_X86

# Build test-instr through the wrapper, run it on two different inputs and
# fail if both runs produce the same coverage map.
test_build: test-instr.c afl-gcc afl-as afl-showmap
	@echo "[*] Testing the CC wrapper and instrumentation output..."
	unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. ./$(TEST_CC) $(CFLAGS) test-instr.c -o test-instr $(LDFLAGS)
	echo 0 | ./afl-showmap -m none -q -o .test-instr0 ./test-instr
	echo 1 | ./afl-showmap -m none -q -o .test-instr1 ./test-instr
	@rm -f test-instr
	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping to troubleshoot the issue."; echo; exit 1; fi
	@echo "[+] All right, the instrumentation seems to be working!"

else

test_build: afl-gcc afl-as afl-showmap
	@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."

endif

all_done: test_build
	@if [ ! "`which clang 2>/dev/null`" = "" ]; then echo "[+] LLVM users: see llvm_mode/README.llvm for a faster alternative to afl-gcc."; fi
	@echo "[+] All done! Be sure to review README - it's pretty short and useful."
	@if [ "`uname`" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
	@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.txt for advice.\033[0m\n" 2>/dev/null
================================================ FILE: afl_progs/afl-as.c ================================================ /* american fuzzy lop - wrapper for GNU as --------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 The sole purpose of this wrapper is to preprocess assembly files generated by GCC / clang and inject the instrumentation bits included from afl-as.h. It is automatically invoked by the toolchain when compiling programs using afl-gcc / afl-clang. Note that it's an explicit non-goal to instrument hand-written assembly, be it in separate .s files or in __asm__ blocks. The only aspiration this utility has right now is to be able to skip them gracefully and allow the compilation process to continue. That said, see experimental/clang_asm_normalize/ for a solution that may allow clang users to make things work even with hand-crafted assembly. Just note that there is no equivalent for GCC.
*/ #define AFL_MAIN #include "config.h" #include "types.h" #include "debug.h" #include "alloc-inl.h" #include "afl-as.h" #include #include #include #include #include #include #include #include #include static u8** as_params; /* Parameters passed to the real 'as' */ static u8* input_file; /* Originally specified input file */ static u8* modified_file; /* Instrumented file for the real 'as' */ static u8 be_quiet, /* Quiet mode (no stderr output) */ clang_mode, /* Running in clang mode? */ pass_thru, /* Just pass data through? */ just_version, /* Just show version? */ sanitizer; /* Using ASAN / MSAN */ static u32 inst_ratio = 100, /* Instrumentation probability (%) */ as_par_cnt = 1; /* Number of params to 'as' */ /* If we don't find --32 or --64 in the command line, default to instrumentation for whichever mode we were compiled with. This is not perfect, but should do the trick for almost all use cases. */ #ifdef __x86_64__ static u8 use_64bit = 1; #else static u8 use_64bit = 0; #ifdef __APPLE__ # error "Sorry, 32-bit Apple platforms are not supported." #endif /* __APPLE__ */ #endif /* ^__x86_64__ */ /* Examine and modify parameters to pass to 'as'. Note that the file name is always the last parameter passed by GCC, so we exploit this property to keep the code simple. */ static void edit_params(int argc, char** argv) { u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); u32 i; #ifdef __APPLE__ u8 use_clang_as = 0; /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work with the code generated by newer versions of clang that are hand-built by the user. See the thread here: http://goo.gl/HBWDtn. To work around this, when using clang and running without AFL_AS specified, we will actually call 'clang -c' instead of 'as -q' to compile the assembly file. The tools aren't cmdline-compatible, but at least for now, we can seemingly get away with this by making only very minor tweaks. Thanks to Nico Weber for the idea. 
*/ if (clang_mode && !afl_as) { use_clang_as = 1; afl_as = getenv("AFL_CC"); if (!afl_as) afl_as = getenv("AFL_CXX"); if (!afl_as) afl_as = "clang"; } #endif /* __APPLE__ */ /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR is not set. We need to check these non-standard variables to properly handle the pass_thru logic later on. */ if (!tmp_dir) tmp_dir = getenv("TEMP"); if (!tmp_dir) tmp_dir = getenv("TMP"); if (!tmp_dir) tmp_dir = "/tmp"; as_params = ck_alloc((argc + 32) * sizeof(u8*)); as_params[0] = afl_as ? afl_as : (u8*)"as"; as_params[argc] = 0; for (i = 1; i < argc - 1; i++) { if (!strcmp(argv[i], "--64")) use_64bit = 1; else if (!strcmp(argv[i], "--32")) use_64bit = 0; #ifdef __APPLE__ /* The Apple case is a bit different... */ if (!strcmp(argv[i], "-arch") && i + 1 < argc) { if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1; else if (!strcmp(argv[i + 1], "i386")) FATAL("Sorry, 32-bit Apple platforms are not supported."); } /* Strip options that set the preference for a particular upstream assembler in Xcode. */ if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q"))) continue; #endif /* __APPLE__ */ as_params[as_par_cnt++] = argv[i]; } #ifdef __APPLE__ /* When calling clang as the upstream assembler, append -c -x assembler and hope for the best. */ if (use_clang_as) { as_params[as_par_cnt++] = "-c"; as_params[as_par_cnt++] = "-x"; as_params[as_par_cnt++] = "assembler"; } #endif /* __APPLE__ */ input_file = argv[argc - 1]; if (input_file[0] == '-') { if (!strcmp(input_file + 1, "-version")) { just_version = 1; modified_file = input_file; goto wrap_things_up; } if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)"); else input_file = NULL; } else { /* Check if this looks like a standard invocation as a part of an attempt to compile a program, rather than using gcc on an ad-hoc .s file in a format we may not understand. This works around an issue compiling NSS. 
*/ if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) && strncmp(input_file, "/var/tmp/", 9) && strncmp(input_file, "/tmp/", 5)) pass_thru = 1; } modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(), (u32)time(NULL)); wrap_things_up: as_params[as_par_cnt++] = modified_file; as_params[as_par_cnt] = NULL; } /* Process input file, generate modified_file. Insert instrumentation in all the appropriate places. */ static void add_instrumentation(void) { static u8 line[MAX_LINE]; FILE* inf; FILE* outf; s32 outfd; u32 ins_lines = 0; u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0, skip_app = 0, instrument_next = 0; #ifdef __APPLE__ u8* colon_pos; #endif /* __APPLE__ */ if (input_file) { inf = fopen(input_file, "r"); if (!inf) PFATAL("Unable to read '%s'", input_file); } else inf = stdin; outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600); if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file); outf = fdopen(outfd, "w"); if (!outf) PFATAL("fdopen() failed"); while (fgets(line, MAX_LINE, inf)) { /* In some cases, we want to defer writing the instrumentation trampoline until after all the labels, macros, comments, etc. If we're in this mode, and if the line starts with a tab followed by a character, dump the trampoline now. */ if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok && instrument_next && line[0] == '\t' && isalpha(line[1])) { fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE)); instrument_next = 0; ins_lines++; } /* Output the actual line, call it a day in pass-thru mode. */ fputs(line, outf); if (pass_thru) continue; /* All right, this is where the actual fun begins. For one, we only want to instrument the .text section. So, let's keep track of that in processed files - and let's set instr_ok accordingly. */ if (line[0] == '\t' && line[1] == '.') { /* OpenBSD puts jump tables directly inline with the code, which is a bit annoying. 
They use a specific format of p2align directives around them, so we use that as a signal. */ if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) && isdigit(line[10]) && line[11] == '\n') skip_next_label = 1; if (!strncmp(line + 2, "text\n", 5) || !strncmp(line + 2, "section\t.text", 13) || !strncmp(line + 2, "section\t__TEXT,__text", 21) || !strncmp(line + 2, "section __TEXT,__text", 21)) { instr_ok = 1; continue; } if (!strncmp(line + 2, "section\t", 8) || !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) || !strncmp(line + 2, "data\n", 5)) { instr_ok = 0; continue; } } /* Detect off-flavor assembly (rare, happens in gdb). When this is encountered, we set skip_csect until the opposite directive is seen, and we do not instrument. */ if (strstr(line, ".code")) { if (strstr(line, ".code32")) skip_csect = use_64bit; if (strstr(line, ".code64")) skip_csect = !use_64bit; } /* Detect syntax changes, as could happen with hand-written assembly. Skip Intel blocks, resume instrumentation when back to AT&T. */ if (strstr(line, ".intel_syntax")) skip_intel = 1; if (strstr(line, ".att_syntax")) skip_intel = 0; /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */ if (line[0] == '#' || line[1] == '#') { if (strstr(line, "#APP")) skip_app = 1; if (strstr(line, "#NO_APP")) skip_app = 0; } /* If we're in the right mood for instrumenting, check for function names or conditional labels. This is a bit messy, but in essence, we want to catch: ^main: - function entry point (always instrumented) ^.L0: - GCC branch label ^.LBB0_0: - clang branch label (but only in clang mode) ^\tjnz foo - conditional branches ...but not: ^# BB#0: - clang comments ^ # BB#0: - ditto ^.Ltmp0: - clang non-branch labels ^.LC0 - GCC non-branch labels ^.LBB0_0: - ditto (when in GCC mode) ^\tjmp foo - non-conditional jumps Additionally, clang and GCC on MacOS X follow a different convention with no leading dots on labels, hence the weird maze of #ifdefs later on. 
*/ if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' || line[0] == ' ') continue; /* Conditional branch instruction (jnz, etc). We append the instrumentation right after the branch (to instrument the not-taken path) and at the branch destination label (handled later on). */ if (line[0] == '\t') { if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) { fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE)); ins_lines++; } continue; } /* Label of some sort. This may be a branch destination, but we need to tread carefully and account for several different formatting conventions. */ #ifdef __APPLE__ /* Apple: L: */ if ((colon_pos = strstr(line, ":"))) { if (line[0] == 'L' && isdigit(*(colon_pos - 1))) { #else /* Everybody else: .L: */ if (strstr(line, ":")) { if (line[0] == '.') { #endif /* __APPLE__ */ /* .L0: or LBB0_0: style jump destination */ #ifdef __APPLE__ /* Apple: L / LBB */ if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) && R(100) < inst_ratio) { #else /* Apple: .L / .LBB */ if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3))) && R(100) < inst_ratio) { #endif /* __APPLE__ */ /* An optimization is possible here by adding the code only if the label is mentioned in the code in contexts other than call / jmp. That said, this complicates the code by requiring two-pass processing (messy with stdin), and results in a speed gain typically under 10%, because compilers are generally pretty good about not generating spurious intra-function jumps. We use deferred output chiefly to avoid disrupting .Lfunc_begin0-style exception handling calculations (a problem on MacOS X). */ if (!skip_next_label) instrument_next = 1; else skip_next_label = 0; } } else { /* Function label (always instrumented, deferred mode). */ instrument_next = 1; } } } if (ins_lines) fputs(use_64bit ? 
main_payload_64 : main_payload_32, outf); if (input_file) fclose(inf); fclose(outf); if (!be_quiet) { if (!ins_lines) WARNF("No instrumentation targets found%s.", pass_thru ? " (pass-thru mode)" : ""); else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines, use_64bit ? "64" : "32", getenv("AFL_HARDEN") ? "hardened" : (sanitizer ? "ASAN/MSAN" : "non-hardened"), inst_ratio); } } /* Main entry point */ int main(int argc, char** argv) { s32 pid; u32 rand_seed; int status; u8* inst_ratio_str = getenv("AFL_INST_RATIO"); struct timeval tv; struct timezone tz; clang_mode = !!getenv(CLANG_ENV_VAR); if (isatty(2) && !getenv("AFL_QUIET")) { SAYF(cCYA "afl-as " cBRI VERSION cRST " by \n"); } else be_quiet = 1; if (argc < 2) { SAYF("\n" "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n" "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n" "don't want to run this program directly.\n\n" "Rarely, when dealing with extremely complex projects, it may be advisable to\n" "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n" "instrumenting every discovered branch.\n\n"); exit(1); } gettimeofday(&tv, &tz); rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); srandom(rand_seed); edit_params(argc, argv); if (inst_ratio_str) { if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)"); } if (getenv(AS_LOOP_ENV_VAR)) FATAL("Endless loop when calling 'as' (remove '.' from your PATH)"); setenv(AS_LOOP_ENV_VAR, "1", 1); /* When compiling with ASAN, we don't have a particularly elegant way to skip ASAN-specific branches. But we can probabilistically compensate for that... 
*/ if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) { sanitizer = 1; inst_ratio /= 3; } if (!just_version) add_instrumentation(); if (!(pid = fork())) { execvp(as_params[0], (char**)as_params); FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]); } if (pid < 0) PFATAL("fork() failed"); if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file); exit(WEXITSTATUS(status)); } ================================================ FILE: afl_progs/afl-as.h ================================================ /* american fuzzy lop - injectable parts ------------------------------------- Written and maintained by Michal Zalewski Forkserver design by Jann Horn Copyright 2013, 2014, 2015 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This file houses the assembly-level instrumentation injected into fuzzed programs. The instrumentation stores XORed pairs of data: identifiers of the currently executing branch and the one that executed immediately before. TL;DR: the instrumentation does shm_trace_map[cur_loc ^ prev_loc]++ The code is designed for 32-bit and 64-bit x86 systems. Both modes should work everywhere except for Apple systems. Apple does relocations differently from everybody else, so since their OSes have been 64-bit for a longer while, I didn't go through the mental effort of porting the 32-bit code. In principle, similar code should be easy to inject into any well-behaved binary-only code (e.g., using DynamoRIO). Conditional jumps offer natural targets for instrumentation, and should offer comparable probe density. */ /* This file has been modified from the original AFL version to incorporate into Killerbeez. 
   Specifically, the injected fork server has been modified to match the
   Killerbeez fork server protocol. */

#ifndef _HAVE_AFL_AS_H
#define _HAVE_AFL_AS_H

#include "config.h"
#include "types.h"
#include "../instrumentation/forkserver_internal.h"

/*
   -----------------
   Performance notes
   -----------------

   Contributions to make this code faster are appreciated! Here are some
   rough notes that may help with the task:

   - Only the trampoline_fmt and the non-setup __afl_maybe_log code paths are
     really worth optimizing; the setup / fork server stuff matters a lot less
     and should be mostly just kept readable.

   - We're aiming for modern CPUs with out-of-order execution and large
     pipelines; the code mostly follows intuitive, human-readable
     instruction ordering, because "textbook" manual reorderings make no
     substantial difference.

   - Interestingly, instrumented execution isn't a lot faster if we store a
     variable pointer to the setup, log, or return routine and then do a reg
     call from within trampoline_fmt. It does speed up non-instrumented
     execution quite a bit, though, since that path just becomes
     push-call-ret-pop.

   - There is also not a whole lot to be gained by doing SHM attach at a
     fixed address instead of retrieving __afl_area_ptr. Although it allows us
     to have a shorter log routine inserted for conditional jumps and jump
     labels (for a ~10% perf gain), there is a risk of bumping into other
     allocations created by the program or by tools such as ASAN.

   - popf is *awfully* slow, which is why we're doing the lahf / sahf +
     overflow test trick. Unfortunately, this forces us to taint eax / rax, but
     this dependency on a commonly-used register still beats the alternative of
     using pushf / popf.

     One possible optimization is to avoid touching flags by using a circular
     buffer that stores just a sequence of current locations, with the XOR stuff
     happening offline.
Alas, this doesn't seem to have a huge impact: https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ - Preforking one child a bit sooner, and then waiting for the "go" command from within the child, doesn't offer major performance gains; fork() seems to be relatively inexpensive these days. Preforking multiple children does help, but badly breaks the "~1 core per fuzzer" design, making it harder to scale up. Maybe there is some middle ground. Perhaps of note: in the 64-bit version for all platforms except for Apple, the instrumentation is done slightly differently than on 32-bit, with __afl_prev_loc and __afl_area_ptr being local to the object file (.lcomm), rather than global (.comm). This is to avoid GOTRELPC lookups in the critical code path, which AFAICT, are otherwise unavoidable if we want gcc -shared to work; simple relocations between .bss and .text won't work on most 64-bit platforms in such a case. (Fun fact: on Apple systems, .lcomm can segfault the linker.) The side effect is that state transitions are measured in a somewhat different way, with previous tuple being recorded separately within the scope of every .c file. This should have no impact in any practical sense. Another side effect of this design is that getenv() will be called once per every .o file when running in non-instrumented mode; and since getenv() tends to be optimized in funny ways, we need to be very careful to save every oddball register it may touch. 
*/ static const u8* trampoline_fmt_32 = "\n" "/* --- AFL TRAMPOLINE (32-BIT) --- */\n" "\n" ".align 4\n" "\n" "leal -16(%%esp), %%esp\n" "movl %%edi, 0(%%esp)\n" "movl %%edx, 4(%%esp)\n" "movl %%ecx, 8(%%esp)\n" "movl %%eax, 12(%%esp)\n" "movl $0x%08x, %%ecx\n" "call __afl_maybe_log\n" "movl 12(%%esp), %%eax\n" "movl 8(%%esp), %%ecx\n" "movl 4(%%esp), %%edx\n" "movl 0(%%esp), %%edi\n" "leal 16(%%esp), %%esp\n" "\n" "/* --- END --- */\n" "\n"; static const u8* trampoline_fmt_64 = "\n" "/* --- AFL TRAMPOLINE (64-BIT) --- */\n" "\n" ".align 4\n" "\n" "leaq -(128+24)(%%rsp), %%rsp\n" "movq %%rdx, 0(%%rsp)\n" "movq %%rcx, 8(%%rsp)\n" "movq %%rax, 16(%%rsp)\n" "movq $0x%08x, %%rcx\n" "call __afl_maybe_log\n" "movq 16(%%rsp), %%rax\n" "movq 8(%%rsp), %%rcx\n" "movq 0(%%rsp), %%rdx\n" "leaq (128+24)(%%rsp), %%rsp\n" "\n" "/* --- END --- */\n" "\n"; static const u8* main_payload_32 = "\n" "/* --- AFL MAIN PAYLOAD (32-BIT) --- */\n" "\n" ".text\n" ".att_syntax\n" ".code32\n" ".align 8\n" "\n" "__afl_maybe_log:\n" "\n" " lahf\n" " seto %al\n" "\n" " /* Check if SHM region is already mapped. */\n" "\n" " movl __afl_area_ptr, %edx\n" " testl %edx, %edx\n" " je __afl_setup\n" "\n" "__afl_store:\n" "\n" " /* Calculate and store hit for the code location specified in ecx. There\n" " is a double-XOR way of doing this without tainting another register,\n" " and we use it on 64-bit systems; but it's slower for 32-bit ones. */\n" "\n" #ifndef COVERAGE_ONLY " movl __afl_prev_loc, %edi\n" " xorl %ecx, %edi\n" " shrl $1, %ecx\n" " movl %ecx, __afl_prev_loc\n" #else " movl %ecx, %edi\n" #endif /* ^!COVERAGE_ONLY */ "\n" #ifdef SKIP_COUNTS " orb $1, (%edx, %edi, 1)\n" #else " incb (%edx, %edi, 1)\n" #endif /* ^SKIP_COUNTS */ "\n" "__afl_return:\n" "\n" " addb $127, %al\n" " sahf\n" " ret\n" "\n" ".align 8\n" "\n" "__afl_setup:\n" "\n" " /* Do not retry setup if we had previous failures. 
*/\n" "\n" " cmpb $0, __afl_setup_failure\n" " jne __afl_return\n" "\n" " /* Map SHM, jumping to __afl_setup_abort if something goes wrong.\n" " We do not save FPU/MMX/SSE registers here, but hopefully, nobody\n" " will notice this early in the game. */\n" "\n" " pushl %eax\n" " pushl %ecx\n" "\n" " pushl $.AFL_SHM_ENV\n" " call getenv\n" " addl $4, %esp\n" "\n" " testl %eax, %eax\n" " je __afl_setup_abort\n" "\n" " pushl %eax\n" " call atoi\n" " addl $4, %esp\n" "\n" " pushl $0 /* shmat flags */\n" " pushl $0 /* requested addr */\n" " pushl %eax /* SHM ID */\n" " call shmat\n" " addl $12, %esp\n" "\n" " cmpl $-1, %eax\n" " je __afl_setup_abort\n" "\n" " /* Store the address of the SHM region. */\n" "\n" " movl %eax, __afl_area_ptr\n" " movl %eax, %edx\n" "\n" " popl %ecx\n" " popl %eax\n" "\n" "__afl_forkserver:\n" "\n" " /* Enter the fork server mode to avoid the overhead of execve() calls. */\n" "\n" " pushl %eax\n" " pushl %ecx\n" " pushl %edx\n" "\n" " /* Phone home and tell the parent that we're OK. (Note that signals with\n" " no SA_RESTART will mess it up). If this fails, assume that the fd is\n" " closed because we were execve()d from an instrumented binary, or because\n" " the parent doesn't want to use the fork server. */\n" "\n" " pushl $4 /* length */\n" " pushl $__afl_temp /* data */\n" " pushl $" STRINGIFY(FORKSRV_TO_FUZZER) " /* file desc */\n" " call write\n" " addl $12, %esp\n" "\n" " cmpl $4, %eax\n" " jne __afl_fork_resume\n" "\n" "__afl_fork_wait_loop:\n" "\n" " /* Wait for parent FORK_RUN command by reading from the pipe. Abort if read fails. */\n" "\n" " pushl $1 /* length */\n" " pushl $__afl_temp /* data */\n" " pushl $" STRINGIFY(FUZZER_TO_FORKSRV) " /* file desc */\n" " call read\n" " addl $12, %esp\n" "\n" " cmpl $1, %eax\n" " jne __afl_die\n" " movb __afl_temp, %al\n" " cmpb $" STRINGIFY(FORK_RUN) ", %al\n" " jne __afl_die\n" "\n" " /* Once woken up, create a clone of our process. 
This is an excellent use\n" " case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n" " caches getpid() results and offers no way to update the value, breaking\n" " abort(), raise(), and a bunch of other things :-( */\n" "\n" " call fork\n" "\n" " cmpl $0, %eax\n" " jl __afl_die\n" " je __afl_fork_resume\n" "\n" " /* In parent process: write PID to pipe, then wait for child. */\n" "\n" " movl %eax, __afl_fork_pid\n" "\n" " pushl $4 /* length */\n" " pushl $__afl_fork_pid /* data */\n" " pushl $" STRINGIFY(FORKSRV_TO_FUZZER) " /* file desc */\n" " call write\n" " addl $12, %esp\n" "\n" " /* Wait for the GET_STATUS command by reading from the pipe. Abort if read fails. */\n" "\n" " pushl $1 /* length */\n" " pushl $__afl_temp /* data */\n" " pushl $" STRINGIFY(FUZZER_TO_FORKSRV) " /* file desc */\n" " call read\n" " addl $12, %esp\n" "\n" " cmpl $1, %eax\n" " jne __afl_die\n" " movb __afl_temp, %al\n" " cmpb $" STRINGIFY(GET_STATUS) ", %al\n" " jne __afl_die\n" "\n" " pushl $0 /* no flags */\n" " pushl $__afl_temp /* status */\n" " pushl __afl_fork_pid /* PID */\n" " call waitpid\n" " addl $12, %esp\n" "\n" " cmpl $0, %eax\n" " jle __afl_die\n" "\n" " /* Relay wait status to pipe, then loop back. */\n" "\n" " pushl $4 /* length */\n" " pushl $__afl_temp /* data */\n" " pushl $" STRINGIFY(FORKSRV_TO_FUZZER) " /* file desc */\n" " call write\n" " addl $12, %esp\n" "\n" " jmp __afl_fork_wait_loop\n" "\n" "__afl_fork_resume:\n" "\n" " /* In child process: close fds, resume execution. */\n" "\n" " pushl $" STRINGIFY(FUZZER_TO_FORKSRV) "\n" " call close\n" "\n" " pushl $" STRINGIFY(FORKSRV_TO_FUZZER) "\n" " call close\n" "\n" " addl $8, %esp\n" "\n" " popl %edx\n" " popl %ecx\n" " popl %eax\n" " jmp __afl_store\n" "\n" "__afl_die:\n" "\n" " xorl %eax, %eax\n" " call _exit\n" "\n" "__afl_setup_abort:\n" "\n" " /* Record setup failure so that we don't keep calling\n" " shmget() / shmat() over and over again. 
*/\n" "\n" " incb __afl_setup_failure\n" " popl %ecx\n" " popl %eax\n" " jmp __afl_return\n" "\n" ".AFL_VARS:\n" "\n" " .comm __afl_area_ptr, 4, 32\n" " .comm __afl_setup_failure, 1, 32\n" #ifndef COVERAGE_ONLY " .comm __afl_prev_loc, 4, 32\n" #endif /* !COVERAGE_ONLY */ " .comm __afl_fork_pid, 4, 32\n" " .comm __afl_temp, 4, 32\n" "\n" ".AFL_SHM_ENV:\n" " .asciz \"" SHM_ENV_VAR "\"\n" "\n" "/* --- END --- */\n" "\n"; /* The OpenBSD hack is due to lahf and sahf not being recognized by some versions of binutils: http://marc.info/?l=openbsd-cvs&m=141636589924400 The Apple code is a bit different when calling libc functions because they are doing relocations differently from everybody else. We also need to work around the crash issue with .lcomm and the fact that they don't recognize .string. */ #ifdef __APPLE__ # define CALL_L64(str) "call _" str "\n" #else # define CALL_L64(str) "call " str "@PLT\n" #endif /* ^__APPLE__ */ static const u8* main_payload_64 = "\n" "/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n" "\n" ".text\n" ".att_syntax\n" ".code64\n" ".align 8\n" "\n" "__afl_maybe_log:\n" "\n" #if defined(__OpenBSD__) || (defined(__FreeBSD__) && (__FreeBSD__ < 9)) " .byte 0x9f /* lahf */\n" #else " lahf\n" #endif /* ^__OpenBSD__, etc */ " seto %al\n" "\n" " /* Check if SHM region is already mapped. */\n" "\n" " movq __afl_area_ptr(%rip), %rdx\n" " testq %rdx, %rdx\n" " je __afl_setup\n" "\n" "__afl_store:\n" "\n" " /* Calculate and store hit for the code location specified in rcx. 
*/\n" "\n" #ifndef COVERAGE_ONLY " xorq __afl_prev_loc(%rip), %rcx\n" " xorq %rcx, __afl_prev_loc(%rip)\n" " shrq $1, __afl_prev_loc(%rip)\n" #endif /* ^!COVERAGE_ONLY */ "\n" #ifdef SKIP_COUNTS " orb $1, (%rdx, %rcx, 1)\n" #else " incb (%rdx, %rcx, 1)\n" #endif /* ^SKIP_COUNTS */ "\n" "__afl_return:\n" "\n" " addb $127, %al\n" #if defined(__OpenBSD__) || (defined(__FreeBSD__) && (__FreeBSD__ < 9)) " .byte 0x9e /* sahf */\n" #else " sahf\n" #endif /* ^__OpenBSD__, etc */ " ret\n" "\n" ".align 8\n" "\n" "__afl_setup:\n" "\n" " /* Do not retry setup if we had previous failures. */\n" "\n" " cmpb $0, __afl_setup_failure(%rip)\n" " jne __afl_return\n" "\n" " /* Check out if we have a global pointer on file. */\n" "\n" #ifndef __APPLE__ " movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n" " movq (%rdx), %rdx\n" #else " movq __afl_global_area_ptr(%rip), %rdx\n" #endif /* !^__APPLE__ */ " testq %rdx, %rdx\n" " je __afl_setup_first\n" "\n" " movq %rdx, __afl_area_ptr(%rip)\n" " jmp __afl_store\n" "\n" "__afl_setup_first:\n" "\n" " /* Save everything that is not yet saved and that may be touched by\n" " getenv() and several other libcalls we'll be relying on. */\n" "\n" " leaq -352(%rsp), %rsp\n" "\n" " movq %rax, 0(%rsp)\n" " movq %rcx, 8(%rsp)\n" " movq %rdi, 16(%rsp)\n" " movq %rsi, 32(%rsp)\n" " movq %r8, 40(%rsp)\n" " movq %r9, 48(%rsp)\n" " movq %r10, 56(%rsp)\n" " movq %r11, 64(%rsp)\n" "\n" " movq %xmm0, 96(%rsp)\n" " movq %xmm1, 112(%rsp)\n" " movq %xmm2, 128(%rsp)\n" " movq %xmm3, 144(%rsp)\n" " movq %xmm4, 160(%rsp)\n" " movq %xmm5, 176(%rsp)\n" " movq %xmm6, 192(%rsp)\n" " movq %xmm7, 208(%rsp)\n" " movq %xmm8, 224(%rsp)\n" " movq %xmm9, 240(%rsp)\n" " movq %xmm10, 256(%rsp)\n" " movq %xmm11, 272(%rsp)\n" " movq %xmm12, 288(%rsp)\n" " movq %xmm13, 304(%rsp)\n" " movq %xmm14, 320(%rsp)\n" " movq %xmm15, 336(%rsp)\n" "\n" " /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n" "\n" " /* The 64-bit ABI requires 16-byte stack alignment. 
We'll keep the\n" " original stack ptr in the callee-saved r12. */\n" "\n" " pushq %r12\n" " movq %rsp, %r12\n" " subq $16, %rsp\n" " andq $0xfffffffffffffff0, %rsp\n" "\n" " leaq .AFL_SHM_ENV(%rip), %rdi\n" CALL_L64("getenv") "\n" " testq %rax, %rax\n" " je __afl_setup_abort\n" "\n" " movq %rax, %rdi\n" CALL_L64("atoi") "\n" " xorq %rdx, %rdx /* shmat flags */\n" " xorq %rsi, %rsi /* requested addr */\n" " movq %rax, %rdi /* SHM ID */\n" CALL_L64("shmat") "\n" " cmpq $-1, %rax\n" " je __afl_setup_abort\n" "\n" " /* Store the address of the SHM region. */\n" "\n" " movq %rax, %rdx\n" " movq %rax, __afl_area_ptr(%rip)\n" "\n" #ifdef __APPLE__ " movq %rax, __afl_global_area_ptr(%rip)\n" #else " movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n" " movq %rax, (%rdx)\n" #endif /* ^__APPLE__ */ " movq %rax, %rdx\n" "\n" "__afl_forkserver:\n" "\n" " /* Enter the fork server mode to avoid the overhead of execve() calls. We\n" " push rdx (area ptr) twice to keep stack alignment neat. */\n" "\n" " pushq %rdx\n" " pushq %rdx\n" "\n" " /* Phone home and tell the parent that we're OK. (Note that signals with\n" " no SA_RESTART will mess it up). If this fails, assume that the fd is\n" " closed because we were execve()d from an instrumented binary, or because\n" " the parent doesn't want to use the fork server. */\n" "\n" " movq $4, %rdx /* length */\n" " leaq __afl_temp(%rip), %rsi /* data */\n" " movq $" STRINGIFY(FORKSRV_TO_FUZZER) ", %rdi /* file desc */\n" CALL_L64("write") "\n" " cmpq $4, %rax\n" " jne __afl_fork_resume\n" "\n" "__afl_fork_wait_loop:\n" "\n" " /* Wait for parent FORK_RUN command by reading from the pipe. Abort if read fails. 
*/\n" "\n" " movq $1, %rdx /* length */\n" " leaq __afl_temp(%rip), %rsi /* data */\n" " movq $" STRINGIFY(FUZZER_TO_FORKSRV) ", %rdi /* file desc */\n" CALL_L64("read") " cmpq $1, %rax\n" " jne __afl_die\n" " movb __afl_temp(%rip), %al\n" " cmpb $" STRINGIFY(FORK_RUN) ", %al\n" " jne __afl_die\n" "\n" " /* Once woken up, create a clone of our process. This is an excellent use\n" " case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n" " caches getpid() results and offers no way to update the value, breaking\n" " abort(), raise(), and a bunch of other things :-( */\n" "\n" CALL_L64("fork") " cmpq $0, %rax\n" " jl __afl_die\n" " je __afl_fork_resume\n" "\n" " /* In parent process: write PID to pipe, then wait for child. */\n" "\n" " movl %eax, __afl_fork_pid(%rip)\n" "\n" " movq $4, %rdx /* length */\n" " leaq __afl_fork_pid(%rip), %rsi /* data */\n" " movq $" STRINGIFY(FORKSRV_TO_FUZZER) ", %rdi /* file desc */\n" CALL_L64("write") "\n" " /* Wait for the GET_STATUS command by reading from the pipe. Abort if read fails. */\n" "\n" " movq $1, %rdx /* length */\n" " leaq __afl_temp(%rip), %rsi /* data */\n" " movq $" STRINGIFY(FUZZER_TO_FORKSRV) ", %rdi /* file desc */\n" CALL_L64("read") " cmpq $1, %rax\n" " jne __afl_die\n" " movb __afl_temp(%rip), %al\n" " cmpb $" STRINGIFY(GET_STATUS) ", %al\n" " jne __afl_die\n" "\n" " movq $0, %rdx /* no flags */\n" " leaq __afl_temp(%rip), %rsi /* status */\n" " movq __afl_fork_pid(%rip), %rdi /* PID */\n" CALL_L64("waitpid") " cmpq $0, %rax\n" " jle __afl_die\n" "\n" " /* Relay wait status to pipe, then loop back. */\n" "\n" " movq $4, %rdx /* length */\n" " leaq __afl_temp(%rip), %rsi /* data */\n" " movq $" STRINGIFY(FORKSRV_TO_FUZZER) ", %rdi /* file desc */\n" CALL_L64("write") "\n" " jmp __afl_fork_wait_loop\n" "\n" "__afl_fork_resume:\n" "\n" " /* In child process: close fds, resume execution. 
*/\n" "\n" " movq $" STRINGIFY(FUZZER_TO_FORKSRV) ", %rdi\n" CALL_L64("close") "\n" " movq $" STRINGIFY(FORKSRV_TO_FUZZER) ", %rdi\n" CALL_L64("close") "\n" " popq %rdx\n" " popq %rdx\n" "\n" " movq %r12, %rsp\n" " popq %r12\n" "\n" " movq 0(%rsp), %rax\n" " movq 8(%rsp), %rcx\n" " movq 16(%rsp), %rdi\n" " movq 32(%rsp), %rsi\n" " movq 40(%rsp), %r8\n" " movq 48(%rsp), %r9\n" " movq 56(%rsp), %r10\n" " movq 64(%rsp), %r11\n" "\n" " movq 96(%rsp), %xmm0\n" " movq 112(%rsp), %xmm1\n" " movq 128(%rsp), %xmm2\n" " movq 144(%rsp), %xmm3\n" " movq 160(%rsp), %xmm4\n" " movq 176(%rsp), %xmm5\n" " movq 192(%rsp), %xmm6\n" " movq 208(%rsp), %xmm7\n" " movq 224(%rsp), %xmm8\n" " movq 240(%rsp), %xmm9\n" " movq 256(%rsp), %xmm10\n" " movq 272(%rsp), %xmm11\n" " movq 288(%rsp), %xmm12\n" " movq 304(%rsp), %xmm13\n" " movq 320(%rsp), %xmm14\n" " movq 336(%rsp), %xmm15\n" "\n" " leaq 352(%rsp), %rsp\n" "\n" " jmp __afl_store\n" "\n" "__afl_die:\n" "\n" " xorq %rax, %rax\n" CALL_L64("_exit") "\n" "__afl_setup_abort:\n" "\n" " /* Record setup failure so that we don't keep calling\n" " shmget() / shmat() over and over again. 
*/\n" "\n" " incb __afl_setup_failure(%rip)\n" "\n" " movq %r12, %rsp\n" " popq %r12\n" "\n" " movq 0(%rsp), %rax\n" " movq 8(%rsp), %rcx\n" " movq 16(%rsp), %rdi\n" " movq 32(%rsp), %rsi\n" " movq 40(%rsp), %r8\n" " movq 48(%rsp), %r9\n" " movq 56(%rsp), %r10\n" " movq 64(%rsp), %r11\n" "\n" " movq 96(%rsp), %xmm0\n" " movq 112(%rsp), %xmm1\n" " movq 128(%rsp), %xmm2\n" " movq 144(%rsp), %xmm3\n" " movq 160(%rsp), %xmm4\n" " movq 176(%rsp), %xmm5\n" " movq 192(%rsp), %xmm6\n" " movq 208(%rsp), %xmm7\n" " movq 224(%rsp), %xmm8\n" " movq 240(%rsp), %xmm9\n" " movq 256(%rsp), %xmm10\n" " movq 272(%rsp), %xmm11\n" " movq 288(%rsp), %xmm12\n" " movq 304(%rsp), %xmm13\n" " movq 320(%rsp), %xmm14\n" " movq 336(%rsp), %xmm15\n" "\n" " leaq 352(%rsp), %rsp\n" "\n" " jmp __afl_return\n" "\n" ".AFL_VARS:\n" "\n" #ifdef __APPLE__ " .comm __afl_area_ptr, 8\n" #ifndef COVERAGE_ONLY " .comm __afl_prev_loc, 8\n" #endif /* !COVERAGE_ONLY */ " .comm __afl_fork_pid, 4\n" " .comm __afl_temp, 4\n" " .comm __afl_setup_failure, 1\n" #else " .lcomm __afl_area_ptr, 8\n" #ifndef COVERAGE_ONLY " .lcomm __afl_prev_loc, 8\n" #endif /* !COVERAGE_ONLY */ " .lcomm __afl_fork_pid, 4\n" " .lcomm __afl_temp, 4\n" " .lcomm __afl_setup_failure, 1\n" #endif /* ^__APPLE__ */ " .comm __afl_global_area_ptr, 8, 8\n" "\n" ".AFL_SHM_ENV:\n" " .asciz \"" SHM_ENV_VAR "\"\n" "\n" "/* --- END --- */\n" "\n"; #endif /* !_HAVE_AFL_AS_H */ ================================================ FILE: afl_progs/afl-gcc.c ================================================ /* american fuzzy lop - wrapper for GCC and clang ---------------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This program is a drop-in replacement for GCC or clang. The most common way of using it is to pass the path to afl-gcc or afl-clang via CC when invoking ./configure. (Of course, use CXX and point it to afl-g++ / afl-clang++ for C++ code.) The wrapper needs to know the path to afl-as (renamed to 'as'). The default is /usr/local/lib/afl/. A convenient way to specify alternative directories would be to set AFL_PATH. If AFL_HARDEN is set, the wrapper will compile the target app with various hardening options that may help detect memory management issues more reliably. You can also specify AFL_USE_ASAN to enable ASAN. If you want to call a non-default compiler as a next step of the chain, specify its location via AFL_CC or AFL_CXX. */ #define AFL_MAIN #include "config.h" #include "types.h" #include "debug.h" #include "alloc-inl.h" #include #include #include #include static u8* as_path; /* Path to the AFL 'as' wrapper */ static u8** cc_params; /* Parameters passed to the real CC */ static u32 cc_par_cnt = 1; /* Param count, including argv0 */ static u8 be_quiet, /* Quiet mode */ clang_mode; /* Invoked as afl-clang*? */ /* Try to find our "fake" GNU assembler in AFL_PATH or at the location derived from argv[0]. If that fails, abort. */ static void find_as(u8* argv0) { u8 *afl_path = getenv("AFL_PATH"); u8 *slash, *tmp; if (afl_path) { tmp = alloc_printf("%s/as", afl_path); if (!access(tmp, X_OK)) { as_path = afl_path; ck_free(tmp); return; } ck_free(tmp); } slash = strrchr(argv0, '/'); if (slash) { u8 *dir; *slash = 0; dir = ck_strdup(argv0); *slash = '/'; tmp = alloc_printf("%s/afl-as", dir); if (!access(tmp, X_OK)) { as_path = dir; ck_free(tmp); return; } ck_free(tmp); ck_free(dir); } if (!access(AFL_PATH "/as", X_OK)) { as_path = AFL_PATH; return; } FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH"); } /* Copy argv to cc_params, making the necessary edits. 
*/ static void edit_params(u32 argc, char** argv) { u8 fortify_set = 0, asan_set = 0; u8 *name; #if defined(__FreeBSD__) && defined(__x86_64__) u8 m32_set = 0; #endif cc_params = ck_alloc((argc + 128) * sizeof(u8*)); name = strrchr(argv[0], '/'); if (!name) name = argv[0]; else name++; if (!strncmp(name, "afl-clang", 9)) { clang_mode = 1; setenv(CLANG_ENV_VAR, "1", 1); if (!strcmp(name, "afl-clang++")) { u8* alt_cxx = getenv("AFL_CXX"); cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++"; } else { u8* alt_cc = getenv("AFL_CC"); cc_params[0] = alt_cc ? alt_cc : (u8*)"clang"; } } else { /* With GCJ and Eclipse installed, you can actually compile Java! The instrumentation will work (amazingly). Alas, unhandled exceptions do not call abort(), so afl-fuzz would need to be modified to equate non-zero exit codes with crash conditions when working with Java binaries. Meh. */ #ifdef __APPLE__ if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX"); else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ"); else cc_params[0] = getenv("AFL_CC"); if (!cc_params[0]) { SAYF("\n" cLRD "[-] " cRST "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n" " 'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n" " set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n"); FATAL("AFL_CC or AFL_CXX required on MacOS X"); } #else if (!strcmp(name, "afl-g++")) { u8* alt_cxx = getenv("AFL_CXX"); cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++"; } else if (!strcmp(name, "afl-gcj")) { u8* alt_cc = getenv("AFL_GCJ"); cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj"; } else { u8* alt_cc = getenv("AFL_CC"); cc_params[0] = alt_cc ? 
alt_cc : (u8*)"gcc"; } #endif /* __APPLE__ */ } while (--argc) { u8* cur = *(++argv); if (!strncmp(cur, "-B", 2)) { if (!be_quiet) WARNF("-B is already set, overriding"); if (!cur[2] && argc > 1) { argc--; argv++; } continue; } if (!strcmp(cur, "-integrated-as")) continue; if (!strcmp(cur, "-pipe")) continue; #if defined(__FreeBSD__) && defined(__x86_64__) if (!strcmp(cur, "-m32")) m32_set = 1; #endif if (!strcmp(cur, "-fsanitize=address") || !strcmp(cur, "-fsanitize=memory")) asan_set = 1; if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1; cc_params[cc_par_cnt++] = cur; } cc_params[cc_par_cnt++] = "-B"; cc_params[cc_par_cnt++] = as_path; if (clang_mode) cc_params[cc_par_cnt++] = "-no-integrated-as"; if (getenv("AFL_HARDEN")) { cc_params[cc_par_cnt++] = "-fstack-protector-all"; if (!fortify_set) cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2"; } if (asan_set) { /* Pass this on to afl-as to adjust map density. */ setenv("AFL_USE_ASAN", "1", 1); } else if (getenv("AFL_USE_ASAN")) { if (getenv("AFL_USE_MSAN")) FATAL("ASAN and MSAN are mutually exclusive"); if (getenv("AFL_HARDEN")) FATAL("ASAN and AFL_HARDEN are mutually exclusive"); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; cc_params[cc_par_cnt++] = "-fsanitize=address"; } else if (getenv("AFL_USE_MSAN")) { if (getenv("AFL_USE_ASAN")) FATAL("ASAN and MSAN are mutually exclusive"); if (getenv("AFL_HARDEN")) FATAL("MSAN and AFL_HARDEN are mutually exclusive"); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; cc_params[cc_par_cnt++] = "-fsanitize=memory"; } if (!getenv("AFL_DONT_OPTIMIZE")) { #if defined(__FreeBSD__) && defined(__x86_64__) /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself works OK. This has nothing to do with us, but let's avoid triggering that bug. 
*/ if (!clang_mode || !m32_set) cc_params[cc_par_cnt++] = "-g"; #else cc_params[cc_par_cnt++] = "-g"; #endif cc_params[cc_par_cnt++] = "-O3"; cc_params[cc_par_cnt++] = "-funroll-loops"; /* Two indicators that you're building for fuzzing; one of them is AFL-specific, the other is shared with libfuzzer. */ cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1"; cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"; } if (getenv("AFL_NO_BUILTIN")) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp"; cc_params[cc_par_cnt++] = "-fno-builtin-memcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strstr"; cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr"; } cc_params[cc_par_cnt] = NULL; } /* Main entry point */ int main(int argc, char** argv) { if (isatty(2) && !getenv("AFL_QUIET")) { SAYF(cCYA "afl-cc " cBRI VERSION cRST " by \n"); } else be_quiet = 1; if (argc < 2) { SAYF("\n" "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n" "for gcc or clang, letting you recompile third-party code with the required\n" "runtime instrumentation. 
A common use pattern would be one of the following:\n\n" " CC=%s/afl-gcc ./configure\n" " CXX=%s/afl-g++ ./configure\n\n" "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n" "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n", BIN_PATH, BIN_PATH); exit(1); } find_as(argv[0]); edit_params(argc, argv); execvp(cc_params[0], (char**)cc_params); FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]); return 0; } ================================================ FILE: afl_progs/afl-showmap.c ================================================ /* american fuzzy lop - map display utility ---------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 A very simple tool that runs the targeted binary and displays the contents of the trace bitmap in a human-readable form. Useful in scripts to eliminate redundant inputs and perform other checks. Exit code is 2 if the target program crashes; 1 if it times out or there is a problem executing it; or 0 if execution is successful. 
*/ #define AFL_MAIN #include "config.h" #include "types.h" #include "debug.h" #include "alloc-inl.h" #include "hash.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static s32 child_pid; /* PID of the tested program */ static u8* trace_bits; /* SHM with instrumentation bitmap */ static u8 *out_file, /* Trace output file */ *doc_path, /* Path to docs */ *target_path, /* Path to target binary */ *at_file; /* Substitution string for @@ */ static u32 exec_tmout; /* Exec timeout (ms) */ static u64 mem_limit = MEM_LIMIT; /* Memory limit (MB) */ static s32 shm_id; /* ID of the SHM region */ static u8 quiet_mode, /* Hide non-essential messages? */ edges_only, /* Ignore hit counts? */ cmin_mode, /* Generate output in afl-cmin mode? */ binary_mode, /* Write output as a binary map */ keep_cores; /* Allow coredumps? */ static volatile u8 stop_soon, /* Ctrl-C pressed? */ child_timed_out, /* Child timed out? */ child_crashed; /* Child crashed? */ /* Classify tuple counts. Instead of mapping to individual bits, as in afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */ static const u8 count_class_human[256] = { [0] = 0, [1] = 1, [2] = 2, [3] = 3, [4 ... 7] = 4, [8 ... 15] = 5, [16 ... 31] = 6, [32 ... 127] = 7, [128 ... 255] = 8 }; static const u8 count_class_binary[256] = { [0] = 0, [1] = 1, [2] = 2, [3] = 4, [4 ... 7] = 8, [8 ... 15] = 16, [16 ... 31] = 32, [32 ... 127] = 64, [128 ... 255] = 128 }; static void classify_counts(u8* mem, const u8* map) { u32 i = MAP_SIZE; if (edges_only) { while (i--) { if (*mem) *mem = 1; mem++; } } else { while (i--) { *mem = map[*mem]; mem++; } } } /* Get rid of shared memory (atexit handler). */ static void remove_shm(void) { shmctl(shm_id, IPC_RMID, NULL); } /* Configure shared memory. 
*/ static void setup_shm(void) { u8* shm_str; shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); if (shm_id < 0) PFATAL("shmget() failed"); atexit(remove_shm); shm_str = alloc_printf("%d", shm_id); setenv(SHM_ENV_VAR, shm_str, 1); ck_free(shm_str); trace_bits = shmat(shm_id, NULL, 0); if (!trace_bits) PFATAL("shmat() failed"); } /* Write results. */ static u32 write_results(void) { s32 fd; u32 i, ret = 0; u8 cco = !!getenv("AFL_CMIN_CRASHES_ONLY"), caa = !!getenv("AFL_CMIN_ALLOW_ANY"); if (!strncmp(out_file, "/dev/", 5)) { fd = open(out_file, O_WRONLY, 0600); if (fd < 0) PFATAL("Unable to open '%s'", out_file); } else if (!strcmp(out_file, "-")) { fd = dup(1); if (fd < 0) PFATAL("Unable to open stdout"); } else { unlink(out_file); /* Ignore errors */ fd = open(out_file, O_WRONLY | O_CREAT | O_EXCL, 0600); if (fd < 0) PFATAL("Unable to create '%s'", out_file); } if (binary_mode) { for (i = 0; i < MAP_SIZE; i++) if (trace_bits[i]) ret++; ck_write(fd, trace_bits, MAP_SIZE, out_file); close(fd); } else { FILE* f = fdopen(fd, "w"); if (!f) PFATAL("fdopen() failed"); for (i = 0; i < MAP_SIZE; i++) { if (!trace_bits[i]) continue; ret++; if (cmin_mode) { if (child_timed_out) break; if (!caa && child_crashed != cco) break; fprintf(f, "%u%u\n", trace_bits[i], i); } else fprintf(f, "%06u:%u\n", i, trace_bits[i]); } fclose(f); } return ret; } /* Handle timeout signal. */ static void handle_timeout(int sig) { child_timed_out = 1; if (child_pid > 0) kill(child_pid, SIGKILL); } /* Execute target application. 
*/

/* Forks and runs target_path with the given argv.

   Child: in quiet mode stdout/stderr are redirected to /dev/null; the
   memory limit (if any) and the core-dump limit are applied; then execv()
   is called. If execv() returns, EXEC_FAIL_SIG is stored at the start of
   trace_bits so the parent can tell that execution never started.

   Parent: arms ITIMER_REAL with exec_tmout milliseconds (when non-zero),
   waits for the child, disarms the timer, classifies the hit counts in
   trace_bits and sets child_crashed if the child died from a signal that
   was not caused by a timeout or a user-requested stop. */

static void run_target(char** argv) {

  static struct itimerval it;
  int status = 0;

  if (!quiet_mode)
    SAYF("-- Program output begins --\n" cRST);

  MEM_BARRIER();

  child_pid = fork();

  if (child_pid < 0) PFATAL("fork() failed");

  if (!child_pid) {

    struct rlimit r;

    if (quiet_mode) {

      s32 fd = open("/dev/null", O_RDWR);

      if (fd < 0 || dup2(fd, 1) < 0 || dup2(fd, 2) < 0) {
        *(u32*)trace_bits = EXEC_FAIL_SIG;
        PFATAL("Descriptor initialization failed");
      }

      close(fd);

    }

    if (mem_limit) {

      /* mem_limit is expressed in megabytes. */

      r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;

#ifdef RLIMIT_AS

      setrlimit(RLIMIT_AS, &r); /* Ignore errors */

#else

      setrlimit(RLIMIT_DATA, &r); /* Ignore errors */

#endif /* ^RLIMIT_AS */

    }

    if (!keep_cores) r.rlim_max = r.rlim_cur = 0;
    else r.rlim_max = r.rlim_cur = RLIM_INFINITY;

    setrlimit(RLIMIT_CORE, &r); /* Ignore errors */

    if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);

    setsid();

    execv(target_path, argv);

    /* Only reached when execv() failed. */

    *(u32*)trace_bits = EXEC_FAIL_SIG;
    exit(0);

  }

  /* Configure timeout, wait for child, cancel timeout. */

  if (exec_tmout) {

    child_timed_out = 0;
    it.it_value.tv_sec = (exec_tmout / 1000);
    it.it_value.tv_usec = (exec_tmout % 1000) * 1000;

  }

  setitimer(ITIMER_REAL, &it, NULL);

  if (waitpid(child_pid, &status, 0) <= 0) FATAL("waitpid() failed");

  child_pid = 0;
  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;
  setitimer(ITIMER_REAL, &it, NULL);

  MEM_BARRIER();

  /* Clean up bitmap, analyze exit condition, etc. */

  if (*(u32*)trace_bits == EXEC_FAIL_SIG)
    FATAL("Unable to execute '%s'", argv[0]);

  classify_counts(trace_bits, binary_mode ?
                  count_class_binary : count_class_human);

  if (!quiet_mode)
    SAYF(cRST "-- Program output ends --\n");

  if (!child_timed_out && !stop_soon && WIFSIGNALED(status))
    child_crashed = 1;

  if (!quiet_mode) {

    if (child_timed_out)
      SAYF(cLRD "\n+++ Program timed off +++\n" cRST);
    else if (stop_soon)
      SAYF(cLRD "\n+++ Program aborted by user +++\n" cRST);
    else if (child_crashed)
      SAYF(cLRD "\n+++ Program killed by signal %u +++\n" cRST, WTERMSIG(status));

  }

}


/* Handle Ctrl-C and the like: request a stop and kill the running child. */

static void handle_stop_sig(int sig) {

  stop_soon = 1;

  if (child_pid > 0) kill(child_pid, SIGKILL);

}


/* Do basic preparations: install default ASAN/MSAN options (without
   overriding values already present in the environment) and forward
   AFL_PRELOAD to the dynamic loader's preload variables. */

static void set_up_environment(void) {

  setenv("ASAN_OPTIONS", "abort_on_error=1:"
                         "detect_leaks=0:"
                         "symbolize=0:"
                         "allocator_may_return_null=1", 0);

  setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                         "symbolize=0:"
                         "abort_on_error=1:"
                         "allocator_may_return_null=1:"
                         "msan_track_origins=0", 0);

  if (getenv("AFL_PRELOAD")) {
    setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
    setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);
  }

}


/* Setup signal handlers: SIGHUP/SIGINT/SIGTERM go to handle_stop_sig,
   SIGALRM (the exec timer) goes to handle_timeout. */

static void setup_signal_handlers(void) {

  struct sigaction sa;

  sa.sa_handler   = NULL;
  sa.sa_flags     = SA_RESTART;
  sa.sa_sigaction = NULL;

  sigemptyset(&sa.sa_mask);

  /* Various ways of saying "stop". */

  sa.sa_handler = handle_stop_sig;
  sigaction(SIGHUP, &sa, NULL);
  sigaction(SIGINT, &sa, NULL);
  sigaction(SIGTERM, &sa, NULL);

  /* Exec timeout notifications. */

  sa.sa_handler = handle_timeout;
  sigaction(SIGALRM, &sa, NULL);

}


/* Detect @@ in args. Each argument containing "@@" is replaced by a newly
   allocated string in which the first "@@" is substituted with at_file
   (made absolute against the current directory when it is relative).
   Using "@@" without at_file being set is fatal. */

static void detect_file_args(char** argv) {

  u32 i = 0;
  u8* cwd = getcwd(NULL, 0);

  if (!cwd) PFATAL("getcwd() failed");

  while (argv[i]) {

    u8* aa_loc = strstr(argv[i], "@@");

    if (aa_loc) {

      u8 *aa_subst, *n_arg;

      if (!at_file) FATAL("@@ syntax is not supported by this tool.");

      /* Be sure that we're always using fully-qualified paths. */

      if (at_file[0] == '/') aa_subst = at_file;
      else aa_subst = alloc_printf("%s/%s", cwd, at_file);

      /* Construct a replacement argv value. The original string is cut at
         the marker for the duration of the printf and restored afterwards. */

      *aa_loc = 0;
      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
      argv[i] = n_arg;
      *aa_loc = '@';

      if (at_file[0] != '/') ck_free(aa_subst);

    }

    i++;

  }

  free(cwd); /* not tracked */

}


/* Show banner. */

static void show_banner(void) {

  SAYF(cCYA "afl-showmap " cBRI VERSION cRST " by \n");

}

/* Display usage hints and exit with status 1. */

static void usage(u8* argv0) {

  show_banner();

  SAYF("\n%s [ options ] -- /path/to/target_app [ ... ]\n\n"

       "Required parameters:\n\n"

       " -o file - file to write the trace data to\n\n"

       "Execution control settings:\n\n"

       " -t msec - timeout for each run (none)\n"
       " -m megs - memory limit for child process (%u MB)\n"
       " -Q - use binary-only instrumentation (QEMU mode)\n\n"

       "Other settings:\n\n"

       " -q - sink program's output and don't show messages\n"
       " -e - show edge coverage only, ignore hit counts\n"
       " -c - allow core dumps\n\n"

       "This tool displays raw tuple data captured by AFL instrumentation.\n"
       "For additional help, consult %s/README.\n\n" cRST,

       argv0, MEM_LIMIT, doc_path);

  exit(1);

}


/* Find binary.
*/ static void find_binary(u8* fname) { u8* env_path = 0; struct stat st; if (strchr(fname, '/') || !(env_path = getenv("PATH"))) { target_path = ck_strdup(fname); if (stat(target_path, &st) || !S_ISREG(st.st_mode) || !(st.st_mode & 0111) || st.st_size < 4) FATAL("Program '%s' not found or not executable", fname); } else { while (env_path) { u8 *cur_elem, *delim = strchr(env_path, ':'); if (delim) { cur_elem = ck_alloc(delim - env_path + 1); memcpy(cur_elem, env_path, delim - env_path); delim++; } else cur_elem = ck_strdup(env_path); env_path = delim; if (cur_elem[0]) target_path = alloc_printf("%s/%s", cur_elem, fname); else target_path = ck_strdup(fname); ck_free(cur_elem); if (!stat(target_path, &st) && S_ISREG(st.st_mode) && (st.st_mode & 0111) && st.st_size >= 4) break; ck_free(target_path); target_path = 0; } if (!target_path) FATAL("Program '%s' not found or not executable", fname); } } /* Fix up argv for QEMU. */ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) { char** new_argv = ck_alloc(sizeof(char*) * (argc + 4)); u8 *tmp, *cp, *rsl, *own_copy; /* Workaround for a QEMU stability glitch. */ setenv("QEMU_LOG", "nochain", 1); memcpy(new_argv + 3, argv + 1, sizeof(char*) * argc); new_argv[2] = target_path; new_argv[1] = "--"; /* Now we need to actually find qemu for argv[0]. 
*/ tmp = getenv("AFL_PATH"); if (tmp) { cp = alloc_printf("%s/afl-qemu-trace", tmp); if (access(cp, X_OK)) FATAL("Unable to find '%s'", tmp); target_path = new_argv[0] = cp; return new_argv; } own_copy = ck_strdup(own_loc); rsl = strrchr(own_copy, '/'); if (rsl) { *rsl = 0; cp = alloc_printf("%s/afl-qemu-trace", own_copy); ck_free(own_copy); if (!access(cp, X_OK)) { target_path = new_argv[0] = cp; return new_argv; } } else ck_free(own_copy); if (!access(BIN_PATH "/afl-qemu-trace", X_OK)) { target_path = new_argv[0] = BIN_PATH "/afl-qemu-trace"; return new_argv; } FATAL("Unable to find 'afl-qemu-trace'."); } /* Main entry point */ int main(int argc, char** argv) { s32 opt; u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0; u32 tcnt; char** use_argv; doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH; while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbc")) > 0) switch (opt) { case 'o': if (out_file) FATAL("Multiple -o options not supported"); out_file = optarg; break; case 'm': { u8 suffix = 'M'; if (mem_limit_given) FATAL("Multiple -m options not supported"); mem_limit_given = 1; if (!strcmp(optarg, "none")) { mem_limit = 0; break; } if (sscanf(optarg, "%llu%c", &mem_limit, &suffix) < 1 || optarg[0] == '-') FATAL("Bad syntax used for -m"); switch (suffix) { case 'T': mem_limit *= 1024 * 1024; break; case 'G': mem_limit *= 1024; break; case 'k': mem_limit /= 1024; break; case 'M': break; default: FATAL("Unsupported suffix or bad syntax for -m"); } if (mem_limit < 5) FATAL("Dangerously low value of -m"); if (sizeof(rlim_t) == 4 && mem_limit > 2000) FATAL("Value of -m out of range on 32-bit systems"); } break; case 't': if (timeout_given) FATAL("Multiple -t options not supported"); timeout_given = 1; if (strcmp(optarg, "none")) { exec_tmout = atoi(optarg); if (exec_tmout < 20 || optarg[0] == '-') FATAL("Dangerously low value of -t"); } break; case 'e': if (edges_only) FATAL("Multiple -e options not supported"); edges_only = 1; break; case 'q': if (quiet_mode) 
FATAL("Multiple -q options not supported"); quiet_mode = 1; break; case 'Z': /* This is an undocumented option to write data in the syntax expected by afl-cmin. Nobody else should have any use for this. */ cmin_mode = 1; quiet_mode = 1; break; case 'A': /* Another afl-cmin specific feature. */ at_file = optarg; break; case 'Q': if (qemu_mode) FATAL("Multiple -Q options not supported"); if (!mem_limit_given) mem_limit = MEM_LIMIT_QEMU; qemu_mode = 1; break; case 'b': /* Secret undocumented mode. Writes output in raw binary format similar to that dumped by afl-fuzz in Copyright 2013, 2014, 2015 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This allocator is not designed to resist malicious attackers (the canaries are small and predictable), but provides a robust and portable way to detect use-after-free, off-by-one writes, stale pointers, and so on. */ #ifndef _HAVE_ALLOC_INL_H #define _HAVE_ALLOC_INL_H #include #include #include #include "config.h" #include "types.h" #include "debug.h" /* User-facing macro to sprintf() to a dynamically allocated buffer. */ #define alloc_printf(_str...) ({ \ u8* _tmp; \ s32 _len = snprintf(NULL, 0, _str); \ if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \ _tmp = ck_alloc(_len + 1); \ snprintf((char*)_tmp, _len + 1, _str); \ _tmp; \ }) /* Macro to enforce allocation limits as a last-resort defense against integer overflows. */ #define ALLOC_CHECK_SIZE(_s) do { \ if ((_s) > MAX_ALLOC) \ ABORT("Bad alloc request: %u bytes", (_s)); \ } while (0) /* Macro to check malloc() failures and the like. */ #define ALLOC_CHECK_RESULT(_r, _s) do { \ if (!(_r)) \ ABORT("Out of memory: can't allocate %u bytes", (_s)); \ } while (0) /* Magic tokens used to mark used / freed chunks. 
*/

#define ALLOC_MAGIC_C1  0xFF00FF00 /* Used head (dword)  */
#define ALLOC_MAGIC_F   0xFE00FE00 /* Freed head (dword) */
#define ALLOC_MAGIC_C2  0xF0       /* Used tail (byte)   */

/* Positions of guard tokens in relation to the user-visible pointer.
   Layout of every chunk: [ u32 head canary | u32 size | payload | u8 tail
   canary ]; the user pointer refers to the start of the payload, and the
   tail canary sits at payload offset ALLOC_S(). */

#define ALLOC_C1(_ptr)  (((u32*)(_ptr))[-2])
#define ALLOC_S(_ptr)   (((u32*)(_ptr))[-1])
#define ALLOC_C2(_ptr)  (((u8*)(_ptr))[ALLOC_S(_ptr)])

#define ALLOC_OFF_HEAD  8
#define ALLOC_OFF_TOTAL (ALLOC_OFF_HEAD + 1)

/* Allocator increments for ck_realloc_block(). */

#define ALLOC_BLK_INC   256

/* Sanity-checking macros for pointers. CHECK_PTR() ignores NULL, and
   aborts if the head canary is wrong (distinguishing the "freed" marker
   from other corruption) or if the tail canary is wrong. CHECK_PTR_EXPR()
   does the same but evaluates to the checked pointer. */

#define CHECK_PTR(_p) do { \
    if (_p) { \
      if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
        if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
          ABORT("Use after free."); \
        else ABORT("Corrupted head alloc canary."); \
      } \
      if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \
        ABORT("Corrupted tail alloc canary."); \
    } \
  } while (0)

#define CHECK_PTR_EXPR(_p) ({ \
    typeof (_p) _tmp = (_p); \
    CHECK_PTR(_tmp); \
    _tmp; \
  })


/* Allocate a buffer, explicitly not zeroing it. Returns NULL for zero-sized
   requests. Oversized requests and malloc() failures abort the program. */

static inline void* DFL_ck_alloc_nozero(u32 size) {

  void* ret;

  if (!size) return NULL;

  ALLOC_CHECK_SIZE(size);
  ret = malloc(size + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(ret, size);

  /* Skip the header so that ret is the user-visible pointer. */

  ret += ALLOC_OFF_HEAD;

  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
  ALLOC_S(ret)  = size;
  ALLOC_C2(ret) = ALLOC_MAGIC_C2;

  return ret;

}


/* Allocate a buffer, returning zeroed memory. Returns NULL for zero-sized
   requests. */

static inline void* DFL_ck_alloc(u32 size) {

  void* mem;

  if (!size) return NULL;
  mem = DFL_ck_alloc_nozero(size);

  return memset(mem, 0, size);

}


/* Free memory, checking for double free and corrupted heap. When DEBUG_BUILD
   is set, the old memory will be also clobbered with 0xFF. NULL is ignored. */

static inline void DFL_ck_free(void* mem) {

  if (!mem) return;

  CHECK_PTR(mem);

#ifdef DEBUG_BUILD

  /* Catch pointer issues sooner. */
  memset(mem, 0xFF, ALLOC_S(mem));

#endif /* DEBUG_BUILD */

  /* Mark the head as freed so a later CHECK_PTR() reports use-after-free. */

  ALLOC_C1(mem) = ALLOC_MAGIC_F;

  free(mem - ALLOC_OFF_HEAD);

}


/* Re-allocate a buffer, checking for issues and zeroing any newly-added tail.
   With DEBUG_BUILD, the buffer is always reallocated to a new addresses and
   the old memory is clobbered with 0xFF. A zero size frees orig and returns
   NULL; a NULL orig behaves like a zeroing allocation. */

static inline void* DFL_ck_realloc(void* orig, u32 size) {

  void* ret;
  u32   old_size = 0;

  if (!size) {

    DFL_ck_free(orig);
    return NULL;

  }

  if (orig) {

    CHECK_PTR(orig);

#ifndef DEBUG_BUILD
    ALLOC_C1(orig) = ALLOC_MAGIC_F;
#endif /* !DEBUG_BUILD */

    old_size  = ALLOC_S(orig);

    /* From here on orig points at the raw malloc() block, not the payload. */

    orig     -= ALLOC_OFF_HEAD;

    ALLOC_CHECK_SIZE(old_size);

  }

  ALLOC_CHECK_SIZE(size);

#ifndef DEBUG_BUILD

  ret = realloc(orig, size + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(ret, size);

#else

  /* Catch pointer issues sooner: force relocation and make sure that the
     original buffer is wiped. */

  ret = malloc(size + ALLOC_OFF_TOTAL);
  ALLOC_CHECK_RESULT(ret, size);

  if (orig) {

    memcpy(ret + ALLOC_OFF_HEAD, orig + ALLOC_OFF_HEAD, MIN(size, old_size));
    memset(orig + ALLOC_OFF_HEAD, 0xFF, old_size);

    ALLOC_C1(orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F;

    free(orig);

  }

#endif /* ^!DEBUG_BUILD */

  ret += ALLOC_OFF_HEAD;

  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
  ALLOC_S(ret)  = size;
  ALLOC_C2(ret) = ALLOC_MAGIC_C2;

  if (size > old_size)
    memset(ret + old_size, 0, size - old_size);

  return ret;

}


/* Re-allocate a buffer with ALLOC_BLK_INC increments (used to speed up
   repeated small reallocs without complicating the user code). Outside of
   DEBUG_BUILD, an existing buffer that is already large enough is returned
   unchanged; otherwise the request is padded by ALLOC_BLK_INC. */

static inline void* DFL_ck_realloc_block(void* orig, u32 size) {

#ifndef DEBUG_BUILD

  if (orig) {

    CHECK_PTR(orig);

    if (ALLOC_S(orig) >= size) return orig;

    size += ALLOC_BLK_INC;

  }

#endif /* !DEBUG_BUILD */

  return DFL_ck_realloc(orig, size);

}


/* Create a buffer with a copy of a string. Returns NULL for NULL inputs.
*/ static inline u8* DFL_ck_strdup(u8* str) { void* ret; u32 size; if (!str) return NULL; size = strlen((char*)str) + 1; ALLOC_CHECK_SIZE(size); ret = malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; return memcpy(ret, str, size); } /* Create a buffer with a copy of a memory block. Returns NULL for zero-sized or NULL inputs. */ static inline void* DFL_ck_memdup(void* mem, u32 size) { void* ret; if (!mem || !size) return NULL; ALLOC_CHECK_SIZE(size); ret = malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; return memcpy(ret, mem, size); } /* Create a buffer with a block of text, appending a NUL terminator at the end. Returns NULL for zero-sized or NULL inputs. */ static inline u8* DFL_ck_memdup_str(u8* mem, u32 size) { u8* ret; if (!mem || !size) return NULL; ALLOC_CHECK_SIZE(size); ret = malloc(size + ALLOC_OFF_TOTAL + 1); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; memcpy(ret, mem, size); ret[size] = 0; return ret; } #ifndef DEBUG_BUILD /* In non-debug mode, we just do straightforward aliasing of the above functions to user-visible names such as ck_alloc(). */ #define ck_alloc DFL_ck_alloc #define ck_alloc_nozero DFL_ck_alloc_nozero #define ck_realloc DFL_ck_realloc #define ck_realloc_block DFL_ck_realloc_block #define ck_strdup DFL_ck_strdup #define ck_memdup DFL_ck_memdup #define ck_memdup_str DFL_ck_memdup_str #define ck_free DFL_ck_free #define alloc_report() #else /* In debugging mode, we also track allocations to detect memory leaks, and the flow goes through one more layer of indirection. 
*/ /* Alloc tracking data structures: */ #define ALLOC_BUCKETS 4096 struct TRK_obj { void *ptr; char *file, *func; u32 line; }; #ifdef AFL_MAIN struct TRK_obj* TRK[ALLOC_BUCKETS]; u32 TRK_cnt[ALLOC_BUCKETS]; # define alloc_report() TRK_report() #else extern struct TRK_obj* TRK[ALLOC_BUCKETS]; extern u32 TRK_cnt[ALLOC_BUCKETS]; # define alloc_report() #endif /* ^AFL_MAIN */ /* Bucket-assigning function for a given pointer: */ #define TRKH(_ptr) (((((u32)(_ptr)) >> 16) ^ ((u32)(_ptr))) % ALLOC_BUCKETS) /* Add a new entry to the list of allocated objects. */ static inline void TRK_alloc_buf(void* ptr, const char* file, const char* func, u32 line) { u32 i, bucket; if (!ptr) return; bucket = TRKH(ptr); /* Find a free slot in the list of entries for that bucket. */ for (i = 0; i < TRK_cnt[bucket]; i++) if (!TRK[bucket][i].ptr) { TRK[bucket][i].ptr = ptr; TRK[bucket][i].file = (char*)file; TRK[bucket][i].func = (char*)func; TRK[bucket][i].line = line; return; } /* No space available - allocate more. */ TRK[bucket] = DFL_ck_realloc_block(TRK[bucket], (TRK_cnt[bucket] + 1) * sizeof(struct TRK_obj)); TRK[bucket][i].ptr = ptr; TRK[bucket][i].file = (char*)file; TRK[bucket][i].func = (char*)func; TRK[bucket][i].line = line; TRK_cnt[bucket]++; } /* Remove entry from the list of allocated objects. */ static inline void TRK_free_buf(void* ptr, const char* file, const char* func, u32 line) { u32 i, bucket; if (!ptr) return; bucket = TRKH(ptr); /* Find the element on the list... */ for (i = 0; i < TRK_cnt[bucket]; i++) if (TRK[bucket][i].ptr == ptr) { TRK[bucket][i].ptr = 0; return; } WARNF("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)", func, file, line); } /* Do a final report on all non-deallocated objects. 
*/ static inline void TRK_report(void) { u32 i, bucket; fflush(0); for (bucket = 0; bucket < ALLOC_BUCKETS; bucket++) for (i = 0; i < TRK_cnt[bucket]; i++) if (TRK[bucket][i].ptr) WARNF("ALLOC: Memory never freed, created in %s (%s:%u)", TRK[bucket][i].func, TRK[bucket][i].file, TRK[bucket][i].line); } /* Simple wrappers for non-debugging functions: */ static inline void* TRK_ck_alloc(u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_alloc(size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_realloc(void* orig, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_realloc(orig, size); TRK_free_buf(orig, file, func, line); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_realloc_block(void* orig, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_realloc_block(orig, size); TRK_free_buf(orig, file, func, line); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_strdup(u8* str, const char* file, const char* func, u32 line) { void* ret = DFL_ck_strdup(str); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_memdup(void* mem, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_memdup(mem, size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_memdup_str(void* mem, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_memdup_str(mem, size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void TRK_ck_free(void* ptr, const char* file, const char* func, u32 line) { TRK_free_buf(ptr, file, func, line); DFL_ck_free(ptr); } /* Aliasing user-facing names to tracking functions: */ #define ck_alloc(_p1) \ TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_alloc_nozero(_p1) \ TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_realloc(_p1, _p2) \ TRK_ck_realloc(_p1, _p2, 
__FILE__, __FUNCTION__, __LINE__) #define ck_realloc_block(_p1, _p2) \ TRK_ck_realloc_block(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_strdup(_p1) \ TRK_ck_strdup(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_memdup(_p1, _p2) \ TRK_ck_memdup(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_memdup_str(_p1, _p2) \ TRK_ck_memdup_str(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_free(_p1) \ TRK_ck_free(_p1, __FILE__, __FUNCTION__, __LINE__) #endif /* ^!DEBUG_BUILD */ #endif /* ! _HAVE_ALLOC_INL_H */ ================================================ FILE: afl_progs/config.h ================================================ /* american fuzzy lop - vaguely configurable bits ---------------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #ifndef _HAVE_CONFIG_H #define _HAVE_CONFIG_H #include "types.h" /* Version string: */ #define VERSION "2.52b" /****************************************************** * * * Settings that may be of interest to power users: * * * ******************************************************/ /* Comment out to disable terminal colors (note that this makes afl-analyze a lot less nice): */ #define USE_COLOR /* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */ #define FANCY_BOXES /* Default timeout for fuzzed code (milliseconds). 
This is the upper bound, also used for detecting hangs; the actual value is auto-scaled: */ #define EXEC_TIMEOUT 1000 /* Timeout rounding factor when auto-scaling (milliseconds): */ #define EXEC_TM_ROUND 20 /* Default memory limit for child process (MB): */ #ifndef __x86_64__ # define MEM_LIMIT 25 #else # define MEM_LIMIT 50 #endif /* ^!__x86_64__ */ /* Default memory limit when running in QEMU mode (MB): */ #define MEM_LIMIT_QEMU 200 /* Number of calibration cycles per every new test case (and for test cases that show variable behavior): */ #define CAL_CYCLES 8 #define CAL_CYCLES_LONG 40 /* Number of subsequent timeouts before abandoning an input file: */ #define TMOUT_LIMIT 250 /* Maximum number of unique hangs or crashes to record: */ #define KEEP_UNIQUE_HANG 500 #define KEEP_UNIQUE_CRASH 5000 /* Baseline number of random tweaks during a single 'havoc' stage: */ #define HAVOC_CYCLES 256 #define HAVOC_CYCLES_INIT 1024 /* Maximum multiplier for the above (should be a power of two, beware of 32-bit int overflows): */ #define HAVOC_MAX_MULT 16 /* Absolute minimum number of havoc cycles (after all adjustments): */ #define HAVOC_MIN 16 /* Maximum stacking for havoc-stage tweaks. The actual value is calculated like this: n = random between 1 and HAVOC_STACK_POW2 stacking = 2^n In other words, the default (n = 7) produces 2, 4, 8, 16, 32, 64, or 128 stacked tweaks: */ #define HAVOC_STACK_POW2 7 /* Caps on block sizes for cloning and deletion operations. 
Each of these ranges has a 33% probability of getting picked, except for the first two cycles where smaller blocks are favored: */ #define HAVOC_BLK_SMALL 32 #define HAVOC_BLK_MEDIUM 128 #define HAVOC_BLK_LARGE 1500 /* Extra-large blocks, selected very rarely (<5% of the time): */ #define HAVOC_BLK_XL 32768 /* Probabilities of skipping non-favored entries in the queue, expressed as percentages: */ #define SKIP_TO_NEW_PROB 99 /* ...when there are new, pending favorites */ #define SKIP_NFAV_OLD_PROB 95 /* ...no new favs, cur entry already fuzzed */ #define SKIP_NFAV_NEW_PROB 75 /* ...no new favs, cur entry not fuzzed yet */ /* Splicing cycle count: */ #define SPLICE_CYCLES 15 /* Nominal per-splice havoc cycle length: */ #define SPLICE_HAVOC 32 /* Maximum offset for integer addition / subtraction stages: */ #define ARITH_MAX 35 /* Limits for the test case trimmer. The absolute minimum chunk size; and the starting and ending divisors for chopping up the input file: */ #define TRIM_MIN_BYTES 4 #define TRIM_START_STEPS 16 #define TRIM_END_STEPS 1024 /* Maximum size of input file, in bytes (keep under 100MB): */ #define MAX_FILE (1 * 1024 * 1024) /* The same, for the test case minimizer: */ #define TMIN_MAX_FILE (10 * 1024 * 1024) /* Block normalization steps for afl-tmin: */ #define TMIN_SET_MIN_SIZE 4 #define TMIN_SET_STEPS 128 /* Maximum dictionary token size (-x), in bytes: */ #define MAX_DICT_FILE 128 /* Length limits for auto-detected dictionary tokens: */ #define MIN_AUTO_EXTRA 3 #define MAX_AUTO_EXTRA 32 /* Maximum number of user-specified dictionary tokens to use in deterministic steps; past this point, the "extras/user" step will be still carried out, but with proportionally lower odds: */ #define MAX_DET_EXTRAS 200 /* Maximum number of auto-extracted dictionary tokens to actually use in fuzzing (first value), and to keep in memory as candidates. The latter should be much higher than the former. 
*/ #define USE_AUTO_EXTRAS 50 #define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 10) /* Scaling factor for the effector map used to skip some of the more expensive deterministic steps. The actual divisor is set to 2^EFF_MAP_SCALE2 bytes: */ #define EFF_MAP_SCALE2 3 /* Minimum input file length at which the effector logic kicks in: */ #define EFF_MIN_LEN 128 /* Maximum effector density past which everything is just fuzzed unconditionally (%): */ #define EFF_MAX_PERC 90 /* UI refresh frequency (Hz): */ #define UI_TARGET_HZ 5 /* Fuzzer stats file and plot update intervals (sec): */ #define STATS_UPDATE_SEC 60 #define PLOT_UPDATE_SEC 5 /* Smoothing divisor for CPU load and exec speed stats (1 - no smoothing). */ #define AVG_SMOOTHING 16 /* Sync interval (every n havoc cycles): */ #define SYNC_INTERVAL 5 /* Output directory reuse grace period (minutes): */ #define OUTPUT_GRACE 25 /* Uncomment to use simple file names (id_NNNNNN): */ // #define SIMPLE_FILES /* List of interesting values to use in fuzzing. 
*/ #define INTERESTING_8 \ -128, /* Overflow signed 8-bit when decremented */ \ -1, /* */ \ 0, /* */ \ 1, /* */ \ 16, /* One-off with common buffer size */ \ 32, /* One-off with common buffer size */ \ 64, /* One-off with common buffer size */ \ 100, /* One-off with common buffer size */ \ 127 /* Overflow signed 8-bit when incremented */ #define INTERESTING_16 \ -32768, /* Overflow signed 16-bit when decremented */ \ -129, /* Overflow signed 8-bit */ \ 128, /* Overflow signed 8-bit */ \ 255, /* Overflow unsig 8-bit when incremented */ \ 256, /* Overflow unsig 8-bit */ \ 512, /* One-off with common buffer size */ \ 1000, /* One-off with common buffer size */ \ 1024, /* One-off with common buffer size */ \ 4096, /* One-off with common buffer size */ \ 32767 /* Overflow signed 16-bit when incremented */ #define INTERESTING_32 \ -2147483648LL, /* Overflow signed 32-bit when decremented */ \ -100663046, /* Large negative number (endian-agnostic) */ \ -32769, /* Overflow signed 16-bit */ \ 32768, /* Overflow signed 16-bit */ \ 65535, /* Overflow unsig 16-bit when incremented */ \ 65536, /* Overflow unsig 16 bit */ \ 100663045, /* Large positive number (endian-agnostic) */ \ 2147483647 /* Overflow signed 32-bit when incremented */ /*********************************************************** * * * Really exotic stuff you probably don't want to touch: * * * ***********************************************************/ /* Call count interval between reseeding the libc PRNG from /dev/urandom: */ #define RESEED_RNG 10000 /* Maximum line length passed from GCC to 'as' and used for parsing configuration files: */ #define MAX_LINE 8192 /* Environment variable used to pass SHM ID to the called program. */ #define SHM_ENV_VAR "__AFL_SHM_ID" /* Other less interesting, internal-only variables. */ #define CLANG_ENV_VAR "__AFL_CLANG_MODE" #define AS_LOOP_ENV_VAR "__AFL_AS_LOOPCHECK" /* In-code signatures for deferred and persistent mode. 
*/ #define PERSIST_SIG "##SIG_AFL_PERSISTENT##" #define DEFER_SIG "##SIG_AFL_DEFER_FORKSRV##" /* Distinctive bitmap signature used to indicate failed execution: */ #define EXEC_FAIL_SIG 0xfee1dead /* Distinctive exit code used to indicate MSAN trip condition: */ #define MSAN_ERROR 86 /* Designated file descriptors for forkserver commands (the application will use FORKSRV_FD and FORKSRV_FD + 1): */ #define FORKSRV_FD 198 /* Fork server init timeout multiplier: we'll wait the user-selected timeout plus this much for the fork server to spin up. */ #define FORK_WAIT_MULT 10 /* Calibration timeout adjustments, to be a bit more generous when resuming fuzzing sessions or trying to calibrate already-added internal finds. The first value is a percentage, the other is in milliseconds: */ #define CAL_TMOUT_PERC 125 #define CAL_TMOUT_ADD 50 /* Number of chances to calibrate a case before giving up: */ #define CAL_CHANCES 3 /* Map size for the traced binary (2^MAP_SIZE_POW2). Must be greater than 2; you probably want to keep it under 18 or so for performance reasons (adjusting AFL_INST_RATIO when compiling is probably a better way to solve problems with complex programs). You need to recompile the target binary after changing this - otherwise, SEGVs may ensue. */ #define MAP_SIZE_POW2 16 #define MAP_SIZE (1 << MAP_SIZE_POW2) /* Maximum allocator request size (keep well under INT_MAX): */ #define MAX_ALLOC 0x40000000 /* A made-up hashing seed: */ #define HASH_CONST 0xa5b35705 /* Constants for afl-gotcpu to control busy loop timing: */ #define CTEST_TARGET_MS 5000 #define CTEST_CORE_TRG_MS 1000 #define CTEST_BUSY_CYCLES (10 * 1000 * 1000) /* Uncomment this to use inferior block-coverage-based instrumentation. Note that you need to recompile the target binary for this to have any effect: */ // #define COVERAGE_ONLY /* Uncomment this to ignore hit counts and output just one bit per tuple. 
As with the previous setting, you will need to recompile the target binary: */ // #define SKIP_COUNTS /* Uncomment this to use instrumentation data to record newly discovered paths, but do not use them as seeds for fuzzing. This is useful for conveniently measuring coverage that could be attained by a "dumb" fuzzing algorithm: */ // #define IGNORE_FINDS #endif /* ! _HAVE_CONFIG_H */ ================================================ FILE: afl_progs/debug.h ================================================ /* american fuzzy lop - debug / error handling macros -------------------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #ifndef _HAVE_DEBUG_H #define _HAVE_DEBUG_H #include #include "types.h" #include "config.h" /******************* * Terminal colors * *******************/ #ifdef USE_COLOR # define cBLK "\x1b[0;30m" # define cRED "\x1b[0;31m" # define cGRN "\x1b[0;32m" # define cBRN "\x1b[0;33m" # define cBLU "\x1b[0;34m" # define cMGN "\x1b[0;35m" # define cCYA "\x1b[0;36m" # define cLGR "\x1b[0;37m" # define cGRA "\x1b[1;90m" # define cLRD "\x1b[1;91m" # define cLGN "\x1b[1;92m" # define cYEL "\x1b[1;93m" # define cLBL "\x1b[1;94m" # define cPIN "\x1b[1;95m" # define cLCY "\x1b[1;96m" # define cBRI "\x1b[1;97m" # define cRST "\x1b[0m" # define bgBLK "\x1b[40m" # define bgRED "\x1b[41m" # define bgGRN "\x1b[42m" # define bgBRN "\x1b[43m" # define bgBLU "\x1b[44m" # define bgMGN "\x1b[45m" # define bgCYA "\x1b[46m" # define bgLGR "\x1b[47m" # define bgGRA "\x1b[100m" # define bgLRD "\x1b[101m" # define bgLGN "\x1b[102m" # define bgYEL "\x1b[103m" # define bgLBL "\x1b[104m" # define bgPIN "\x1b[105m" # define bgLCY "\x1b[106m" # define bgBRI "\x1b[107m" #else # 
define cBLK "" # define cRED "" # define cGRN "" # define cBRN "" # define cBLU "" # define cMGN "" # define cCYA "" # define cLGR "" # define cGRA "" # define cLRD "" # define cLGN "" # define cYEL "" # define cLBL "" # define cPIN "" # define cLCY "" # define cBRI "" # define cRST "" # define bgBLK "" # define bgRED "" # define bgGRN "" # define bgBRN "" # define bgBLU "" # define bgMGN "" # define bgCYA "" # define bgLGR "" # define bgGRA "" # define bgLRD "" # define bgLGN "" # define bgYEL "" # define bgLBL "" # define bgPIN "" # define bgLCY "" # define bgBRI "" #endif /* ^USE_COLOR */ /************************* * Box drawing sequences * *************************/ #ifdef FANCY_BOXES # define SET_G1 "\x1b)0" /* Set G1 for box drawing */ # define RESET_G1 "\x1b)B" /* Reset G1 to ASCII */ # define bSTART "\x0e" /* Enter G1 drawing mode */ # define bSTOP "\x0f" /* Leave G1 drawing mode */ # define bH "q" /* Horizontal line */ # define bV "x" /* Vertical line */ # define bLT "l" /* Left top corner */ # define bRT "k" /* Right top corner */ # define bLB "m" /* Left bottom corner */ # define bRB "j" /* Right bottom corner */ # define bX "n" /* Cross */ # define bVR "t" /* Vertical, branch right */ # define bVL "u" /* Vertical, branch left */ # define bHT "v" /* Horizontal, branch top */ # define bHB "w" /* Horizontal, branch bottom */ #else # define SET_G1 "" # define RESET_G1 "" # define bSTART "" # define bSTOP "" # define bH "-" # define bV "|" # define bLT "+" # define bRT "+" # define bLB "+" # define bRB "+" # define bX "+" # define bVR "+" # define bVL "+" # define bHT "+" # define bHB "+" #endif /* ^FANCY_BOXES */ /*********************** * Misc terminal codes * ***********************/ #define TERM_HOME "\x1b[H" #define TERM_CLEAR TERM_HOME "\x1b[2J" #define cEOL "\x1b[0K" #define CURSOR_HIDE "\x1b[?25l" #define CURSOR_SHOW "\x1b[?25h" /************************ * Debug & error macros * ************************/ /* Just print stuff to the appropriate stream. 
*/ #ifdef MESSAGES_TO_STDOUT # define SAYF(x...) printf(x) #else # define SAYF(x...) fprintf(stderr, x) #endif /* ^MESSAGES_TO_STDOUT */ /* Show a prefixed warning. */ #define WARNF(x...) do { \ SAYF(cYEL "[!] " cBRI "WARNING: " cRST x); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "doing something" message. */ #define ACTF(x...) do { \ SAYF(cLBL "[*] " cRST x); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "success" message. */ #define OKF(x...) do { \ SAYF(cLGN "[+] " cRST x); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed fatal error message (not used in afl). */ #define BADF(x...) do { \ SAYF(cLRD "\n[-] " cRST x); \ SAYF(cRST "\n"); \ } while (0) /* Die with a verbose non-OS fatal error message. */ #define FATAL(x...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ cBRI x); \ SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ exit(1); \ } while (0) /* Die by calling abort() to provide a core dump. */ #define ABORT(x...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ cBRI x); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ abort(); \ } while (0) /* Die while also including the output of perror(). */ #define PFATAL(x...) do { \ fflush(stdout); \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] SYSTEM ERROR : " \ cBRI x); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \ __FUNCTION__, __FILE__, __LINE__); \ SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \ exit(1); \ } while (0) /* Die with FAULT() or PFAULT() depending on the value of res (used to interpret different failure modes for read(), write(), etc). */ #define RPFATAL(res, x...) do { \ if (res < 0) PFATAL(x); else FATAL(x); \ } while (0) /* Error-checking versions of read() and write() that call RPFATAL() as appropriate. 
*/

/* ck_write(fd, buf, len, fn): write exactly len bytes from buf to fd; on any
   other outcome die via RPFATAL(), naming fn in the message. len is evaluated
   once (copied into _len); fd and buf are expanded as-is.

   NOTE(review): _res is s32 and _len is u32, so the comparison is carried out
   in unsigned arithmetic; a -1 return still compares unequal for every len
   other than 0xFFFFFFFF. */

#define ck_write(fd, buf, len, fn) do { \
    u32 _len = (len); \
    s32 _res = write(fd, buf, _len); \
    if (_res != _len) RPFATAL(_res, "Short write to %s", fn); \
  } while (0)

/* ck_read(fd, buf, len, fn): read exactly len bytes from fd into buf; a short
   read, EOF or error dies via RPFATAL(), naming fn in the message. The same
   signed/unsigned note as for ck_write() applies. */

#define ck_read(fd, buf, len, fn) do { \
    u32 _len = (len); \
    s32 _res = read(fd, buf, _len); \
    if (_res != _len) RPFATAL(_res, "Short read from %s", fn); \
  } while (0)

#endif /* ! _HAVE_DEBUG_H */


================================================
FILE: afl_progs/hash.h
================================================
/*
   american fuzzy lop - hashing function
   -------------------------------------

   The hash32() function is a variant of MurmurHash3, a good
   non-cryptosafe hashing function developed by Austin Appleby.

   For simplicity, this variant does *NOT* accept buffer lengths
   that are not divisible by 8 bytes. The 32-bit version is otherwise
   similar to the original; the 64-bit one is a custom hack with
   mostly-unproven properties.

   Austin's original code is public domain.

   Other code written and maintained by Michal Zalewski

   Copyright 2016 Google Inc. All rights reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License. 
You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #ifndef _HAVE_HASH_H #define _HAVE_HASH_H #include "types.h" #ifdef __x86_64__ #define ROL64(_x, _r) ((((u64)(_x)) << (_r)) | (((u64)(_x)) >> (64 - (_r)))) static inline u32 hash32(const void* key, u32 len, u32 seed) { const u64* data = (u64*)key; u64 h1 = seed ^ len; len >>= 3; while (len--) { u64 k1 = *data++; k1 *= 0x87c37b91114253d5ULL; k1 = ROL64(k1, 31); k1 *= 0x4cf5ad432745937fULL; h1 ^= k1; h1 = ROL64(h1, 27); h1 = h1 * 5 + 0x52dce729; } h1 ^= h1 >> 33; h1 *= 0xff51afd7ed558ccdULL; h1 ^= h1 >> 33; h1 *= 0xc4ceb9fe1a85ec53ULL; h1 ^= h1 >> 33; return h1; } #else #define ROL32(_x, _r) ((((u32)(_x)) << (_r)) | (((u32)(_x)) >> (32 - (_r)))) static inline u32 hash32(const void* key, u32 len, u32 seed) { const u32* data = (u32*)key; u32 h1 = seed ^ len; len >>= 2; while (len--) { u32 k1 = *data++; k1 *= 0xcc9e2d51; k1 = ROL32(k1, 15); k1 *= 0x1b873593; h1 ^= k1; h1 = ROL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } h1 ^= h1 >> 16; h1 *= 0x85ebca6b; h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; h1 ^= h1 >> 16; return h1; } #endif /* ^__x86_64__ */ #endif /* !_HAVE_HASH_H */ ================================================ FILE: afl_progs/llvm_mode/Makefile ================================================ # # american fuzzy lop - LLVM instrumentation # ----------------------------------------- # # Written by Laszlo Szekeres and # Michal Zalewski # # LLVM integration design comes from Laszlo Szekeres. # # Copyright 2015, 2016 Google Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # PREFIX ?= /usr/local HELPER_PATH = $(PREFIX)/lib/afl BIN_PATH = $(PREFIX)/bin VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) LLVM_CONFIG ?= llvm-config CFLAGS ?= -O3 -funroll-loops CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ -DVERSION=\"$(VERSION)\" ifdef AFL_TRACE_PC CFLAGS += -DUSE_TRACE_PC=1 endif CXXFLAGS ?= -O3 -funroll-loops CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ -DVERSION=\"$(VERSION)\" -Wno-variadic-macros CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -fno-rtti -fpic $(CXXFLAGS) CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) # User teor2345 reports that this is required to make things work on MacOS X. ifeq "$(shell uname)" "Darwin" CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress endif # We were using llvm-config --bindir to get the location of clang, but # this seems to be busted on some distros, so using the one in $PATH is # probably better. ifeq "$(origin CC)" "default" CC = clang CXX = clang++ endif ifndef AFL_TRACE_PC PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o else PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o endif all: test_deps $(PROGS) test_build all_done test_deps: ifndef AFL_TRACE_PC @echo "[*] Checking for working 'llvm-config'..." @which $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 ) else @echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)." endif @echo "[*] Checking for working '$(CC)'..." @which $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. 
Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 ) @echo "[*] Checking for '../afl-showmap'..." @test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 ) @echo "[+] All set and ready to build." ../afl-clang-fast: afl-clang-fast.c | test_deps $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) ln -sf afl-clang-fast ../afl-clang-fast++ ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps $(CC) $(CFLAGS) -fPIC -c $< -o $@ ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 32-bit variant of the runtime (-m32)... " @$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 64-bit variant of the runtime (-m64)... " @$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi test_build: $(PROGS) @echo "[*] Testing the CC wrapper and instrumentation output..." unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) echo 0 | ../afl-showmap -m none -q -o .test-instr0 ./test-instr echo 1 | ../afl-showmap -m none -q -o .test-instr1 ./test-instr @rm -f test-instr @cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping to troubleshoot the issue."; echo; exit 1; fi @echo "[+] All right, the instrumentation seems to be working!" all_done: test_build @echo "[+] All done! You can now use '../afl-clang-fast' to compile programs." 
.NOTPARALLEL: clean clean: rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 rm -f $(PROGS) ../afl-clang-fast++ ================================================ FILE: afl_progs/llvm_mode/README.llvm ================================================ ============================================ Fast LLVM-based instrumentation for afl-fuzz ============================================ 1) Introduction --------------- The code in this directory allows you to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude assembly-level rewriting approach taken by afl-gcc and afl-clang. This has several interesting properties: - The compiler can make many optimizations that are hard to pull off when manually inserting assembly. As a result, some slow, CPU-bound programs will run up to around 2x faster. The gains are less pronounced for fast binaries, where the speed is limited chiefly by the cost of creating new processes. In such cases, the gain will probably stay within 10%. - The instrumentation is CPU-independent. At least in principle, you should be able to rely on it to fuzz programs on non-x86 architectures (after building afl-fuzz with AFL_NO_X86=1). - The instrumentation can cope a bit better with multi-threaded targets. - Because the feature relies on the internals of LLVM, it is clang-specific and will *not* work with GCC. Once this implementation is shown to be sufficiently robust and portable, it will probably replace afl-clang. For now, it can be built separately and co-exists with the original code. The idea and much of the implementation comes from Laszlo Szekeres. 2) How to use ------------- In order to leverage this mechanism, you need to have clang installed on your system. You should also make sure that the llvm-config tool is in your path (or pointed to via LLVM_CONFIG in the environment). 
Unfortunately, some systems that do have clang come without llvm-config or the
LLVM development headers; one example of this is FreeBSD. FreeBSD users will
also run into problems with clang being built statically and not being able to
load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so).

To solve all your problems, you can grab pre-built binaries for your OS from:

  http://llvm.org/releases/download.html

...and then put the bin/ directory from the tarball at the beginning of your
$PATH when compiling the feature and building packages later on. You don't need
to be root for that.

To build the instrumentation itself, type 'make'. This will generate binaries
called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this
is done, you can instrument third-party code in a way similar to the standard
operating mode of AFL, e.g.:

  CC=/path/to/afl/afl-clang-fast ./configure [...options...]
  make

Be sure to also include CXX set to afl-clang-fast++ for C++ code.

The tool honors roughly the same environmental variables as afl-gcc (see
../docs/env_variables.txt). This includes AFL_INST_RATIO, AFL_USE_ASAN,
AFL_HARDEN, and AFL_DONT_OPTIMIZE.

Note: if you want the LLVM helper to be installed on your system for all
users, you need to build it before issuing 'make install' in the parent
directory.

3) Gotchas, feedback, bugs
--------------------------

This is an early-stage mechanism, so field reports are welcome. You can send bug
reports to <afl-users@googlegroups.com>.

4) Bonus feature #1: deferred instrumentation
---------------------------------------------

AFL tries to optimize performance by executing the targeted binary just once,
stopping it just before main(), and then cloning this "master" process to get
a steady supply of targets to fuzz. 
Although this approach eliminates much of the OS-, linker- and libc-level costs of executing the program, it does not always help with binaries that perform other time-consuming initialization steps - say, parsing a large config file before getting to the fuzzed data. In such cases, it's beneficial to initialize the forkserver a bit later, once most of the initialization work is already done, but before the binary attempts to read the fuzzed input and parse it; in some cases, this can offer a 10x+ performance gain. You can implement delayed initialization in LLVM mode in a fairly simple way. First, find a suitable location in the code where the delayed cloning can take place. This needs to be done with *extreme* care to avoid breaking the binary. In particular, the program will probably malfunction if you select a location after: - The creation of any vital threads or child processes - since the forkserver can't clone them easily. - The initialization of timers via setitimer() or equivalent calls. - The creation of temporary files, network sockets, offset-sensitive file descriptors, and similar shared-state resources - but only provided that their state meaningfully influences the behavior of the program later on. - Any access to the fuzzed input, including reading the metadata about its size. With the location selected, add this code in the appropriate spot: #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif You don't need the #ifdef guards, but including them ensures that the program will keep working normally when compiled with a tool other than afl-clang-fast. Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will *not* generate a deferred-initialization binary) - and you should be all set! 5) Bonus feature #2: persistent mode ------------------------------------ Some libraries provide APIs that are stateless, or whose state can be reset in between processing different input files. 
When such a reset is performed, a single long-lived process can be reused to
try out multiple test cases, eliminating the need for repeated fork() calls and
the associated OS overhead.

The basic structure of the program that does this would be:

  while (__AFL_LOOP()) {

    /* Read input data. */
    /* Call library code to be fuzzed. */
    /* Reset state. */

  }

  /* Exit normally */

In this version, __AFL_LOOP() takes no argument. The maximum number of
iterations before the process is restarted from scratch is not written in the
loop; the runtime (afl-llvm-rt.o.c) reads it from the environment variable
named by the PERSIST_MAX_VAR macro. Restarting periodically minimizes the
impact of memory leaks and similar glitches; 1000 is a good starting point, and
going much higher increases the likelihood of hiccups without giving you any
real performance benefits.

A more detailed template is shown in ../experimental/persistent_demo/.
Similarly to the previous mode, the feature works only with afl-clang-fast;
#ifdef guards can be used to suppress it when using other compilers.

Note that as with the previous mode, the feature is easy to misuse; if you
do not fully reset the critical state, you may end up with false positives or
waste a whole lot of CPU power doing nothing useful at all. Be particularly
wary of memory leaks and of the state of file descriptors.

PS. Because there are task switches still involved, the mode isn't as fast as
"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
faster than the normal fork() model, and compared to in-process fuzzing,
should be a lot more robust.

6) Bonus feature #3: new 'trace-pc-guard' mode
----------------------------------------------

Recent versions of LLVM are shipping with a built-in execution tracing feature
that provides AFL with the necessary tracing data without the need to
post-process the assembly or install any compiler plugins. See:

  http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards

As of this writing, the feature is only available on SVN trunk, and is yet to
make it to an official release of LLVM. 
Nevertheless, if you have a sufficiently recent compiler and want to give it a try, build afl-clang-fast this way: AFL_TRACE_PC=1 make clean all Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, and about 5-10% slower than afl-clang. This is likely because the instrumentation is not inlined, and instead involves a function call. On systems that support it, compiling your target with -flto should help. ================================================ FILE: afl_progs/llvm_mode/afl-clang-fast.c ================================================ /* american fuzzy lop - LLVM-mode wrapper for clang ------------------------------------------------ Written by Laszlo Szekeres and Michal Zalewski LLVM integration design comes from Laszlo Szekeres. Copyright 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This program is a drop-in replacement for clang, similar in most respects to ../afl-gcc. It tries to figure out compilation mode, adds a bunch of flags, and then calls the real compiler. */ #define AFL_MAIN #include "../config.h" #include "../types.h" #include "../debug.h" #include "../alloc-inl.h" #include #include #include #include static u8* obj_path; /* Path to runtime libraries */ static u8** cc_params; /* Parameters passed to the real CC */ static u32 cc_par_cnt = 1; /* Param count, including argv0 */ /* Try to find the runtime libraries. If that fails, abort. 
*/ static void find_obj(u8* argv0) { u8 *afl_path = getenv("AFL_PATH"); u8 *slash, *tmp; if (afl_path) { tmp = alloc_printf("%s/afl-llvm-rt.o", afl_path); if (!access(tmp, R_OK)) { obj_path = afl_path; ck_free(tmp); return; } ck_free(tmp); } slash = strrchr(argv0, '/'); if (slash) { u8 *dir; *slash = 0; dir = ck_strdup(argv0); *slash = '/'; tmp = alloc_printf("%s/afl-llvm-rt.o", dir); if (!access(tmp, R_OK)) { obj_path = dir; ck_free(tmp); return; } ck_free(tmp); ck_free(dir); } if (!access(AFL_PATH "/afl-llvm-rt.o", R_OK)) { obj_path = AFL_PATH; return; } FATAL("Unable to find 'afl-llvm-rt.o' or 'afl-llvm-pass.so'. Please set AFL_PATH"); } /* Copy argv to cc_params, making the necessary edits. */ static void edit_params(u32 argc, char** argv) { u8 fortify_set = 0, asan_set = 0, x_set = 0, maybe_linking = 1, bit_mode = 0; u8 *name; cc_params = ck_alloc((argc + 128) * sizeof(u8*)); name = strrchr(argv[0], '/'); if (!name) name = argv[0]; else name++; if (!strcmp(name, "afl-clang-fast++")) { u8* alt_cxx = getenv("AFL_CXX"); cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++"; } else { u8* alt_cc = getenv("AFL_CC"); cc_params[0] = alt_cc ? alt_cc : (u8*)"clang"; } /* There are two ways to compile afl-clang-fast. In the traditional mode, we use afl-llvm-pass.so to inject instrumentation. In the experimental 'trace-pc-guard' mode, we use native LLVM instrumentation callbacks instead. 
The latter is a very recent addition - see: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards */ #ifdef USE_TRACE_PC cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; cc_params[cc_par_cnt++] = "-mllvm"; cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; #else cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); #endif /* ^USE_TRACE_PC */ cc_params[cc_par_cnt++] = "-Qunused-arguments"; /* Detect stray -v calls from ./configure scripts. */ if (argc == 1 && !strcmp(argv[1], "-v")) maybe_linking = 0; while (--argc) { u8* cur = *(++argv); if (!strcmp(cur, "-m32")) bit_mode = 32; if (!strcmp(cur, "-m64")) bit_mode = 64; if (!strcmp(cur, "-x")) x_set = 1; if (!strcmp(cur, "-c") || !strcmp(cur, "-S") || !strcmp(cur, "-E")) maybe_linking = 0; if (!strcmp(cur, "-fsanitize=address") || !strcmp(cur, "-fsanitize=memory")) asan_set = 1; if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1; if (!strcmp(cur, "-shared")) maybe_linking = 0; if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined")) continue; cc_params[cc_par_cnt++] = cur; } if (getenv("AFL_HARDEN")) { cc_params[cc_par_cnt++] = "-fstack-protector-all"; if (!fortify_set) cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2"; } if (!asan_set) { if (getenv("AFL_USE_ASAN")) { if (getenv("AFL_USE_MSAN")) FATAL("ASAN and MSAN are mutually exclusive"); if (getenv("AFL_HARDEN")) FATAL("ASAN and AFL_HARDEN are mutually exclusive"); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; cc_params[cc_par_cnt++] = "-fsanitize=address"; } else if (getenv("AFL_USE_MSAN")) { if (getenv("AFL_USE_ASAN")) FATAL("ASAN and MSAN are mutually exclusive"); if (getenv("AFL_HARDEN")) FATAL("MSAN and AFL_HARDEN are mutually exclusive"); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; cc_params[cc_par_cnt++] = "-fsanitize=memory"; } } #ifdef USE_TRACE_PC if 
(getenv("AFL_INST_RATIO")) FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); #endif /* USE_TRACE_PC */ if (!getenv("AFL_DONT_OPTIMIZE")) { cc_params[cc_par_cnt++] = "-g"; cc_params[cc_par_cnt++] = "-O3"; cc_params[cc_par_cnt++] = "-funroll-loops"; } if (getenv("AFL_NO_BUILTIN")) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp"; cc_params[cc_par_cnt++] = "-fno-builtin-memcmp"; } cc_params[cc_par_cnt++] = "-D__AFL_HAVE_MANUAL_CONTROL=1"; cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1"; cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"; /* When the user tries to use persistent or deferred forkserver modes by appending a single line to the program, we want to reliably inject a signature into the binary (to be picked up by afl-fuzz) and we want to call a function from the runtime .o file. This is unnecessarily painful for three reasons: 1) We need to convince the compiler not to optimize out the signature. This is done with __attribute__((used)). 2) We need to convince the linker, when called with -Wl,--gc-sections, not to do the same. This is done by forcing an assignment to a 'volatile' pointer. 3) We need to declare __afl_persistent_loop() in the global namespace, but doing this within a method in a class is hard - :: and extern "C" are forbidden and __attribute__((alias(...))) doesn't work. Hence the __asm__ aliasing trick. 
*/ cc_params[cc_par_cnt++] = "-D__AFL_LOOP()=" "({ static volatile char *_B __attribute__((used)); " " _B = (char*)\"" PERSIST_SIG "\"; " #ifdef __APPLE__ "__attribute__((visibility(\"default\"))) " "int _L(void) __asm__(\"___afl_persistent_loop\"); " #else "__attribute__((visibility(\"default\"))) " "int _L(void) __asm__(\"__afl_persistent_loop\"); " #endif /* ^__APPLE__ */ "_L(); })"; cc_params[cc_par_cnt++] = "-D__AFL_INIT()=" "do { static volatile char *_A __attribute__((used)); " " _A = (char*)\"" DEFER_SIG "\"; " #ifdef __APPLE__ "__attribute__((visibility(\"default\"))) " "void _I(void) __asm__(\"___afl_manual_init\"); " #else "__attribute__((visibility(\"default\"))) " "void _I(void) __asm__(\"__afl_manual_init\"); " #endif /* ^__APPLE__ */ "_I(); } while (0)"; if (maybe_linking) { if (x_set) { cc_params[cc_par_cnt++] = "-x"; cc_params[cc_par_cnt++] = "none"; } switch (bit_mode) { case 0: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path); break; case 32: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path); if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m32 is not supported by your compiler"); break; case 64: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path); if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m64 is not supported by your compiler"); break; } } cc_params[cc_par_cnt] = NULL; } /* Main entry point */ int main(int argc, char** argv) { if (isatty(2) && !getenv("AFL_QUIET")) { #ifdef USE_TRACE_PC SAYF(cCYA "afl-clang-fast [tpcg] " cBRI VERSION cRST " by \n"); #else SAYF(cCYA "afl-clang-fast " cBRI VERSION cRST " by \n"); #endif /* ^USE_TRACE_PC */ } if (argc < 2) { SAYF("\n" "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n" "for clang, letting you recompile third-party code with the required runtime\n" "instrumentation. 
A common use pattern would be one of the following:\n\n" " CC=%s/afl-clang-fast ./configure\n" " CXX=%s/afl-clang-fast++ ./configure\n\n" "In contrast to the traditional afl-clang tool, this version is implemented as\n" "an LLVM pass and tends to offer improved performance with slow programs.\n\n" "You can specify custom next-stage toolchain via AFL_CC and AFL_CXX. Setting\n" "AFL_HARDEN enables hardening optimizations in the compiled code.\n\n", BIN_PATH, BIN_PATH); exit(1); } find_obj(argv[0]); edit_params(argc, argv); execvp(cc_params[0], (char**)cc_params); FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]); return 0; } ================================================ FILE: afl_progs/llvm_mode/afl-llvm-pass.so.cc ================================================ /* american fuzzy lop - LLVM-mode instrumentation pass --------------------------------------------------- Written by Laszlo Szekeres and Michal Zalewski LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted from afl-as.c are Michal's fault. Copyright 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This library is plugged into LLVM when invoking clang through afl-clang-fast. It tells the compiler to add code roughly equivalent to the bits discussed in ../afl-as.h. 
*/ #define AFL_LLVM_PASS #include "../config.h" #include "../debug.h" #include #include #include #include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" using namespace llvm; namespace { class AFLCoverage : public ModulePass { public: static char ID; AFLCoverage() : ModulePass(ID) { } bool runOnModule(Module &M) override; // StringRef getPassName() const override { // return "American Fuzzy Lop Instrumentation"; // } }; } char AFLCoverage::ID = 0; bool AFLCoverage::runOnModule(Module &M) { LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); /* Show a banner */ char be_quiet = 0; if (isatty(2) && !getenv("AFL_QUIET")) { SAYF(cCYA "afl-llvm-pass " cBRI VERSION cRST " by \n"); } else be_quiet = 1; /* Decide instrumentation ratio */ char* inst_ratio_str = getenv("AFL_INST_RATIO"); unsigned int inst_ratio = 100; if (inst_ratio_str) { if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); } /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. */ GlobalVariable *AFLMapPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); GlobalVariable *AFLPrevLoc = new GlobalVariable( M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); /* Instrument all the things! 
*/ int inst_blocks = 0; for (auto &F : M) for (auto &BB : F) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); IRBuilder<> IRB(&(*IP)); if (AFL_R(100) >= inst_ratio) continue; /* Make up cur_loc */ unsigned int cur_loc = AFL_R(MAP_SIZE); ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc); /* Load prev_loc */ LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); /* Update bitmap */ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); /* Set prev_loc to cur_loc >> 1 */ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); inst_blocks++; } /* Say something nice. */ if (!be_quiet) { if (!inst_blocks) WARNF("No instrumentation targets found."); else OKF("Instrumented %u locations (%s mode, ratio %u%%).", inst_blocks, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? 
"ASAN/MSAN" : "non-hardened"), inst_ratio); } return true; } static void registerAFLPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new AFLCoverage()); } static RegisterStandardPasses RegisterAFLPass( PassManagerBuilder::EP_OptimizerLast, registerAFLPass); static RegisterStandardPasses RegisterAFLPass0( PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); ================================================ FILE: afl_progs/llvm_mode/afl-llvm-rt.o.c ================================================ /* american fuzzy lop - LLVM instrumentation bootstrap --------------------------------------------------- Written by Laszlo Szekeres and Michal Zalewski LLVM integration design comes from Laszlo Szekeres. Copyright 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This code is the rewrite of afl-as.h's main_payload. */ /* This file has been modified from the original AFL version to incorporate into Killerbeez. Specifically, the fork server has been modified to match the Killerbeez fork server protocol. */ #include "../config.h" #include "../types.h" #include "../../instrumentation/forkserver_internal.h" #include #include #include #include #include #include #include #include #include #include /* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode. Basically, we need to make sure that the forkserver is initialized after the LLVM-generated runtime initialization pass, not before. */ #ifdef USE_TRACE_PC # define CONST_PRIO 5 #else # define CONST_PRIO 0 #endif /* ^USE_TRACE_PC */ /* Globals needed by the injected instrumentation. The __afl_area_initial region is used for instrumentation output before __afl_map_shm() has a chance to run. It will end up as .comm, so it shouldn't be too wasteful. 
*/ u8 __afl_area_initial[MAP_SIZE]; u8* __afl_area_ptr = __afl_area_initial; __thread u32 __afl_prev_loc; /* Running in persistent mode? */ static u8 is_persistent; /* SHM setup. */ static void __afl_map_shm(void) { u8 *id_str = getenv(SHM_ENV_VAR); /* If we're running under AFL, attach to the appropriate region, replacing the early-stage __afl_area_initial region that is needed to allow some really hacky .init code to work correctly in projects such as OpenSSL. */ if (id_str) { u32 shm_id = atoi(id_str); __afl_area_ptr = shmat(shm_id, NULL, 0); /* Whooooops. */ if (__afl_area_ptr == (void *)-1) _exit(1); } } /* Fork server logic. */ static void __afl_start_forkserver_persistence(void); static int max_cnt = 0; static int forkserver_cycle_cnt = 0; static int cycle_cnt = 0; static void __afl_start_forkserver(void) { static int response = 0x41414141; char command; s32 child_pid; /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) return; if(getenv(PERSIST_MAX_VAR)) { __afl_start_forkserver_persistence(); return; } while (1) { // Wait for parent by reading from the pipe. Exit if read fails. if(read(FUZZER_TO_FORKSRV, &command, sizeof(command)) != sizeof(command)) _exit(1); switch(command) { case EXIT: case RUN: //LLVM doesn't do the single RUN/FORK commands case FORK: //but instead only implements FORK_RUN _exit(0); break; case FORK_RUN: child_pid = fork(); if(child_pid < 0) _exit(1); //In child process: close fds, resume execution. 
if(!child_pid) { close(FUZZER_TO_FORKSRV); close(FORKSRV_TO_FUZZER); //Reset the afl bitmap to a clean state memset(__afl_area_ptr, 0, MAP_SIZE); __afl_prev_loc = 0; return; } response = child_pid; break; case GET_STATUS: if(waitpid(child_pid, &response, 0) < 0) _exit(1); break; } if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) _exit(1); } } static void __afl_start_forkserver_persistence(void) { int response = 0x41414141; char command; int child_pid = -1; //Get the maximum number of persistent executions max_cnt = atoi(getenv(PERSIST_MAX_VAR)); if(!max_cnt) _exit(1); while (1) { // Wait for parent by reading from the pipe. Exit if read fails. if(read(FUZZER_TO_FORKSRV, &command, sizeof(command)) != sizeof(command)) _exit(1); switch(command) { case FORK: case RUN: case EXIT: if(child_pid != -1) kill(child_pid, SIGKILL); _exit(0); break; case FORK_RUN: if(child_pid == -1 || forkserver_cycle_cnt == max_cnt) { //If we need to (re)start the persistent child, do so if(child_pid != -1 && forkserver_cycle_cnt == max_cnt) { //if we've hit the maximum cycle count, continue the child, so it may exit //and clean up. We do this now, rather than in GET_STATUS commands, to ensure that //the exit portion of the target process does not get traced. kill(child_pid, SIGCONT); if(waitpid(child_pid, &response, 0) < 0) _exit(1); forkserver_cycle_cnt = 0; } child_pid = fork(); if(child_pid < 0) _exit(1); //In child process: close fds, resume execution. 
if(!child_pid) { close(FUZZER_TO_FORKSRV); close(FORKSRV_TO_FUZZER); return; } } else { //Otherwise, just tell it to continue kill(child_pid, SIGCONT); } //Tell the target process to go response = child_pid; if(child_pid == -1) { response = FORKSERVER_ERROR; break; } forkserver_cycle_cnt++; break; case GET_STATUS: if(waitpid(child_pid, &response, WUNTRACED) < 0) _exit(1); if(WIFEXITED(response) || WIFSIGNALED(response)) { //The process ended, either child_pid = -1; //by hitting the max_cnt count and exiting, or by crashing forkserver_cycle_cnt = 0; } else if(WIFSTOPPED(response)) //If we hit a SIGSTOP, then the child didn't response = 0; //die, just return 0 to the parent break; } if(write(FORKSRV_TO_FUZZER, &response, sizeof(response)) != sizeof(response)) _exit(1); } } /* A simplified persistent mode handler, used as explained in README.llvm. */ int __afl_persistent_loop(void) { static u8 first_pass = 1; if (first_pass) { if (is_persistent) { memset(__afl_area_ptr, 0, MAP_SIZE); __afl_prev_loc = 0; } cycle_cnt = 0; first_pass = 0; return 1; } if (is_persistent) { if(++cycle_cnt != max_cnt) { raise(SIGSTOP); memset(__afl_area_ptr, 0, MAP_SIZE); __afl_prev_loc = 0; return 1; } else { /* When exiting __AFL_LOOP(), make sure that the subsequent code that follows the loop is not traced. We do that by pivoting back to the dummy output region. */ __afl_area_ptr = __afl_area_initial; } } return 0; } /* This one can be called from user code when deferred forkserver mode is enabled. */ void __afl_manual_init(void) { static u8 init_done; if (!init_done) { __afl_map_shm(); __afl_start_forkserver(); init_done = 1; } } /* Proper initialization routine. */ __attribute__((constructor(CONST_PRIO))) void __afl_auto_init(void) { is_persistent = !!getenv(PERSIST_MAX_VAR); if (getenv(DEFER_ENV_VAR)) return; __afl_manual_init(); } /* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard. It remains non-operational in the traditional, plugin-backed LLVM mode. 
For more info about 'trace-pc-guard', see README.llvm. The first function (__sanitizer_cov_trace_pc_guard) is called back on every edge (as opposed to every basic block). */ void __sanitizer_cov_trace_pc_guard(uint32_t* guard) { __afl_area_ptr[*guard]++; } /* Init callback. Populates instrumentation IDs. Note that we're using ID of 0 as a special value to indicate non-instrumented bits. That may still touch the bitmap, but in a fairly harmless way. */ void __sanitizer_cov_trace_pc_guard_init(uint32_t* start, uint32_t* stop) { u32 inst_ratio = 100; u8* x; if (start == stop || *start) return; x = getenv("AFL_INST_RATIO"); if (x) inst_ratio = atoi(x); if (!inst_ratio || inst_ratio > 100) { fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); abort(); } /* Make sure that the first element in the range is always set - we use that to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). */ *(start++) = R(MAP_SIZE - 1) + 1; while (start < stop) { if (R(100) < inst_ratio) *start = R(MAP_SIZE - 1) + 1; else *start = 0; start++; } } ================================================ FILE: afl_progs/qemu_mode/README.qemu ================================================ This directory contains the build script and patches for the QEMU-based instrumentation. This instrumentation is copied from AFL with minor changes to fit into the killerbeez fork server framework (see http://lcamtuf.coredump.cx/afl/ for the original version). 
Additionally this version of the QEMU instrumentation has been patched with the following patches from vanhauser-thc's github repository (available at https://github.com/vanhauser-thc/afl-patches/): * afl-qemu-speed.diff * afl-qemu-ppc64.diff * afl_qemu_optimize_map.diff * afl_qemu_optimize_entrypoint.diff The original AFL QEMU Readme is listed below: ========================================================= High-performance binary-only instrumentation for afl-fuzz ========================================================= (See ../docs/README for the general instruction manual.) 1) Introduction --------------- The code in this directory allows you to build a standalone feature that leverages the QEMU "user emulation" mode and allows callers to obtain instrumentation output for black-box, closed-source binaries. This mechanism can be then used by afl-fuzz to stress-test targets that couldn't be built with afl-gcc. The usual performance cost is 2-5x, which is considerably better than seen so far in experiments with tools such as DynamoRIO and PIN. The idea and much of the implementation comes from Andrew Griffiths. 2) How to use ------------- The feature is implemented with a fairly simple patch to QEMU 2.10.0. The simplest way to build it is to run ./build_qemu_support.sh. The script will download, configure, and compile the QEMU binary for you. QEMU is a big project, so this will take a while, and you may have to resolve a couple of dependencies (most notably, you will definitely need libtool and glib2-devel). Once the binaries are compiled, you can leverage the QEMU tool by calling afl-fuzz and all the related utilities with -Q in the command line. Note that QEMU requires a generous memory limit to run; somewhere around 200 MB is a good starting point, but considerably more may be needed for more complex programs. The default -m limit will be automatically bumped up to 200 MB when specifying -Q to afl-fuzz; be careful when overriding this. 
In principle, if you set CPU_TARGET before calling ./build_qemu_support.sh, you should get a build capable of running non-native binaries (say, you can try CPU_TARGET=arm). This is also necessary for running 32-bit binaries on a 64-bit system (CPU_TARGET=i386). Note: if you want the QEMU helper to be installed on your system for all users, you need to build it before issuing 'make install' in the parent directory. 3) Notes on linking ------------------- The feature is supported only on Linux. Supporting BSD may amount to porting the changes made to linux-user/elfload.c and applying them to bsd-user/elfload.c, but I have not looked into this yet. The instrumentation follows only the .text section of the first ELF binary encountered in the linking process. It does not trace shared libraries. In practice, this means two things: - Any libraries you want to analyze *must* be linked statically into the executed ELF file (this will usually be the case for closed-source apps). - Standard C libraries and other stuff that is wasteful to instrument should be linked dynamically - otherwise, AFL will have no way to avoid peeking into them. Setting AFL_INST_LIBS=1 can be used to circumvent the .text detection logic and instrument every basic block encountered. 4) Benchmarking --------------- If you want to compare the performance of the QEMU instrumentation with that of afl-gcc compiled code against the same target, you need to build the non-instrumented binary with the same optimization flags that are normally injected by afl-gcc, and make sure that the bits to be tested are statically linked into the binary. A common way to do this would be: $ CFLAGS="-O3 -funroll-loops" ./configure --disable-shared $ make clean all Comparative measurements of execution speed or instrumentation coverage will be fairly meaningless if the optimization levels or instrumentation scopes don't match. 
5) Gotchas, feedback, bugs -------------------------- If you need to fix up checksums or do other cleanup on mutated test cases, see experimental/post_library/ for a viable solution. Do not mix QEMU mode with ASAN, MSAN, or the like; QEMU doesn't appreciate the "shadow VM" trick employed by the sanitizers and will probably just run out of memory. Compared to fully-fledged virtualization, the user emulation mode is *NOT* a security boundary. The binaries can freely interact with the host OS. If you somehow need to fuzz an untrusted binary, put everything in a sandbox first. QEMU does not necessarily support all CPU or hardware features that your target program may be utilizing. In particular, it does not appear to have full support for AVX2 / FMA3. Using binaries for older CPUs, or recompiling them with -march=core2, can help. Beyond that, this is an early-stage mechanism, so field reports are welcome. You can send them to <afl-users@googlegroups.com>. 6) Alternatives: static rewriting --------------------------------- Statically rewriting binaries just once, instead of attempting to translate them at run time, can be a faster alternative. That said, static rewriting is fraught with peril, because it depends on being able to properly and fully model program control flow without actually executing each and every code path. If you want to experiment with this mode of operation, there is a module contributed by Aleksandar Nikolich: https://github.com/vrtadmin/moflow/tree/master/afl-dyninst https://groups.google.com/forum/#!topic/afl-users/HlSQdbOTlpg At this point, the author reports the possibility of hiccups with stripped binaries. That said, if we can get it to be comparably reliable to QEMU, we may decide to switch to this mode, but I had no time to play with it yet.
================================================
FILE: afl_progs/qemu_mode/build_qemu_support.sh
================================================
#!/bin/sh
#
# american fuzzy lop - QEMU build script
# --------------------------------------
#
# Written by Andrew Griffiths and
#            Michal Zalewski
#
# Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# This script downloads, patches, and builds a version of QEMU with
# minor tweaks to allow non-instrumented binaries to be run under
# afl-fuzz.
#
# The modifications reside in patches/*. The standalone QEMU binary
# will be written to ../afl-qemu-trace.
#
# Must be run from the qemu_mode directory. Set CPU_TARGET to build for a
# CPU other than the one reported by `uname -m`.
#

VERSION="2.10.0"
QEMU_URL="http://download.qemu-project.org/qemu-${VERSION}.tar.xz"
QEMU_SHA384="68216c935487bc8c0596ac309e1e3ee75c2c4ce898aab796faa321db5740609ced365fedda025678d072d09ac8928105"

echo "================================================="
echo "AFL binary-only instrumentation QEMU build script"
echo "================================================="
echo

echo "[*] Performing basic sanity checks..."

if [ ! "`uname -s`" = "Linux" ]; then

  echo "[-] Error: QEMU instrumentation is supported only on Linux."
  exit 1

fi

# Both paths are relative, so this doubles as a working-directory check.
if [ ! -f "patches/afl-qemu-cpu-inl.h" -o ! -f "../config.h" ]; then

  echo "[-] Error: key files not found - wrong working directory?"
  exit 1

fi

for i in libtool wget python automake autoconf sha384sum bison iconv; do

  T=`which "$i" 2>/dev/null`

  if [ "$T" = "" ]; then

    echo "[-] Error: '$i' not found, please install first."
    exit 1

  fi

done

if [ ! -d "/usr/include/glib-2.0/" -a ! -d "/usr/local/include/glib-2.0/" ]; then

  echo "[-] Error: devel version of 'glib2' not found, please install first."
  exit 1

fi

if echo "$CC" | grep -qF /afl-; then

  echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
  exit 1

fi

echo "[+] All checks passed!"

ARCHIVE="`basename -- "$QEMU_URL"`"

# Reuse a previously downloaded archive when its checksum already matches.
CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`

if [ ! "$CKSUM" = "$QEMU_SHA384" ]; then

  echo "[*] Downloading QEMU ${VERSION} from the web..."
  rm -f "$ARCHIVE"
  wget -O "$ARCHIVE" -- "$QEMU_URL" || exit 1

  CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`

fi

if [ "$CKSUM" = "$QEMU_SHA384" ]; then

  echo "[+] Cryptographic signature on $ARCHIVE checks out."

else

  echo "[-] Error: signature mismatch on $ARCHIVE (perhaps download error?)."
  exit 1

fi

echo "[*] Uncompressing archive (this will take a while)..."

rm -rf "qemu-${VERSION}" || exit 1
tar xf "$ARCHIVE" || exit 1

echo "[+] Unpacking successful."

# Resolve the target CPU first, so that the message below names the target
# that is actually going to be built even when CPU_TARGET was not set.
ORIG_CPU_TARGET="$CPU_TARGET"

test "$CPU_TARGET" = "" && CPU_TARGET="`uname -m`"
test "$CPU_TARGET" = "i686" && CPU_TARGET="i386"

echo "[*] Configuring QEMU for $CPU_TARGET..."

cd qemu-$VERSION || exit 1

echo "[*] Applying patches..."

patch -p1 <../patches/elfload.diff || exit 1
patch -p1 <../patches/cpu-exec.diff || exit 1
patch -p1 <../patches/syscall.diff || exit 1
patch -p1 <../patches/configure.diff || exit 1
patch -p1 <../patches/memfd.diff || exit 1
patch -p1 <../patches/translate-all.diff || exit 1
patch -p1 <../patches/afl_qemu_optimize_entrypoint.diff || exit 1

echo "[+] Patching done."

# --enable-pie seems to give a couple of exec's a second performance
# improvement, much to my surprise. Not sure how universal this is..

CFLAGS="-O3 -ggdb" ./configure --disable-system \
  --enable-linux-user --disable-gtk --disable-sdl --disable-vnc \
  --target-list="${CPU_TARGET}-linux-user" --enable-pie --enable-kvm || exit 1

echo "[+] Configuration complete."

echo "[*] Attempting to build QEMU (fingers crossed!)..."

make || exit 1

echo "[+] Build process successful!"

echo "[*] Copying binary..."

# We are inside qemu-$VERSION here, hence the extra "../" compared with the
# path that is listed after the "cd .." below.
cp -f "${CPU_TARGET}-linux-user/qemu-${CPU_TARGET}" "../../afl-qemu-trace" || exit 1

cd ..
ls -l ../afl-qemu-trace || exit 1

echo "[+] Successfully created '../afl-qemu-trace'."
echo "[+] All set, you can now use the qemu mode in killerbeez!" exit 0 ================================================ FILE: afl_progs/qemu_mode/patches/afl-qemu-cpu-inl.h ================================================ /* american fuzzy lop - high-performance binary-only instrumentation ----------------------------------------------------------------- Written by Andrew Griffiths and Michal Zalewski Idea & design very much by Andrew Griffiths. TCG instrumentation and block chaining support by Andrea Biondo Copyright 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This code is a shim patched into the separately-distributed source code of QEMU 2.10.0. It leverages the built-in QEMU tracing functionality to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. The resulting QEMU binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. */ /* This file has been modified from the original AFL version to incorporate into Killerbeez. Specifically, the fork server has been modified to match the Killerbeez fork server protocol. */ #include #include "../../config.h" #include "../../../instrumentation/forkserver_internal.h" /*************************** * VARIOUS AUXILIARY STUFF * ***************************/ /* This snippet kicks in when the instruction pointer is positioned at _start and does the usual forkserver stuff, not very different from regular instrumentation injected via afl-as.h. 
*/ #define AFL_QEMU_CPU_SNIPPET2 do { \ if(itb->pc == afl_entry_point) { \ afl_setup(); \ afl_forkserver(cpu); \ } \ } while (0) /* We use one additional file descriptor to relay "needs translation" messages between the child and the fork server. */ #define TSL_FD QEMU_TSL_FD //import it from forkserver_internal.h /* This is equivalent to afl-as.h: */ unsigned char dummy[65536]; unsigned char *afl_area_ptr = dummy; /* Exported for afl_gen_trace */ /* Exported variables populated by the code patched into elfload.c: */ abi_ulong afl_entry_point, /* ELF entry point (_start) */ afl_start_code, /* .text start pointer */ afl_end_code; /* .text end pointer */ /* Set in the child process in forkserver mode: */ static unsigned char afl_fork_child; unsigned int afl_forksrv_pid; /* Instrumentation ratio: */ unsigned int afl_inst_rms = MAP_SIZE; /* Exported for afl_gen_trace */ /* Function declarations. */ static void afl_setup(void); static void afl_forkserver(CPUState*); static void afl_wait_tsl(CPUState*, int); static void afl_request_tsl(target_ulong, target_ulong, uint32_t, TranslationBlock*, int); /* Data structures passed around by the translate handlers: */ struct afl_tb { target_ulong pc; target_ulong cs_base; uint32_t flags; }; struct afl_tsl { struct afl_tb tb; char is_chain; }; struct afl_chain { struct afl_tb last_tb; int tb_exit; }; /* Some forward decls: */ TranslationBlock *tb_htable_lookup(CPUState*, target_ulong, target_ulong, uint32_t); static inline TranslationBlock *tb_find(CPUState*, TranslationBlock*, int); /************************* * ACTUAL IMPLEMENTATION * *************************/ /* Set up SHM region and initialize other stuff. 
*/ static void afl_setup(void) { char *id_str = getenv(SHM_ENV_VAR), *inst_r = getenv("AFL_INST_RATIO"); int shm_id; if (inst_r) { unsigned int r; r = atoi(inst_r); if (r > 100) r = 100; if (!r) r = 1; afl_inst_rms = MAP_SIZE * r / 100; } if (id_str) { shm_id = atoi(id_str); afl_area_ptr = shmat(shm_id, NULL, 0); if (afl_area_ptr == (void*)-1) exit(1); /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap so that the parent doesn't give up on us. */ if (inst_r) afl_area_ptr[0] = 1; } if (getenv("AFL_INST_LIBS")) { afl_start_code = 0; afl_end_code = (abi_ulong)-1; } /* pthread_atfork() seems somewhat broken in util/rcu.c, and I'm not entirely sure what is the cause. This disables that behaviour, and seems to work alright? */ rcu_disable_atfork(); } /* Fork server logic, invoked once we hit _start. */ static int forkserver_installed = 0; static void afl_forkserver(CPUState *cpu) { static int response = 0x41414141; char command; int child_pid = -1; int t_fd[2]; if (forkserver_installed == 1) return; forkserver_installed = 1; //if (!afl_area_ptr) return; /* Tell the parent that we're alive. If the parent doesn't want to talk, assume that we're not running in forkserver mode. */ if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) return; afl_forksrv_pid = getpid(); /* All right, let's await orders... */ while (1) { // Wait for parent by reading from the pipe. Exit if read fails. if(read(FUZZER_TO_FORKSRV, &command, sizeof(command)) != sizeof(command)) _exit(1); switch(command) { case EXIT: case RUN: //QEMU doesn't do the single RUN/FORK commands case FORK: //but instead only implements FORK_RUN _exit(0); break; case FORK_RUN: /* Establish a channel with child to grab translation commands. We'll read from t_fd[0], child will write to TSL_FD. */ if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3); close(t_fd[1]); child_pid = fork(); if (child_pid < 0) exit(4); if (!child_pid) { /* Child process. Close descriptors and run free. 
*/ afl_fork_child = 1; close(FUZZER_TO_FORKSRV); close(FORKSRV_TO_FUZZER); close(t_fd[0]); return; } /* Parent. */ response = child_pid; if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) _exit(1); close(TSL_FD); /* Collect translation requests until child dies and closes the pipe. */ afl_wait_tsl(cpu, t_fd[0]); break; case GET_STATUS: /* Get and relay exit status to parent. */ if(waitpid(child_pid, &response, 0) < 0) _exit(1); if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) _exit(1); break; } } } /* This code is invoked whenever QEMU decides that it doesn't have a translation of a particular block and needs to compute it, or when it decides to chain two TBs together. When this happens, we tell the parent to mirror the operation, so that the next fork() has a cached copy. */ static void afl_request_tsl(target_ulong pc, target_ulong cb, uint32_t flags, TranslationBlock *last_tb, int tb_exit) { struct afl_tsl t; struct afl_chain c; if (!afl_fork_child) return; t.tb.pc = pc; t.tb.cs_base = cb; t.tb.flags = flags; t.is_chain = (last_tb != NULL); if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) return; if (t.is_chain) { c.last_tb.pc = last_tb->pc; c.last_tb.cs_base = last_tb->cs_base; c.last_tb.flags = last_tb->flags; c.tb_exit = tb_exit; if (write(TSL_FD, &c, sizeof(struct afl_chain)) != sizeof(struct afl_chain)) return; } } /* This is the other side of the same channel. Since timeouts are handled by afl-fuzz simply killing the child, we can just wait until the pipe breaks. */ static void afl_wait_tsl(CPUState *cpu, int fd) { struct afl_tsl t; struct afl_chain c; TranslationBlock *tb, *last_tb; while (1) { /* Broken pipe means it's time to return to the fork server routine. 
*/ if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) break; tb = tb_htable_lookup(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags); if(!tb) { mmap_lock(); tb_lock(); tb = tb_gen_code(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, 0); mmap_unlock(); tb_unlock(); } if (t.is_chain) { if (read(fd, &c, sizeof(struct afl_chain)) != sizeof(struct afl_chain)) break; last_tb = tb_htable_lookup(cpu, c.last_tb.pc, c.last_tb.cs_base, c.last_tb.flags); if (last_tb) { tb_lock(); if (!tb->invalid) { tb_add_jump(last_tb, c.tb_exit, tb); } tb_unlock(); } } } close(fd); } ================================================ FILE: afl_progs/qemu_mode/patches/afl-qemu-translate-inl.h ================================================ /* american fuzzy lop - high-performance binary-only instrumentation ----------------------------------------------------------------- Written by Andrew Griffiths and Michal Zalewski Idea & design very much by Andrew Griffiths. TCG instrumentation and block chaining support by Andrea Biondo Copyright 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This code is a shim patched into the separately-distributed source code of QEMU 2.10.0. It leverages the built-in QEMU tracing functionality to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. The resulting QEMU binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. */ #include "../../config.h" #include "tcg-op.h" /* Declared in afl-qemu-cpu-inl.h */ extern unsigned char *afl_area_ptr; extern unsigned int afl_inst_rms; extern abi_ulong afl_start_code, afl_end_code; /* Generates TCG code for AFL's tracing instrumentation. 
*/

/* cur_loc is the address of the block being translated. Nothing is emitted
   when the address lies outside [afl_start_code, afl_end_code] or when its
   scrambled value is at or above afl_inst_rms. Otherwise the ops emitted
   here perform, each time the translated block runs:

     afl_area_ptr[prev_loc ^ cur_loc]++;  prev_loc = cur_loc >> 1;

   where cur_loc is the scrambled value computed at translation time.
   NOTE(review): afl_area_ptr and &prev_loc are handed to tcg_const_ptr()
   while translating, so presumably the pointer values current at that
   moment are what the generated code uses - confirm against the TCG API. */

static void afl_gen_trace(target_ulong cur_loc) {

  static __thread target_ulong prev_loc;
  TCGv index, count, new_prev_loc;
  TCGv_ptr prev_loc_ptr, count_ptr;

  /* Optimize for cur_loc > afl_end_code, which is the most likely case on
     Linux systems. */

  if (cur_loc > afl_end_code || cur_loc < afl_start_code /*|| !afl_area_ptr*/)
    return;

  /* Looks like QEMU always maps to fixed locations, so ASAN is not a
     concern. Phew. But instruction addresses may be aligned. Let's mangle
     the value to get something quasi-uniform. */

  cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
  cur_loc &= MAP_SIZE - 1;

  /* Implement probabilistic instrumentation by looking at scrambled block
     address. This keeps the instrumented locations stable across runs. */

  if (cur_loc >= afl_inst_rms) return;

  /* index = prev_loc ^ cur_loc */
  prev_loc_ptr = tcg_const_ptr(&prev_loc);
  index = tcg_temp_new();
  tcg_gen_ld_tl(index, prev_loc_ptr, 0);
  tcg_gen_xori_tl(index, index, cur_loc);

  /* afl_area_ptr[index]++ */
  count_ptr = tcg_const_ptr(afl_area_ptr);
  tcg_gen_add_ptr(count_ptr, count_ptr, TCGV_NAT_TO_PTR(index));
  count = tcg_temp_new();
  /* The counter is read and written back through the 8-bit load/store ops. */
  tcg_gen_ld8u_tl(count, count_ptr, 0);
  tcg_gen_addi_tl(count, count, 1);
  tcg_gen_st8_tl(count, count_ptr, 0);

  /* prev_loc = cur_loc >> 1 */
  new_prev_loc = tcg_const_tl(cur_loc >> 1);
  tcg_gen_st_tl(new_prev_loc, prev_loc_ptr, 0);

}

================================================
FILE: afl_progs/qemu_mode/patches/afl_qemu_optimize_entrypoint.diff
================================================
--- qemu-2.10.0/linux-user/elfload.c.orig 2018-03-16 11:43:21.000000000 +0100 +++ qemu-2.10.0/linux-user/elfload.c 2018-04-04 05:25:47.535020053 +0200 @@ -2086,8 +2086,21 @@ info->end_data = 0; info->brk = 0; info->elf_flags = ehdr->e_flags; - - if (!afl_entry_point) afl_entry_point = info->entry; + + if (!afl_entry_point) { + char *ptr; + if ((ptr = getenv("AFL_ENTRYPOINT")) != NULL) { + afl_entry_point = strtoul(ptr, NULL, 16); + } else { + if (!afl_entry_point) afl_entry_point = info->entry; + }
+#ifdef TARGET_ARM + /* The least significant bit indicates Thumb mode. */ + afl_entry_point = afl_entry_point & ~(target_ulong)1; +#endif + if (getenv("AFL_DEBUG") != NULL) + fprintf(stderr, "AFL forkserver entrypoint: %p\n", (void*)afl_entry_point); + } while(0); for (i = 0; i < ehdr->e_phnum; i++) { struct elf_phdr *eppnt = phdr + i; ================================================ FILE: afl_progs/qemu_mode/patches/configure.diff ================================================ --- a/configure +++ b/configure @@ -3855,7 +3855,7 @@ fi # check if memfd is supported memfd=no cat > $TMPC << EOF -#include +#include int main(void) { ================================================ FILE: afl_progs/qemu_mode/patches/cpu-exec.diff ================================================ --- qemu-2.10.0-clean/accel/tcg/cpu-exec.c 2017-08-30 18:50:40.000000000 +0200 +++ qemu-2.10.0/accel/tcg/cpu-exec.c 2018-09-22 13:21:23.612068407 +0200 @@ -36,6 +36,8 @@ #include "sysemu/cpus.h" #include "sysemu/replay.h" +#include "../patches/afl-qemu-cpu-inl.h" + /* -icount align implementation. */ typedef struct SyncClocks { @@ -144,6 +146,8 @@ int tb_exit; uint8_t *tb_ptr = itb->tc_ptr; + AFL_QEMU_CPU_SNIPPET2; + qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, "Trace %p [%d: " TARGET_FMT_lx "] %s\n", itb->tc_ptr, cpu->cpu_index, itb->pc, @@ -337,7 +341,7 @@ TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - bool have_tb_lock = false; + bool have_tb_lock = false, was_translated = false, was_chained = false; /* we record a subset of the CPU state. 
It will always be the same before a given translated block @@ -365,6 +369,7 @@ if (!tb) { /* if no translated code available, then translate it now */ tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + was_translated = true; } mmap_unlock(); @@ -390,11 +395,16 @@ } if (!tb->invalid) { tb_add_jump(last_tb, tb_exit, tb); + was_chained = true; } } if (have_tb_lock) { tb_unlock(); } + if (was_translated || was_chained) { + afl_request_tsl(pc, cs_base, flags, was_chained ? last_tb : NULL, + tb_exit); + } return tb; } ================================================ FILE: afl_progs/qemu_mode/patches/elfload.diff ================================================ --- qemu-2.10.0.orig/linux-user/elfload.c 2017-08-30 18:50:41.000000000 +0200 +++ qemu-2.10.0/linux-user/elfload.c 2018-10-23 12:48:16.421879765 +0200 @@ -20,6 +20,8 @@ #define ELF_OSABI ELFOSABI_SYSV +extern abi_ulong afl_entry_point, afl_start_code, afl_end_code; + /* from personality.h */ /* @@ -2085,6 +2087,8 @@ info->brk = 0; info->elf_flags = ehdr->e_flags; + if (!afl_entry_point) afl_entry_point = info->entry; + for (i = 0; i < ehdr->e_phnum; i++) { struct elf_phdr *eppnt = phdr + i; if (eppnt->p_type == PT_LOAD) { @@ -2118,9 +2122,11 @@ if (elf_prot & PROT_EXEC) { if (vaddr < info->start_code) { info->start_code = vaddr; + if (!afl_start_code) afl_start_code = vaddr; } if (vaddr_ef > info->end_code) { info->end_code = vaddr_ef; + if (!afl_end_code) afl_end_code = vaddr_ef; } } if (elf_prot & PROT_WRITE) { @@ -2443,6 +2449,22 @@ info, (elf_interpreter ? &interp_info : NULL)); info->start_stack = bprm->p; +#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) + // On PowerPC64 the entry point is the _function descriptor_ + // of the entry function. For AFL to properly initialize, + // afl_entry_point needs to be set to the actual first instruction + // as opposed executed by the target program. This as opposed to + // where the function's descriptor sits in memory. 
+ + // Shameless copy of PPC init_thread + info_report("Adjusting afl_entry_point"); + if (afl_entry_point && (get_ppc64_abi(info) < 2)) { + uint64_t val; + get_user_u64(val, afl_entry_point); + afl_entry_point = val + info->load_bias; + } +#endif + /* If we have an interpreter, set that as the program's entry point. Copy the load_bias as well, to help PPC64 interpret the entry point as a function descriptor. Do this after creating elf tables ================================================ FILE: afl_progs/qemu_mode/patches/memfd.diff ================================================ --- a/util/memfd.c +++ b/util/memfd.c @@ -31,9 +31,7 @@ #include "qemu/memfd.h" -#ifdef CONFIG_MEMFD -#include -#elif defined CONFIG_LINUX +#if defined CONFIG_LINUX && !defined CONFIG_MEMFD #include #include ================================================ FILE: afl_progs/qemu_mode/patches/syscall.diff ================================================ --- qemu-2.10.0-rc3-clean/linux-user/syscall.c 2017-08-15 11:39:41.000000000 -0700 +++ qemu-2.10.0-rc3/linux-user/syscall.c 2017-08-22 14:34:03.193088186 -0700 @@ -116,6 +116,8 @@ #include "qemu.h" +extern unsigned int afl_forksrv_pid; + #ifndef CLONE_IO #define CLONE_IO 0x80000000 /* Clone io context */ #endif @@ -11688,8 +11690,21 @@ break; case TARGET_NR_tgkill: - ret = get_errno(safe_tgkill((int)arg1, (int)arg2, - target_to_host_signal(arg3))); + + { + int pid = (int)arg1, + tgid = (int)arg2, + sig = (int)arg3; + + /* Not entirely sure if the below is correct for all architectures. 
*/ + + if(afl_forksrv_pid && afl_forksrv_pid == pid && sig == SIGABRT) + pid = tgid = getpid(); + + ret = get_errno(safe_tgkill(pid, tgid, target_to_host_signal(sig))); + + } + break; #ifdef TARGET_NR_set_robust_list ================================================ FILE: afl_progs/qemu_mode/patches/translate-all.diff ================================================ --- a/accel/tcg/translate-all.c 2017-08-30 18:50:40.000000000 +0200 +++ b/accel/tcg/translate-all.c 2018-09-21 10:19:42.328766554 +0200 @@ -60,6 +60,8 @@ #include "exec/log.h" #include "sysemu/cpus.h" +#include "../patches/afl-qemu-translate-inl.h" + /* #define DEBUG_TB_INVALIDATE */ /* #define DEBUG_TB_FLUSH */ /* make various TB consistency checks */ @@ -1280,6 +1282,7 @@ tcg_func_start(&tcg_ctx); tcg_ctx.cpu = ENV_GET_CPU(env); + afl_gen_trace(pc); gen_intermediate_code(cpu, tb); tcg_ctx.cpu = NULL; ================================================ FILE: afl_progs/test-instr.c ================================================ /* american fuzzy lop - a trivial program to test the build -------------------------------------------------------- Written and maintained by Michal Zalewski Copyright 2014 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #include #include #include int main(int argc, char** argv) { char buf[8]; if (read(0, buf, 8) < 1) { printf("Hum?\n"); exit(1); } if (buf[0] == '0') printf("Looks like a zero to me!\n"); else printf("A non-zero value? How quaint!\n"); exit(0); } ================================================ FILE: afl_progs/types.h ================================================ /* american fuzzy lop - type definitions and minor macros ------------------------------------------------------ Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015 Google Inc. 
All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #ifndef _HAVE_TYPES_H #define _HAVE_TYPES_H #include #include typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; /* Ugh. There is an unintended compiler / glibc #include glitch caused by combining the u64 type an %llu in format strings, necessitating a workaround. In essence, the compiler is always looking for 'unsigned long long' for %llu. On 32-bit systems, the u64 type (aliased to uint64_t) is expanded to 'unsigned long long' in , so everything checks out. But on 64-bit systems, it is #ifdef'ed in the same file as 'unsigned long'. Now, it only happens in circumstances where the type happens to have the expected bit width, *but* the compiler does not know that... and complains about 'unsigned long' being unsafe to pass to %llu. */ #ifdef __x86_64__ typedef unsigned long long u64; #else typedef uint64_t u64; #endif /* ^__x86_64__ */ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; #ifndef MIN # define MIN(_a,_b) ((_a) > (_b) ? (_b) : (_a)) # define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b)) #endif /* !MIN */ #define SWAP16(_x) ({ \ u16 _ret = (_x); \ (u16)((_ret << 8) | (_ret >> 8)); \ }) #define SWAP32(_x) ({ \ u32 _ret = (_x); \ (u32)((_ret << 24) | (_ret >> 24) | \ ((_ret << 8) & 0x00FF0000) | \ ((_ret >> 8) & 0x0000FF00)); \ }) #ifdef AFL_LLVM_PASS # define AFL_R(x) (random() % (x)) #else # define R(x) (random() % (x)) #endif /* ^AFL_LLVM_PASS */ #define STRINGIFY_INTERNAL(x) #x #define STRINGIFY(x) STRINGIFY_INTERNAL(x) #define MEM_BARRIER() \ asm volatile("" ::: "memory") #define likely(_x) __builtin_expect(!!(_x), 1) #define unlikely(_x) __builtin_expect(!!(_x), 0) #endif /* ! 
_HAVE_TYPES_H */ ================================================ FILE: corpus/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (corpus) # All of the Windows test programs have precompiled versions, as the DynamoRIO # instrumentation needs exact offsets into the program to know where to hook. # As such, we've included precompiled versions and listed their offsets, # rather than having the user compile them. if (UNIX) SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/corpus/ ) SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/corpus/ ) SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${BUILD_DIRECTORY}/killerbeez/corpus/ ) if (NOT APPLE) add_subdirectory(persist) endif (NOT APPLE) add_subdirectory(hang) add_subdirectory(libtest) add_subdirectory(test) add_subdirectory(network) endif () ================================================ FILE: corpus/afl_test/Makefile ================================================ all: test test32 test-qemu test-fast test-fast-persist test-fast-persist-hang test-fast-deferred test-fast-persist-deferred help: echo "\n\n This Makefile can be used to compile the example test program with AFL instrumentation.\n" \ "Use the AFL_PATH environment variable to specify the path to afl-gcc/afl-clang-fast\n" \ "\n" \ "Example:\n" \ " make AFL_PATH=/path/to/afl/ all" \ "\n" check-afl-gcc: if [ ! "$(shell sh -c 'ls $(AFL_PATH)/afl-gcc > /dev/null; echo $$?')" = "0" ]; then \ echo "Bad compiler specified '$(AFL_PATH)'. Please use the AFL_PATH environment variable to specify the path to afl-gcc"; \ exit 1; \ fi check-afl-clang-fast: if [ ! "$(shell sh -c 'ls $(AFL_PATH)/afl-clang-fast > /dev/null; echo $$?')" = "0" ]; then \ echo "Bad compiler specified '$(AFL_PATH)'. 
Please use the AFL_PATH environment variable to specify the path to afl-clang-fast"; \ exit 1; \ fi test: check-afl-gcc $(AFL_PATH)/afl-gcc test.c -o test test32: check-afl-gcc $(AFL_PATH)/afl-gcc test.c -m32 -o test32 test-qemu: $(CC) test.c -o test-qemu test-fast: check-afl-clang-fast $(AFL_PATH)/afl-clang-fast test.c -o test-fast test-fast-persist: check-afl-clang-fast $(AFL_PATH)/afl-clang-fast test.c -o test-fast-persist -DPERSIST test-fast-persist-hang: check-afl-clang-fast $(AFL_PATH)/afl-clang-fast test.c -o test-fast-persist-hang -DPERSIST -DHANG test-fast-deferred: check-afl-clang-fast $(AFL_PATH)/afl-clang-fast test.c -o test-fast-deferred -DSLOW_STARTUP -DDEFERRED test-fast-persist-deferred: check-afl-clang-fast $(AFL_PATH)/afl-clang-fast test.c -o test-fast-persist-deferred -DSLOW_STARTUP -DDEFERRED -DPERSIST clean: rm -f test test32 test-qemu test-fast test-fast-persist test-fast-persist-hang test-fast-deferred test-fast-persist-deferred ================================================ FILE: corpus/afl_test/test.c ================================================ #include #include #include int test_func() { char buffer[4]; char * nil = NULL; memset(buffer, 0, 4); read(0, buffer, sizeof(buffer)); if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if (buffer[3] == 'D') { #ifdef HANG while(1) {} #endif *nil = 'E'; } else { puts("Wrong 3"); } } else { puts("Wrong 2"); } } else { puts("Wrong 1"); } } else { puts("Wrong 0"); } return 0; } int main() { #ifdef SLOW_STARTUP sleep(5); #endif #ifdef DEFERRED __AFL_INIT(); #endif #ifdef PERSIST while(__AFL_LOOP()) { #endif test_func(); #ifdef PERSIST } #endif return 0; } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/README.md ================================================ # REMATCH_2--Mail_Server--Crackaddr ## Author Information debbie@cromulence.com ### DARPA performer group Cromulence LLC (CROMU) ## Description This binary mimics the 
function in sendmail that was vulnerable to CVE-2002-1337, commonly known as the crackaddr bug. Sendmail is an email server originally published in 1983. CVE-2002-1337 was a buffer overflow in the crackaddr function for email address parsing. In 2011, 8 years after the bug was published, security researcher Thomas Dullien (AKA Halvar Flake) showcased the bug as something static analysis tools were unable to detect and created a simplified implementation to demonstrate that point. Since then, multiple teams have used the crackaddr bug as a litmus test for the real-world viability of analysis tools. To date, some tools have been able to detect the vulnerability in the simplified version but not the original. This implementation closely mimics the original bug in sendmail. ### Feature List The crackaddr function in sendmail was created to parse addresses with a complex combination of embedded parentheses, angle brackets, quotes, and grouping syntax. The function recreated here adds some complexity beyond that of the original function and changes all the special characters to other ASCII values. ## Vulnerability The vulnerability is an error in how the while loop keeps track of matching "brackets". A pointer is used to indicate the end of the expected output stream. That pointer is manipulated to make sure there is enough room to close out any opened brackets with matching close brackets. The error occurs when the pointer is incremented on a close bracket, but not decremented on the previous open bracket. This allows the pointer to end up pointing past the end of the actual output buffer, allowing the input stream to overwrite a portion of the stack. ### Generic class of vulnerability Buffer Overflow ### CWE classification CWE-120 Buffer Copy without Checking Size of Input ('Classic Buffer Overflow') ## Challenges The crackaddr function poses a complex challenge to the CRS. 
The unbounded nature of the input prevents imposing a simple size restriction because that would lead to failing valid polls. The CRS must then interpret exactly which overly long inputs would result in an overflow of the output. The crackaddr function modifies the output bounds based on the sequence of input characters in a while loop which quickly leads to an unmanageable state equation for the CRS. ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/ADDRESSBOOK.txt ================================================ ADDRESSBOOK QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/LIST.txt ================================================ LIST QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/LISTALL.txt ================================================ LISTALL QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/POST.txt ================================================ POST QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/READ.txt ================================================ READ QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/inputs/crash.txt ================================================ POST 
sender:sender!recipient:+BCcCcCcCcCcCcCcCcCcCcCcCcCcCcCcCcCcCcJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIIIIHHHH!body:dump 1128775680!subject:subject! QUIT ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_ctype.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __CTYPE_H__ #define __CTYPE_H__ int cgc_isdigit( int c ); int cgc_islower( int c ); int cgc_isupper( int c ); int cgc_isalpha( int c ); int cgc_isalnum( int c ); int cgc_isprint( int c ); int cgc_isspace( int c ); int cgc_toupper( int c ); int cgc_tolower( int c ); #endif // __CTYPE_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_libc.h ================================================ /* Author: Debbie Nuttall Copyright (c) 2015 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LIBC_H #define LIBC_H #define FLAG_PAGE 0x4347c000 int cgc_receive_all(char *buf, int length); int cgc_receive_all_fd(int fd, char *buf, int length); int cgc_send_all(char *buf, int length); int cgc_send_all_fd(int fd, char *buf, int length); int cgc_receive_until(char *buf, int length, char delim); int cgc_buffered_receive_until(char *buf, int length, char delim); int cgc_receive_until_fd(int fd, char *buf, int length, char delim); int cgc_equals(char *s, char *d); int cgc_replace(char *s, char find, char replace, int num); int cgc_force_newline(char *s, int size, int bytes); int cgc_getopt(int argc, char **argv, char *optstring, int *opt_index); void cgc_bcopy(char *s, char *d, cgc_size_t size); void cgc_exit(int e); #endif ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_malloc.h ================================================ /* Authors: Cromulence Copyright (c) 2015 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __MALLOC_H__ #define __MALLOC_H__ #include "libcgc.h" void *cgc_calloc( cgc_size_t); void *cgc_malloc( cgc_size_t ); void cgc_free( void * ); #endif ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_math.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __MATH_H__ #define __MATH_H__ double cgc_round( double val ); double cgc_floor( double val ); #define isnan( val ) __builtin_isnan( val ) #define isinf( val ) __builtin_isinf( val ) #endif // __MATH_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_prng.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __PRNG_H__ #define __PRNG_H__ #include "cgc_stdint.h" void cgc_seed_prng_array( uint32_t *array_data, uint32_t array_size ); void cgc_seed_prng( uint32_t seed_value ); uint32_t cgc_prng( void ); uint32_t cgc_random_in_range( uint32_t min, uint32_t max ); #endif // __PRNG_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_shell.h ================================================ /* Author: Debbie Nuttall Copyright (c) 2015 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef SHELL_H #define SHELL_H #define SECRET_PAGE 0x4347c000 void cgc_runshellcommand(char *cmd); void cgc_shell(); #endif // SHELL_H ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_stdarg.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __STDARG_H__ #define __STDARG_H__ #ifdef WIN #include #else typedef __builtin_va_list va_list; #define va_start(v, l) __builtin_va_start(v, l) #define va_arg(v, l) __builtin_va_arg(v, l) #define va_end(v) __builtin_va_end(v) #endif #endif // __STDARG_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_stdint.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __STDINT_H__ #define __STDINT_H__ typedef unsigned long long uint64_t; typedef long long int64_t; typedef unsigned int uint32_t; typedef signed int int32_t; typedef unsigned short int uint16_t; typedef signed short int int16_t; typedef unsigned char uint8_t; typedef signed char int8_t; #endif // __STDINT_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_stdio.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __STDIO_H__ #define __STDIO_H__ #include "libcgc.h" #include "cgc_stdarg.h" int cgc_putchar( int c ); int cgc_printf( const char *format, ... ); int fprintf( int fd, const char *format, ... ); int vprintf( int fd, const char *format, va_list args ); int cgc_sprintf( char *buf, const char *format, ... 
); int cgc_vsprintf( char *buf, const char *format, va_list args ); int cgc_puts( const char *s ); #endif // __STDIO_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_stdlib.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __STDLIB_H__ #define __STDLIB_H__ #include "libcgc.h" #define RAND_MAX 2147483647 int cgc_rand( void ); void cgc_srand( unsigned int seed ); int cgc_atoi( const char *pStr ); double cgc_atof( char *pStr ); char *cgc_strcpy( char *pDest, const char *pSource ); char *cgc_strncpy( char *pDest, const char *pSource, cgc_size_t maxlen ); void *cgc_memcpy( void *pDest, const void *pSrc, cgc_size_t nbytes ); void get_cgc_random( void *dest, int size); #endif // STDLIB_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/cgc_string.h ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __STRING_H__ #define __STRING_H__ #include "libcgc.h" cgc_size_t cgc_strlen( const char *str ); void cgc_bzero(void *s, cgc_size_t n); void *cgc_memset( void *ptr, int value, cgc_size_t num ); char *cgc_strchr(char *s, int c); char *cgc_strtok(char *str, char *sep); int cgc_strcmp(const char *s1, const char *s2); int cgc_strncmp(const char *s1, const char *s2, cgc_size_t n); char *cgc_strcat(char *restrict s1, const char *restrict s2); char *cgc_strstr(char *s1, char *s2); char *cgc_rindex(char *source, char match); #endif // __STRING_H__ ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/ctype.c ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_ctype.h" #define DEL 0x7f #define SPC 0x20 #define TAB 0x09 #define LF 0x0a #define VT 0x0b #define FF 0x0c #define CR 0x0d int cgc_isdigit( int c ) { if ( c >= '0' && c <= '9' ) return 1; else return 0; } int cgc_isupper( int c ) { if ( c >= 'A' && c <= 'Z' ) return 1; else return 0; } int cgc_islower( int c ) { if ( c >= 'a' && c <= 'z' ) return 1; else return 0; } int cgc_isalpha( int c ) { if ( cgc_isupper( c ) || cgc_islower( c ) ) return 1; else return 0; } int cgc_isalnum( int c ) { if ( cgc_isalpha( c ) || cgc_isdigit( c ) ) return 1; else return 0; } int cgc_isprint( int c ) { if ( c >= SPC && c != DEL ) return 1; else return 0; } int cgc_toupper( int c ) { if ( cgc_islower( c ) ) return (c - 'a') + 'A'; else return c; } int cgc_tolower( int c ) { if ( cgc_isupper( c ) ) return (c - 'A') + 'a'; else return c; } int cgc_isspace( int c ) { if ( c == SPC || c == TAB || c == LF || c == VT || c == FF || c == CR ) return 1; else return 0; } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/libc.c ================================================ /* Author: Debbie Nuttall Copyright (c) 2015 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_string.h" #include "cgc_stdlib.h" #include "cgc_libc.h" int cgc_receive_all(char *buf, int length) { return cgc_receive_all_fd(STDIN, buf, length); } int cgc_receive_all_fd(int fd, char *buf, int length) { int total_received = 0; int ret; cgc_size_t bytes_received; while (total_received < length) { ret = cgc_receive(fd, buf + total_received, length - total_received, &bytes_received); if (ret !=0 ) { // error return -1; } if (bytes_received == 0) { break; } total_received += bytes_received; } return total_received; } // Receives bytes from the given fd until delim is found. // Output buf will always be null terminated. Output buf does not contain delim. // Returns bytes received not including delim or null int cgc_receive_str_until_fd(int fd, char *buf, int length, char delim) { int total_received = 0; int ret; cgc_size_t bytes_received; char c; buf[0] = '\0'; while (1) { ret = cgc_receive(fd, &c, 1, &bytes_received); if (ret !=0 ) { // error return -1; } if (bytes_received == 0) { break; } if (c == delim) { goto DONE; } if (total_received < length - 1) { *(buf + total_received++) = c; } } DONE: *(buf + total_received) = '\0'; return total_received; } // Receives bytes from the given fd until delim is found. // Output buf will not always be null terminated. Output buf does not contain delim. 
// Returns bytes received not including delim or null int cgc_receive_until_fd(int fd, char *buf, int length, char delim) { int total_received = 0; int ret; cgc_size_t bytes_received; char c; buf[0] = '\0'; while (1) { ret = cgc_receive(fd, &c, 1, &bytes_received); if (ret !=0 ) { // error return -1; } if (bytes_received == 0) { break; } if (c == delim) { goto DONE; } if (total_received < length) { *(buf + total_received++) = c; } } DONE: if (total_received < length) { *(buf + total_received) = '\0'; } return total_received; } char receive_buf[1024]; char *p_data = receive_buf; int cgc_bytes_in_buffer = 0; int cgc_buffered_receive(char *buf, int length) { int bytes_remaining = length; if (cgc_bytes_in_buffer) { int byte_to_copy = length; if (length > cgc_bytes_in_buffer) { byte_to_copy = cgc_bytes_in_buffer; } cgc_memcpy(buf, p_data, byte_to_copy); bytes_remaining -= byte_to_copy; p_data += byte_to_copy; cgc_bytes_in_buffer -= byte_to_copy; buf += byte_to_copy; if (cgc_bytes_in_buffer == 0) { p_data = receive_buf; } } if (bytes_remaining > 0) { if (cgc_receive_all(buf, bytes_remaining) < 0) { return -1; } } return length; } int cgc_receive_until(char *buf, int length, char delim) { return cgc_buffered_receive_until(buf, length, delim); } int cgc_buffered_receive_until(char *buf, int length, char delim) { char c; int bytes_copied = 0; *buf = 0; while (1) { if (cgc_bytes_in_buffer > 0) { c = *p_data++; cgc_bytes_in_buffer--; if (cgc_bytes_in_buffer == 0) { p_data = receive_buf; } } else { int ret; cgc_size_t bytes_received = 0; ret = cgc_receive(STDIN, receive_buf , 1024 , &bytes_received); if (ret != 0) { return -1; } if (bytes_received == 0) { break; } cgc_bytes_in_buffer = bytes_received - 1; p_data = receive_buf; c = *p_data++; } if (c == delim) { break; } if (bytes_copied < length - 1) { *buf++ = c; bytes_copied++; } } *buf = '\0'; return bytes_copied; } int cgc_send_all(char *buf, int length) { return cgc_send_all_fd(STDOUT, buf, length); } int 
cgc_send_all_fd(int fd, char *buf, int length) { int total_sent = 0; int ret; cgc_size_t bytes_sent; while(total_sent < length) { ret = cgc_transmit(fd, buf + total_sent, length - total_sent, &bytes_sent); if (ret != 0) { // error return -1; } if (bytes_sent == 0) { break; } total_sent += bytes_sent; } return bytes_sent; } // Return 1 if string 'd' equals string 's', otherwise return 0 int cgc_equals(char *d, char *s) { while (*d == *s) { if (*d == '\0') return 1; s++; d++; } return 0; } // Replace 'find' char with 'replace' char in the string 's' up to 'num' instances. // If 'num' is 0, replace all instances. // Returns number of instances that were replaced. int cgc_replace(char *s, char find, char replace, int num) { int count = 0; while (*s != '\0') { if ((num != 0) && (count >= num)) { break; } if (*s == find) { *s = replace; count++; } s++; } return count; } // Adds a newline character to the end of s. // size informs the function of the total size of the s buffer // length informs the function of the length of useful string within the buffer // The newline will be added at the end of the useful string or, if the string is max length, in the last position of the buffer // Returns the new length of the useful string including the newline character // Buffer will only be null terminated if size permits int cgc_force_newline(char *s, int size, int bytes) { if (bytes == size) { s[bytes - 1] = '\n'; } else { if (s[bytes-1] != '\n') { s[bytes++] = '\n'; } if (bytes < size) { s[bytes] = '\0'; } } return bytes; } int cgc_getopt(int argc, char **argv, char *optstring, int *opt_index) { int option = -1; if (*opt_index >= argc || !argv[*opt_index]) { goto DONE; } for (int i = 0; i < cgc_strlen(optstring); i++) { if (*argv[*opt_index] == optstring[i]) { option = optstring[i]; (*opt_index)++; goto DONE; } } (*opt_index)++; option = 0; DONE: return option; } void cgc_bcopy(char *s, char *d, cgc_size_t size) { while (size > 0) { *d = *s; d++;s++;size--; } } void 
cgc_exit(int e) {
  // Ends the program by handing the exit code to cgc__terminate.
  cgc__terminate(e);
}

================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/malloc.c
================================================
/*

Authors: Cromulence

Copyright (c) 2015 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
#include "cgc_malloc.h"
#include "cgc_stdlib.h"
#include "cgc_stdio.h"
#include "cgc_string.h"

/// Header stored in front of every chunk. Only the length field is skipped
/// when a chunk is handed to the caller (the code adds a literal 4), so next
/// and prev overlap user data while the chunk is allocated.
/// NOTE(review): the literal 4 assumes sizeof(cgc_size_t) == 4 -- confirm the
/// build target is 32-bit.
typedef struct meta {
    cgc_size_t length;
    struct meta *next;
    struct meta *prev;
} meta, *pmeta;

/// Maps a chunk length to its lookaside index; lengths above 1016 (and,
/// through the integer division, lengths below 8) map to index 0.
#define BUCKET( size ) (size > 1016 ? 0 : size / 8 )

/// Each bucket is the head of a singly linked list
/// The size for the bucket can be calculated via index*8
/// However, the freelist bucket 0 also uses the prev pointer
pmeta cgc_lookaside[128] = {NULL};

/// Puts a free chunk back into the allocator.
/// Chunks of length <= 1016 are pushed on the front of their size bucket.
/// Larger chunks are spliced into the bucket-0 list in front of the first
/// node whose length is greater, or appended at the tail.
/// If bucket 0 is empty the walk never starts and a large chunk is dropped.
void cgc_link( pmeta linkme )
{
    pmeta walker = cgc_lookaside[0];

    if ( linkme == NULL ) {
        return;
    }

    /// Handle the case where this is <= 1016
    if ( linkme->length <= 1016 ) {
        //cgc_printf("Adding into bucket: $d\n", BUCKET( linkme->length) );
        linkme->next = cgc_lookaside[ BUCKET( linkme->length ) ];
        cgc_lookaside[ BUCKET( linkme->length ) ] = linkme;
        return;
    }

    while ( walker ) {
        if ( walker->next == NULL ) {
            // Reached the tail: append.
            walker->next = linkme;
            linkme->prev = walker;
            linkme->next = NULL;
            return;
        } else if ( linkme->length < walker->next->length ) {
            // Insert between walker and its successor.
            linkme->next = walker->next;
            linkme->prev = walker;
            walker->next->prev = linkme;
            walker->next = linkme;
            return;
        } else {
            walker = walker->next;
        }
    }

    return;
}

/// Obtains fresh pages from cgc_allocate large enough for length plus the
/// length field, zeroes them and adds them to the free list as one chunk.
/// Terminates the program when cgc_allocate fails.
void cgc_add_freelist_block( cgc_size_t length )
{
    pmeta block = NULL;
    pmeta walker = NULL;

    /// Round to the nearest page
    /// Account for the 4 byte length field
    length += 4;
    length = (length + 4095 ) & 0xfffff000;

    if ( cgc_allocate( length, 0, (void**)&block) != 0 ) {
        cgc_printf("[ERROR] Allocating a free list block failed: $d\n", length);
        cgc__terminate(-1);
    }

    cgc_bzero( block, length );
    block->length = length-4;

    // An empty free list simply adopts the new block as its head.
    if ( cgc_lookaside[0] == NULL ) {
        cgc_lookaside[0] = block;
        return;
    }

    cgc_link( block );
    return;
}

/// Releases a pointer previously returned by the allocator: steps back 4
/// bytes to the chunk header and relinks the chunk. NULL is ignored.
void cgc_free( void *block )
{
    pmeta nb = NULL;

    if ( block ) {
        nb = (pmeta) (( (char*)block) - 4);
        cgc_link(nb);
    }

    return;
}

/// Creates the initial free list from one 4096-byte page: a zero-length head
/// node followed by a single chunk covering the rest of the page.
/// Terminates the program when cgc_allocate fails.
void cgc_init_freelist( void )
{
    pmeta zero_block = NULL;
    pmeta base_block = NULL;

    // The array itself is passed as the out-pointer, so the page address is
    // stored in its first element, cgc_lookaside[0].
    if ( cgc_allocate(4096, 0, (void**)&cgc_lookaside) != 0 ) {
        cgc_printf("[ERROR] Malloc fail terminate\n");
        cgc__terminate(-1);
    }

    cgc_bzero( cgc_lookaside[0], 4096);

    zero_block = cgc_lookaside[0];
    base_block = zero_block + 1;

    /// Keep a zero length head on the freelist for
    /// ease of organization
    zero_block->length = 0;
    zero_block->next = base_block;
    zero_block->prev = NULL;

    base_block->length = 4096 - sizeof(meta) - 4;
    base_block->prev = zero_block;
    base_block->next = NULL;

    //cgc_printf("Set up head: $x with walker: $d: $x\n", zero_block, base_block->length, base_block);
    return;
}

/// Detaches block from the doubly linked bucket-0 list by pointing its
/// neighbours at each other. The block's own pointers are left untouched and
/// the list head is not updated.
void cgc_unlink( pmeta block )
{
    if ( block == NULL ) {
        return;
    }

    if ( block->prev != NULL ) {
        block->prev->next = block->next;
    }

    if ( block->next != NULL ) {
        block->next->prev = block->prev;
    }

    return;
}

/// Serves a request from the bucket-0 free list: takes the first chunk whose
/// length is at least the requested length, splitting off the surplus when it
/// is big enough to carry its own header. When no chunk fits, a new block is
/// added and the search is repeated.
/// Returns a pointer just past the chunk's 4-byte length field.
void *cgc_freelist_alloc( cgc_size_t length )
{
    pmeta walker = NULL;
    pmeta newone = NULL;

    /// If there isn't a block on the free list then initialize one
    /// This should only be the case on the first allocation request
    if ( cgc_lookaside[0] == NULL ) {
        cgc_init_freelist();
    }

    walker = (pmeta)cgc_lookaside[0];

    // Walk while looking for the smallest useable
    while ( walker ) {
        if ( walker->length < length ) {
            walker = walker->next;
        } else {
            break;
        }
    }

    if ( walker == NULL ) {
        //cgc_printf("no blocks found\n");
        cgc_add_freelist_block( length );
        return cgc_freelist_alloc(length);
    } else {
        //cgc_printf("foudn block size: $d\n", walker->length );

        cgc_unlink(walker);

        /// If the block is less than the size needed for at
        /// least an 8 byte block then return the whole thing
        /// That means sizeof(meta) prev and next total 8 bytes
        /// bytes on the lookaside list
        if ( walker->length - length < sizeof(meta) ) {
            /// Skip the 4 byte length
            return ((char*)walker) + 4;
        }

        /// Break the chunk off
        newone = (pmeta) ( ((char*)walker) + 4 + length );
        newone->length = walker->length - (length+4);

        //cgc_printf("Broke $d into $d and $d\n", walker->length, length, newone->length);
        walker->length = length;

        cgc_link(newone);

        //cgc_printf("Returning size: $d\n", walker->length);
        return ((char*)walker) + 4;
    }

    return NULL;
}

/// Allocates length bytes with cgc_malloc and zeroes the request rounded up
/// to a multiple of 8. Takes a single size argument, unlike the standard
/// calloc. Returns the pointer from cgc_malloc (possibly null).
void *cgc_calloc( cgc_size_t length )
{
    void *out = cgc_malloc( length );

    if ( !out ) {
        return out;
    }

    length = (length+7) & 0xfffffff8;

    cgc_bzero( out, length);

    return out;
}

/// Allocates at least length bytes. The request is raised to a minimum of 8
/// and rounded up to a multiple of 8. Requests mapping to bucket 0 go to the
/// free list; others take the first chunk from the first non-empty bucket at
/// or above their size class, falling back to the free list when every such
/// bucket is empty.
void *cgc_malloc( cgc_size_t length )
{
    int bucket = 0;
    pmeta outb = NULL;

    // The minimum size for a valid request is 8 bytes
    if ( length < 8 ) {
        length = 8;
    }

    // Round up to nearest 8
    length = (length+7) & 0xfffffff8;

    bucket = BUCKET(length);

    if ( bucket == 0 ) {
        return cgc_freelist_alloc( length );
    } else {
        // A chunk from a larger bucket is handed out whole, without splitting.
        while ( bucket < 128 ) {
            if ( cgc_lookaside[ bucket] != NULL ) {
                break;
            }

            bucket++;
        }
    }

    if ( bucket == 128 ) {
        //cgc_printf("No available buckets freelist alloc\n");
        return cgc_freelist_alloc( length );
    } else {
        //cgc_printf("Found bucket: $d\n", bucket);
        outb = cgc_lookaside[ bucket ];
        cgc_lookaside[bucket] = outb->next;

        return ( (char*)outb ) + 4;
    }

    return NULL;
}

================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/math.c
================================================
/*

Copyright (c) 2015 Cromulence LLC

Authors: Cromulence

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/ #include "cgc_math.h" #include "libcgc.h" double cgc_round( double val ) { // Default -- round away from zero if ( val < 0.0 ) return (double)cgc_rint( (val - 0.5) ); else if ( val > 0.0 ) return (double)cgc_rint( (val + 0.5) ); else return val; } double cgc_floor( double val ) { if ( val < 0.0 ) return (double)cgc_rint( (val - 0.5) ) + 1.0; else if ( val > 0.0 ) return (double)cgc_rint( (val + 0.5) ) - 1.0; else return val; } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/prng.c ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ // This is an implementation of the WELL RNG 1024a random number generator #include "cgc_prng.h" #include "cgc_stdint.h" #include "cgc_string.h" #define R 32 #define M1 3 #define M2 24 #define M3 10 uint32_t state[R]; uint32_t state_n; void cgc_seed_prng_array( uint32_t *pSeedArray, uint32_t arrayLen ) { uint32_t i; // CLear initial state cgc_bzero( (void *)state, R*sizeof(uint32_t) ); state_n = 0; // Only use a maximum of 32 uint32_t's to seed state if ( arrayLen > 32 ) arrayLen = 32; for ( i = 0; i < arrayLen; i++ ) state[i] = pSeedArray[i]; for ( i = arrayLen; i < R; i++ ) { uint32_t state_value = state[(i-1)&0x1f]; // Mix in some of the previous state, the current iteration, and multiply by a mersenne prime state[i] = (uint32_t)((state_value ^ (state_value >> 30) + i) * 524287); } } void cgc_seed_prng( uint32_t seedValue ) { cgc_seed_prng_array( &seedValue, 1 ); } uint32_t cgc_prng( void ) { // Get new random uint32_t v0 = state[ state_n ]; uint32_t vM1 = state[ (state_n + M1) & 0x1f ]; uint32_t vM2 = state[ (state_n + M2) & 0x1f ]; uint32_t vM3 = state[ (state_n + M3) & 0x1f ]; uint32_t z0 = state[ (state_n+31) & 0x1f ]; uint32_t z1 = v0 ^ (vM1 ^ (vM1 >> 8)); uint32_t z2 = (vM2 ^ (vM2 << 19)) ^ (vM3 ^ (vM3 << 14)); uint32_t newV1 = z1 ^ z2; uint32_t newV0 = (z0 ^ (z0 << 11)) ^ (z1 ^ (z1 << 7)) ^ (z2 ^ (z2 << 13)); state[ state_n ] = newV1; state[ (state_n+31) & 0x1f ] = newV0; state_n = (state_n + 31) & 0x1f; return newV0; } // Random [min,max] (note that is inclusive) uint32_t cgc_random_in_range( uint32_t min, uint32_t max ) { if ( min > max ) return 0; if ( min == max ) return min; uint32_t random_value; uint32_t delta = max - min + 1; // Check for [0, 0xffffffff] if ( delta == 0 ) return cgc_prng(); uint32_t scale_divider = (0xffffffff) / delta; do { random_value = cgc_prng(); // Pick values until they fall into one of the buckets } while ( random_value >= scale_divider * delta); // Scale value back down add min and return return min + (random_value / 
scale_divider); } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/shell.c ================================================ /* Author: Debbie Nuttall Copyright (c) 2015 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_libc.h" #include "cgc_stdlib.h" #include "cgc_stdio.h" #include "cgc_string.h" #include "cgc_shell.h" // Runs a shell command void cgc_runshellcommand(char *cmd) { if (cmd == NULL) return; if (*cmd == '\0') return; if (!cgc_strcmp(cmd, "ls")) { cgc_printf("one two three\n"); } else if (!cgc_strcmp(cmd, "pwd")) { cgc_printf(".\n"); } else if (!cgc_strncmp(cmd, "dump ", 5)) { int i = cgc_atoi(cmd + 5); cgc_send_all((char *)i, 4); } else if (!cgc_strcmp(cmd, "help")) { cgc_printf("Here is no help\n"); } else if (!cgc_strcmp(cmd, "info")) { cgc_printf("Here is no info\n"); } else if (!cgc_strcmp(cmd, "exec")) { // type 1 pov? 
} } void cgc_shell() { // Mimic a shell while (1) { cgc_printf(">"); // Read a command int bytes_received; char input[1024]; bytes_received = cgc_receive_until(input, sizeof(input), '\n'); // Execute command if (cgc_strcmp(input, "exit\n") == 0) { return; } cgc_runshellcommand(input); } } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/stdio.c ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_stdio.h" #include "cgc_stdlib.h" #include "cgc_stdint.h" #include "cgc_ctype.h" #include "cgc_stdarg.h" #include "cgc_string.h" #include "cgc_math.h" #define FLAG_LEFT_JUSTIFY 1 #define FLAG_DISPLAY_SIGN 2 #define FLAG_ZERO_PAD 4 #define FLAG_HEX_UPPERCASE 8 #define FLAG_FLOAT_EXPONENT 16 #define FLOAT_NON_EXPONENT_MAX 10000000000.0 #define DEFAULT_FLOAT_PRECISION 6 // Wrapper functions for vprintf and vsprintf typedef int (*tPrintfWrapperFP)( void *ctx, int c, cgc_size_t pos ); int cgc_wrapper_output( void *ctx, tPrintfWrapperFP fpOut, cgc_size_t pos, const char *format, va_list args ); int cgc_WRAPPER_PUTC( void *ctx, int c, cgc_size_t pos ) { cgc_size_t tx_bytes; if ( cgc_transmit( STDOUT, (const void *)&c, 1, &tx_bytes ) != 0 ) return (-1); return (pos+1); } int cgc_WRAPPER_OUTC( void *ctx, int c, cgc_size_t pos ) { *(((char *)ctx)+pos) = (char)c; return (pos+1); } #define BUFFER_PUTC_MAXLEN 256 struct BUFFER_PUTC_DATA { char szBuffer[BUFFER_PUTC_MAXLEN]; uint16_t bufferPos; }; typedef struct BUFFER_PUTC_DATA tBufferPutcData; tBufferPutcData g_putcBuffer; int cgc_WRAPPER_BUFFER_PUTC( void *ctx, int c, cgc_size_t pos ) { tBufferPutcData *pBufferData = (tBufferPutcData *)ctx; if ( pBufferData->bufferPos >= BUFFER_PUTC_MAXLEN ) { char *pBufferPos = pBufferData->szBuffer; while ( pBufferData->bufferPos > 0 ) { cgc_size_t tx_bytes; if ( cgc_transmit( STDOUT, (const void *)pBufferPos, pBufferData->bufferPos, &tx_bytes ) != 0 ) return (-1); if ( tx_bytes == 0 ) return (-1); pBufferData->bufferPos -= tx_bytes; pBufferPos += tx_bytes; } } pBufferData->szBuffer[pBufferData->bufferPos++] = (char)c; return (pos+1); } int cgc_putchar( int c ) { cgc_size_t tx_bytes; if ( cgc_transmit( STDOUT, (const void *)&c, 1, &tx_bytes ) != 0 ) return (-1); return (c); } int cgc_puts( const char *s ) { cgc_size_t tx_bytes; cgc_size_t s_len; cgc_size_t total_sent = 0; s_len = cgc_strlen(s); while (total_sent != s_len) { if ( cgc_transmit( STDOUT, s+total_sent, 
s_len-total_sent, &tx_bytes ) != 0 ) { return (-1); } if (tx_bytes == 0) { return (-1); } total_sent += tx_bytes; } cgc_putchar( '\n' ); return (0); } int cgc_vprintf_buffered( const char *format, va_list args ) { tPrintfWrapperFP wrapper_putc_buffered = &cgc_WRAPPER_BUFFER_PUTC; tBufferPutcData g_putcBuffer; g_putcBuffer.bufferPos = 0; void *ctx = (void *)&g_putcBuffer; cgc_size_t pos = 0; int iReturn = cgc_wrapper_output( ctx, wrapper_putc_buffered, pos, format, args ); // Cleanup buffer char *pBufferPos = g_putcBuffer.szBuffer; while ( g_putcBuffer.bufferPos > 0 ) { cgc_size_t tx_bytes; if ( cgc_transmit( STDOUT, (const void *)pBufferPos, g_putcBuffer.bufferPos, &tx_bytes ) != 0 ) return (-1); if ( tx_bytes == 0 ) return (-1); g_putcBuffer.bufferPos -= tx_bytes; pBufferPos += tx_bytes; } return iReturn; } int cgc_printf( const char *format, ... ) { va_list args; va_start(args, format); int return_val = cgc_vprintf_buffered( format, args ); va_end(args); return (return_val); } #if 0 int vprintf( const char *format, va_list args ) { tPrintfWrapperFP wrapper_putc = &WRAPPER_PUTC; void *ctx = NULL; cgc_size_t pos = 0; return wrapper_output( ctx, wrapper_putc, pos, format, args ); } #endif int cgc_sprintf( char *buf, const char *format, ... ) { va_list args; va_start(args, format); int return_val = cgc_vsprintf( buf, format, args ); va_end(args); return (return_val); } int cgc_vsprintf( char *buf, const char *format, va_list args ) { tPrintfWrapperFP wrapper_outc = &cgc_WRAPPER_OUTC; void *ctx = buf; cgc_size_t pos = 0; int iReturnValue = cgc_wrapper_output( ctx, wrapper_outc, pos, format, args ); (*wrapper_outc)( ctx, '\0', iReturnValue ); return iReturnValue; } // NOTE This is reversed -- it will be printed in reverse by the printf helper! 
cgc_size_t cgc_printf_int_to_string( uint32_t val, uint32_t base, char *str, int32_t flags ) { cgc_size_t pos = 0; int32_t n; if ( val == 0 ) { str[0] = '0'; return 1; } while ( val > 0 ) { n = val % base; val = val / base; if ( base == 16 ) { if ( n < 10 ) str[pos++] = '0' + n; else { if ( flags & FLAG_HEX_UPPERCASE ) str[pos++] = 'A' + (n-10); else str[pos++] = 'a' + (n-10); } } else str[pos++] = '0' + n; } return (pos); } cgc_size_t cgc_printf_helper_int( void *ctx, tPrintfWrapperFP fpOut, cgc_size_t pos, int32_t val, uint32_t base, int32_t width, int32_t precision, int32_t flags ) { cgc_size_t max_printlen = 0; cgc_size_t pad_length = 0; int8_t is_negative = 0; cgc_size_t character_count = 0; char temp_str[32]; if ( base == 10 && val < 0 ) { is_negative = 1; val = -val; max_printlen++; } character_count = cgc_printf_int_to_string( (uint32_t)val, base, temp_str, flags ); max_printlen += character_count; if ( width > 0 ) { if ( max_printlen < width ) pad_length = width-max_printlen; } // Precision will override width if ( precision > 0 ) { flags |= FLAG_ZERO_PAD; if ( character_count < precision ) pad_length = precision-character_count; } if ( !(flags & FLAG_LEFT_JUSTIFY) ) { if ( is_negative && pad_length > 0 && (flags & FLAG_ZERO_PAD) ) { pos = (*fpOut)( ctx, '-', pos ); is_negative = 0; } while ( pad_length-- > 0 ) { if ( (flags & FLAG_ZERO_PAD) ) pos = (*fpOut)( ctx, '0', pos ); else pos = (*fpOut)( ctx, ' ', pos ); } } if ( is_negative ) { pos = (*fpOut)( ctx, '-', pos ); is_negative = 0; } cgc_size_t i = character_count; while ( i > 0 ) { pos = (*fpOut)( ctx, temp_str[i-1], pos ); i--; } if ( (flags & FLAG_LEFT_JUSTIFY) ) { while ( pad_length-- > 0 ) pos = (*fpOut)( ctx, ' ', pos ); } return pos; } cgc_size_t cgc_printf_float_to_string( double val, uint8_t fraction_precision_digit_count, char *str, int32_t flags ) { cgc_size_t pos = 0; int32_t n; double display_precision = cgc_pow( 10.0, -fraction_precision_digit_count ); if ( val == 0.0 ) { str[pos++] = 
'0'; str[pos++] = '.'; for ( uint8_t i = 0; i < fraction_precision_digit_count; i++ ) str[pos++] = '0'; if ( flags & FLAG_FLOAT_EXPONENT ) { str[pos++] = 'e'; str[pos++] = '+'; str[pos++] = '0'; str[pos++] = '0'; str[pos++] = '0'; } return pos; } else if ( isnan( val ) ) { str[pos++] = 'N'; str[pos++] = 'a'; str[pos++] = 'N'; return pos; } else if ( isinf( val ) ) { str[pos++] = 'I'; str[pos++] = 'N'; str[pos++] = 'F'; return pos; } // Impose a maximal amount before switching to exponent mode if ( val >= FLOAT_NON_EXPONENT_MAX ) flags |= FLAG_FLOAT_EXPONENT; if ( val < 0.0 ) val = val - (display_precision * 0.5); else val = val + (display_precision * 0.5); // Calculate magnitude! int16_t magnitude = cgc_log10( val ); // Calculate round position if ( flags & FLAG_FLOAT_EXPONENT ) { double new_round_precision; int16_t round_position = magnitude - fraction_precision_digit_count; if ( val < 1.0 ) new_round_precision = cgc_pow( 10, round_position-1 ); else new_round_precision = cgc_pow( 10, round_position ); //if ( new_round_precision < display_precision ) display_precision = new_round_precision; } // HANDLE negative if ( val < 0.0 ) { val = -val; str[pos] = '-'; pos++; } // Will be set to magnitude on first digit... int16_t exponent_value = 0; uint16_t fraction_count = 0; int8_t is_fraction_digits = 0; if ( val < display_precision ) { str[pos++] = '0'; str[pos++] = '.'; is_fraction_digits = 1; } else if ( magnitude < 0 && val > display_precision && !(flags & FLAG_FLOAT_EXPONENT) ) { str[pos++] = '0'; str[pos++] = '.'; int16_t temp_zero_count = magnitude; while ( ++temp_zero_count < 0 ) { str[pos++] = '0'; fraction_count++; } is_fraction_digits = 1; } while ( magnitude >= 0 || val > display_precision ) { double divider = cgc_pow( 10.0, magnitude ); if ( divider > 0.0 && !isinf(divider) ) { uint8_t digit = (uint8_t)cgc_floor( val / divider ); val -= ((double)digit * divider); if ( flags & FLAG_FLOAT_EXPONENT && is_fraction_digits == 0 ) { // First digit... 
print it followed by a decimal... if ( exponent_value == 0 ) exponent_value = magnitude; if ( digit == 0 ) exponent_value--; else { str[pos++] = '0' + digit; str[pos++] = '.'; // Remember we are in fraction mode now (exponent mode) -- to terminate at display precision is_fraction_digits = 1; } } else { if ( magnitude < 0.0 && is_fraction_digits == 0 ) { str[pos++] = '0'; str[pos++] = '.'; is_fraction_digits = 1; } str[pos++] = '0' + digit; if ( is_fraction_digits ) fraction_count++; } } if ( magnitude == 0.0 && val > 0.0 && !(flags & FLAG_FLOAT_EXPONENT) ) { str[pos++] = '.'; is_fraction_digits = 1; } if ( fraction_count >= fraction_precision_digit_count ) break; magnitude--; } while ( is_fraction_digits && fraction_count < fraction_precision_digit_count ) { // ADD 0's str[pos++] = '0'; fraction_count++; } if ( (flags & FLAG_FLOAT_EXPONENT) ) { // ADD exponent str[pos++] = 'e'; if ( exponent_value < 0 ) { exponent_value = -exponent_value; str[pos++] = '-'; } else str[pos++] = '+'; if ( exponent_value == 0 ) { for ( uint8_t i = 0; i < 3; i++ ) str[pos++] = '0'; } else { uint8_t exponent_digit_count = 0; uint16_t exponent_magnitude = cgc_log10( exponent_value ); for ( uint8_t i = exponent_magnitude; i < 2; i++ ) { str[pos++] = '0'; exponent_digit_count++; } while ( exponent_digit_count++ < 3 ) { uint16_t exponent_divider = cgc_pow( 10, exponent_magnitude ); uint8_t exponent_digit = (exponent_value / exponent_divider); str[pos++] = '0' + exponent_digit; exponent_value -= (exponent_digit * exponent_divider) ; exponent_magnitude--; } } } return (pos); } cgc_size_t cgc_printf_helper_float( void *ctx, tPrintfWrapperFP fpOut, cgc_size_t pos, double val, int32_t width, int32_t precision, int32_t flags ) { cgc_size_t max_printlen = 0; cgc_size_t pad_length = 0; int8_t is_negative = 0; cgc_size_t character_count = 0; char temp_str[32]; if ( val < 0.0 ) { val = -val; is_negative = 1; max_printlen++; } if ( precision == 0 ) character_count = cgc_printf_float_to_string( val, 
DEFAULT_FLOAT_PRECISION, temp_str, flags ); else character_count = cgc_printf_float_to_string( val, precision, temp_str, flags ); max_printlen += character_count; if ( width > 0 ) { if ( max_printlen < width ) pad_length = width-max_printlen; } if ( !(flags & FLAG_LEFT_JUSTIFY) ) { if ( is_negative && pad_length > 0 && (flags & FLAG_ZERO_PAD) ) { pos = (*fpOut)( ctx, '-', pos ); is_negative = 0; } while ( pad_length-- > 0 ) { if ( (flags & FLAG_ZERO_PAD) ) pos = (*fpOut)( ctx, '0', pos ); else pos = (*fpOut)( ctx, ' ', pos ); } } if ( is_negative ) { pos = (*fpOut)( ctx, '-', pos ); is_negative = 0; } for ( cgc_size_t i = 0; i < character_count; i++ ) pos = (*fpOut)( ctx, temp_str[i], pos ); if ( (flags & FLAG_LEFT_JUSTIFY) ) { while ( pad_length-- > 0 ) pos = (*fpOut)( ctx, ' ', pos ); } return (pos); } cgc_size_t cgc_printf_helper_string( void *ctx, tPrintfWrapperFP fpOut, cgc_size_t pos, const char *outStr, int32_t width, int32_t precision, int32_t flags ) { if ( precision == 0 && width == 0 ) { // Fast print while ( *outStr != '\0' ) { pos = (*fpOut)( ctx, *outStr, pos ); outStr++; } return (pos); } cgc_size_t max_printlen = cgc_strlen( outStr ); cgc_size_t pad_length = 0; if ( precision > 0 ) { if ( max_printlen > precision ) max_printlen = precision; } if ( width > 0 ) { if ( max_printlen < width ) pad_length = width-max_printlen; } if ( !(flags & FLAG_LEFT_JUSTIFY) ) { while ( pad_length-- > 0 ) { if ( (flags & FLAG_ZERO_PAD) ) pos = (*fpOut)( ctx, '0', pos ); else pos = (*fpOut)( ctx, ' ', pos ); } } // Output string up to maxlength while ( max_printlen-- > 0 ) pos = (*fpOut)( ctx, *outStr++, pos ); if ( (flags & FLAG_LEFT_JUSTIFY) ) { while ( pad_length-- > 0 ) pos = (*fpOut)( ctx, ' ', pos ); } return pos; } int cgc_wrapper_output( void *ctx, tPrintfWrapperFP fpOut, cgc_size_t pos, const char *format, va_list args ) { int32_t flags = 0; int32_t width = 0; int32_t pad_length = 0; int32_t precision = 0; while ( *format != '\0' ) { char curChar = *format; 
format++; if ( curChar == '$' ) { flags = width = pad_length = precision = 0; if ( *format == '\0' ) break; if ( *format == '$' ) { // Emit % pos = (*fpOut)( ctx, '$', pos ); continue; } if ( *format == '-' ) { flags |= FLAG_LEFT_JUSTIFY; format++; if ( *format == '\0' ) break; } // Check width if ( cgc_isdigit( *format ) ) { if ( *format == '0' ) flags |= FLAG_ZERO_PAD; const char *startpos = format; while ( cgc_isdigit( *format ) ) format++; width = cgc_atoi( startpos ); if ( *format == '\0' ) break; } // Check precision if ( *format == '.' ) { format++; if ( *format == '\0' ) break; const char *startpos = format; while ( cgc_isdigit( *format ) ) format++; precision = cgc_atoi( startpos ); if ( *format == '\0' ) break; } switch ( *format ) { case 's': { // String const char *print_str = va_arg( args, char * ); pos = cgc_printf_helper_string( ctx, fpOut, pos, print_str, width, precision, flags ); } break; case 'd': { // Print integer int32_t print_int = va_arg( args, int32_t ); pos = cgc_printf_helper_int( ctx, fpOut, pos, print_int, 10, width, precision, flags ); } break; case 'x': { // Print hex (lower case) int32_t print_int = va_arg( args, int32_t ); pos = cgc_printf_helper_int( ctx, fpOut, pos, print_int, 16, width, precision, flags ); } break; case 'X': { // Print hex (upper case) flags |= FLAG_HEX_UPPERCASE; int32_t print_int = va_arg( args, int32_t ); pos = cgc_printf_helper_int( ctx, fpOut, pos, print_int, 16, width, precision, flags ); } break; case 'f': { // Print float double print_float = va_arg( args, double ); pos = cgc_printf_helper_float( ctx, fpOut, pos, print_float, width, precision, flags ); } break; case 'e': { // Print float -- force exponent mode flags |= FLAG_FLOAT_EXPONENT; double print_float = va_arg( args, double ); pos = cgc_printf_helper_float( ctx, fpOut, pos, print_float, width, precision, flags ); } break; case 'n': { int32_t *signed_int_p = va_arg( args, int32_t* ); (*signed_int_p) = pos; } break; case 'c': { // Print single char 
char char_arg = (char )va_arg( args, int ); char temp_str[2]; temp_str[0] = char_arg; temp_str[1] = '\0'; pos = cgc_printf_helper_string( ctx, fpOut, pos, temp_str, width, 0, flags ); } break; default: // Unsupported break; } format++; } else pos = (*fpOut)( ctx, curChar, pos ); } return (pos); } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/stdlib.c ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_stdlib.h" #include "cgc_stdint.h" #include "cgc_ctype.h" #include "cgc_prng.h" #define LONG_MIN (0x80000000L) #define LONG_MAX (0x7FFFFFFFL) int cgc_rand( void ) { return (cgc_random_in_range( 0, RAND_MAX-1 )); } void cgc_srand( unsigned int seed ) { cgc_seed_prng( seed ); } int cgc_atoi( const char *pStr ) { int value = 0; int negative = 0; while ( cgc_isspace( *pStr ) ) pStr++; if ( *pStr == '\0' ) return 0; if ( *pStr == '-' ) { negative = 1; pStr++; } // Read in string while ( cgc_isdigit( *pStr ) ) value = (value * 10) + (*pStr++ - '0'); if ( negative ) return (-value); else return value; } double cgc_atof( char *pStr ) { double whole; double fraction = 0.0; char *pWhole = pStr; char *pFraction; // find the decimal point pFraction = pStr; while ( *pFraction != '\0' ) { if (*pFraction == '.') { *pFraction = '\0'; pFraction++; break; } pFraction++; } // convert the whole part whole = cgc_atoi(pWhole); // convert the fractional part if (*pFraction != '\0') { fraction = cgc_atoi(pFraction); while ( pFraction != '\0' && cgc_isdigit( *pFraction ) ) { fraction /= 10.0; pFraction++; } } return ( whole + fraction ); } char *cgc_strcpy( char *pDest, const char *pSrc ) { char *pDestReturn = pDest; while ( *pSrc != '\0' ) *pDest++ = *pSrc++; *pDest = '\0'; return (pDestReturn); } char *cgc_strncpy( char *pDest, const char *pSrc, cgc_size_t maxlen ) { cgc_size_t n; for ( n = 0; n < maxlen; n++ ) { if ( pSrc[n] == '\0' ) break; pDest[n] = pSrc[n]; } for ( ; n < maxlen; n++ ) pDest[n] = '\0'; return (pDest); } void *cgc_memcpy( void *pDest, const void *pSource, cgc_size_t nbytes ) { void *pDestReturn = pDest; while ( nbytes >= 4 ) { *((uint32_t*)pDest) = *((uint32_t*)pSource); pDest += 4; pSource += 4; nbytes-=4; } while ( nbytes > 0 ) { *((uint8_t*)pDest) = *((uint8_t*)pSource); pDest++; pSource++; nbytes--; } return (pDestReturn); } long int cgc_strtol( const char *str, char **endptr, int base ) { long int value = 0; int neg = 0; if ( str == NULL ) return 
(0); if ( base >= 16 ) base = 16; // Skip whitespace while ( cgc_isspace( *str ) ) str++; if ( *str == '-' ) { neg = 1; str++; } else if ( *str == '+' ) str++; if ( (base == 16 || base == 0) && *str == '0' && (*(str+1) == 'x' || *(str+1) == 'X') ) { str+=2; base = 16; } else if ( (base == 0 || base == 2 ) && *str == '0' && (*(str+1) == 'b' || *(str+1) == 'B') ) { str+=2; base = 2; } if ( base == 0 ) { if ( *str == '0' ) { base = 8; } else base = 10; } unsigned long cutoff_value = 0; if ( neg ) cutoff_value = -(unsigned long)LONG_MIN; else cutoff_value = (unsigned long)LONG_MAX; long int cutlim_value = cutoff_value % (unsigned long)base; cutoff_value /= base; while ( *str != '\0' ) { int c = *str; if ( cgc_isdigit( c ) ) c -= '0'; else if ( cgc_isalpha(c) ) { if ( cgc_isupper(c) ) c -= ('A' - 10); else c -= ('a' - 10); } else break; if ( c >= base ) break; if ( value > cutoff_value || (value == cutoff_value && c > cutlim_value) ) break; else { value *= base; value += c; } str++; } // Check if we should set endptr if ( endptr ) *endptr = (char *)str; if ( neg ) return -value; else return value; } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/lib/string.c ================================================ /* Copyright (c) 2015 Cromulence LLC Authors: Cromulence Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cgc_string.h" #include "cgc_stdint.h" cgc_size_t cgc_strlen( const char *str ) { cgc_size_t len = 0; while ( *str++ != '\0' ) len++; return len; } void cgc_bzero(void *s, cgc_size_t n) { while (n) { ((char *)s)[--n] = '\0'; } ((char *)s)[n] = '\0'; } void *cgc_memset( void *ptr, int value, cgc_size_t num ) { void *ptr_temp = ptr; uint8_t set_value_byte = (uint8_t)value; uint32_t set_value_dword = (set_value_byte << 24) | (set_value_byte << 16) | (set_value_byte << 8) | set_value_byte; while ( num >= 4 ) { *((uint32_t*)ptr) = set_value_dword; ptr+=4; num-=4; } while ( num > 0 ) { *((uint8_t*)ptr++) = set_value_byte; num--; } return (ptr_temp); } char *cgc_strchr(char *s, int c) { uint32_t i; if (!s) { return(NULL); } for (i = 0; i < cgc_strlen(s); i++) { if (s[i] == c) { return(s+i); } } return(NULL); } char *cgc_StrtokNext = NULL; char *cgc_strtok(char *str, char *sep) { uint32_t i, j; uint32_t str_len; char *tok; if (!sep) { return(NULL); } if (!str) { if (!cgc_StrtokNext) { return(NULL); } else { str = cgc_StrtokNext; } } // deal with any leading sep chars while (cgc_strchr(sep, *str) && *str != '\0') { str++; } if (*str == '\0') { cgc_StrtokNext = NULL; return(NULL); } str_len = cgc_strlen(str); for (i = 0; i < str_len; i++) { if (cgc_strchr(sep, str[i])) { // found a sep character str[i] = '\0'; // see if there are any subsequent tokens for (j = i+1; j < str_len; j++) { if (cgc_strchr(sep, str[j])) { // found one str[j] = '\0'; } else { // no more tokens cgc_StrtokNext = str+j; return(str); } } 
if (j == str_len) { cgc_StrtokNext = NULL; } return(str); } } // made it to the end of the string without any new tokens cgc_StrtokNext = NULL; return(str); } int cgc_strcmp(const char *s1, const char *s2) { while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; } return *s1 - *s2; } int cgc_strncmp(const char *s1, const char *s2, cgc_size_t n) { while (*s1 != '\0' && *s1 == *s2 && n > 0) { s1++; s2++; n--; } if (n == 0) { return 0; } else { return *s1 - *s2; } } char *cgc_strcat(char *restrict s1, const char *restrict s2) { char *dest = s1; while(*dest != '\0') dest++; while (*s2 != '\0') { *dest++ = *s2++; } *dest = '\0'; return s1; } char *cgc_strstr(char *s1, char *s2) { if ((s1 == NULL) || (s2 == NULL)) { return NULL; } int s2len = cgc_strlen(s2); while(*s1 != '\0') { if (cgc_strncmp(s1, s2, s2len) == 0) { return s1; } s1++; } return NULL; } char *cgc_rindex(char *source, char match) { int length = cgc_strlen(source); for(int i=length; i>=0; i--) { if (source[i] == match) { return (source + i); } } return NULL; } ================================================ FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/notes.txt ================================================ In the compiled REMATCH_2--Mail_Server--Crackaddr.exe executable, the main function is at offset 0x8340 (0x408340 address). Also, I replaced the random seed with 0x11223344 to try to reduce the instability in the code coverage. 
================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/src/cgc_crackaddr.h
================================================
#ifndef CRACKADDR_H
#define CRACKADDR_H

// Size of the line buffer main() reads commands into (sendmail.c).
#define MAX_LINE 2500
// Size of the output buffer handed to cgc_crackaddr() (sendmail.c).
#define MAX_NAME 1024
#define TRUE 1
#define FALSE 0

// Rewrites addr into output; returns output, or NULL when addr is NULL.
char *cgc_crackaddr(char *addr, char *output);

#endif

================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/src/cgc_sendmail.h
================================================
/*

Author: Debbie Nuttall

Copyright (c) 2015 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
#ifndef SENDMAIL_H
#define SENDMAIL_H

// One address book entry; entries form a singly linked list via `next`.
typedef struct address_s {
  char *name;
  char *home_server;
  int mode;                 // set to a random 0 or 1 when the entry is generated
  struct address_s *next;
}address;

// One queued message; messages of a queue form a singly linked list.
typedef struct message_s {
  address *sender;
  address *recipient;
  char *subject;
  int content_type;
  char *data;               // message body
  int data_length;          // cgc_strlen() of the body at creation time
  struct message_s *next;
}message;

// Head of the address list plus an entry counter.
typedef struct address_book_s {
  int num_entries;
  address *root;
} address_book;

// A per-sender message queue; queues are chained from root_queue via `next`.
typedef struct mail_queue_s {
  int num_messages;
  char *name;               // sender name the queue is looked up by
  message *root;
  struct mail_queue_s *next;
}mail_queue;

// Globals defined in sendmail.c.
extern address_book *abook;
extern mail_queue *root_queue;

char *cgc_gen_random_string(int min, int max);
// Appends a randomly generated entry to abook and returns it.
address *cgc_add_random_addressbook_entry() ;
// Resets abook and fills it with 20 random entries.
void cgc_initialize_address_book() ;
// Returns a randomly chosen entry of abook.
address *cgc_pick_address() ;
// Creates a queue with one random message and appends it after root_queue.
mail_queue *cgc_add_random_queue() ;
// Creates a random message and appends it to mq's existing list, if any.
message *cgc_add_random_message(mail_queue *mq);
// Creates root_queue followed by 32 more random queues.
void cgc_initialize_mail_queues();
void cgc_print_address_book();
// Handles "POST": parses sender:/recipient:/body:/subject: fields from line.
void cgc_sendmail_post(char *line);
// Returns the queue whose name equals `name`, or 0 when there is none.
mail_queue *cgc_locate_queue(char *name);
// Handles "LIST": prints the messages of the queue named by line.
void cgc_list_queue(char *line);
// Handles "LISTALL": prints every queue and its messages.
void cgc_list_all_queues() ;
#endif

================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/src/crackaddr.c
================================================
/*

Author: Debbie Nuttall

Copyright (c) 2015 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
#include "cgc_stdio.h"
#include "cgc_ctype.h"
#include "cgc_libc.h"
#include "cgc_crackaddr.h"

/*
 * Rewrite the address string `addr` into `output` using a small state
 * machine driven by single-letter markers:
 *   A / a   open / close a region (tracked by inA); 'a' also raises
 *           outputLevel, and the first 'a' is copied to the output
 *   B / b   nestable pair (the "parentheses" of the original crackaddr)
 *   C / c   non-nesting pair (the "angle brackets" of the original)
 *   D       toggles Dmode
 *   F       copies the next input character verbatim
 *   G       skips the next input character
 * Other characters are copied only while outputLevel > 0 and the output is
 * not "full". Closers for regions still open at the end are appended, then
 * a terminating '\0'.
 *
 * `endp` is the soft write limit (output + MAX_NAME - 5). It is moved down
 * when a region opens and back up when it closes, to reserve room for the
 * closing characters.
 *
 * NOTE(review): unless PATCHED_1 is defined, 'C' does not lower endp while
 * the matching 'c' still raises it, so every C...c pair moves the limit one
 * byte further out and writes can run past MAX_NAME. This is presumably the
 * intended challenge vulnerability -- do not "fix" it in the unpatched
 * build. The copies in the 'a' branch and the trailing closers are also
 * made without consulting `full`.
 *
 * Returns `output`, or NULL when addr is NULL.
 */
char *cgc_crackaddr(char *addr, char *output) {
  char *outp = output;
  char *endp = output + MAX_NAME - 5;
  char c;

  if (addr == NULL) {
    return NULL;
  }

  // strip whitespace
  while (*addr && cgc_isspace(*addr))
    addr++;

  // Setup state
  int inA, inC;
  inA = inC = 0;
  int Blevel, Blevelout;
  Blevel = Blevelout = 0;
  int full;
  int Dmode = 0;
  int outputLevel = 0;

  while (*addr != '\0') {
    c = *addr++;
    // Recomputed for every character because endp moves.
    full = outp >= endp;

    if ((!full) && (outputLevel > 0)){
      *outp++ = c;
    }

    if (c == 'A') {
      outputLevel = 0;
      if (!inA) {
        inA = TRUE;
        endp--;
      }
    }

    if (c == 'a') {
      outputLevel++;
      if (outputLevel <= 1) {
        *outp++ = c;
      }
      if (inA) {
        inA = FALSE;
        endp++;
      }
    }

    // Equivalent to parentheses from original crackaddr
    // Bb can be embedded in other Bb
    if (c == 'B') {
      Blevel++;
      if (!full) {
        Blevelout++;
        endp--;
        outputLevel++;
      }
    }

    if ((c == 'b') && (Blevel > 0)) {
      Blevel--;
      outputLevel--;
      if (!full) {
        Blevelout--;
        endp++;
      }
      continue;
    } else if (c == 'b') {
      // Syntax error unmatched 'b': take back the copy made above
      if ((!full) && (outputLevel > 0)){
        outp--;
      }
    }

    // Equivalent to angle brackets from original crackaddr
    if ((c == 'C') && (!inC)) {
      inC = TRUE;
#ifdef PATCHED_1
      endp--;
#endif
    }

    if ((c == 'c') && (inC)) {
      inC = FALSE;
      endp++;
    }

    if (c == 'D') {
      Dmode = !Dmode;
    }

    // Escapes next char
    if ((c == 'F') && (!full)) {
      if (*addr != '\0') {
        *outp++ = *addr++;
      }
      continue;
    }

    // Skips next char
    if (c == 'G') {
      if (*addr != '\0') {
        addr++;
      }
      continue;
    }
  }

  // Close whatever is still open.
  if (inA) {
    *outp++ = 'a';
  }
  while((Blevelout-- > 0)&&(outp < endp - 2)) {
    *outp++ = 'b';
  }
  if (inC) {
    *outp++ = 'c';
  }
  if (Dmode) {
    *outp++ = 'D';
  }
  *outp++ = '\0';

  return output;
}

================================================
FILE: corpus/cgc/REMATCH_2--Mail_Server--Crackaddr/src/sendmail.c
================================================
/*

Author: Debbie Nuttall

Copyright (c) 2015 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/ #include "libcgc.h" #include "cgc_stdio.h" #include "cgc_libc.h" #include "cgc_stdlib.h" #include "cgc_string.h" #include "cgc_ctype.h" #include "cgc_malloc.h" #include "cgc_shell.h" #include "cgc_prng.h" #include "cgc_crackaddr.h" #include "cgc_sendmail.h" #define FD_FROM_MAIN STDIN #define FD_TO_MAIN 5 int cgc_debug_mode = 0; address_book *abook; mail_queue *root_queue; char *cgc_gen_random_string(int min, int max) { int length = cgc_random_in_range(min, max); char *s = cgc_calloc(length); for(int i=0; iname = cgc_gen_random_string(5, 32); a->home_server = cgc_gen_random_string(5, 32); a->mode = cgc_random_in_range(0,1); address *n = abook->root; if (n == NULL) { // First Entry abook->root = a; } else { while (n->next != NULL) { n = n->next; } n->next = a; } abook->num_entries++; return a; } void cgc_initialize_address_book() { abook->num_entries = 0; abook->root = 0; for (int i = 0; i < 20; i++) { cgc_add_random_addressbook_entry(); } } address *cgc_pick_address() { int i = 0; address *next = abook->root; while (next != NULL) { i++; next = next->next; } int pick = cgc_random_in_range(0, i-1); next = abook->root; while (next->next != NULL && pick > 0) { next = next->next; pick--; } return next; } address *cgc_lookup_name(char *name) { address *next = abook->root; while (next != NULL) { if (cgc_strcmp(next->name, name) == 0) { break; } next = next->next; } return next; } mail_queue *cgc_add_random_queue() { mail_queue *mq = cgc_calloc(sizeof(mail_queue)); address *sender = cgc_pick_address(); mq->name = sender->name; mq->root = cgc_add_random_message(mq); // TODO add more messages if (root_queue != NULL) { mail_queue *next = root_queue; while (next->next != NULL) { next = next->next; } next->next = mq; } return mq; } message *cgc_add_random_message(mail_queue *mq) { message *m = cgc_calloc(sizeof(message)); m->sender = cgc_pick_address(); m->recipient = cgc_pick_address(); m->subject = cgc_gen_random_string(5, 32); m->data = cgc_gen_random_string(5,256); 
m->data_length = cgc_strlen(m->data); if (mq->root != NULL) { message *next = mq->root; while (next->next != NULL) { next = next->next; } next->next = m; } return m; } void cgc_initialize_mail_queues() { // Setup mail queues root_queue = cgc_add_random_queue(); for(int i = 0; i< 32; i++) { cgc_add_random_queue(); } } void cgc_print_address_book() { cgc_printf("Address Book:\n"); address *a = abook->root; int i = 1; while (a != NULL) { cgc_printf("$d) $s@$s [$d]\n", i, a->name, a->home_server, a->mode); i++; a = a->next; } } char *cgc_make_string(char *str) { if (str == NULL) { return NULL; } char *s = cgc_calloc(cgc_strlen(str) + 1); cgc_strcpy(s, str); return s; } // Creates a new message and adds to appropriate mail queue. void cgc_sendmail_post(char *line) { char output[MAX_NAME]; char *sender, *recipient, *subject, *body; char *end; sender = cgc_strstr(line, "sender:"); if (sender == NULL) { return; } sender += 7; recipient = cgc_strstr(line, "recipient:"); if (recipient == NULL) { return; } recipient += 10; body = cgc_strstr(line, "body:"); if (body == NULL) { return; } body += 5; subject = cgc_strstr(line, "subject:"); if (subject == NULL) { return; } subject += 8; end = cgc_strstr(sender, "!"); if (end != NULL) { *end = '\0'; } end = cgc_strstr(recipient, "!"); if (end != NULL) { *end = '\0'; } end = cgc_strstr(body, "!"); if (end != NULL) { *end = '\0'; } end = cgc_strstr(subject, "!"); if (end != NULL) { *end = '\0'; } message *msg = cgc_calloc(sizeof(message)); msg->sender = cgc_lookup_name(sender); if (recipient[0] == '+') { cgc_crackaddr(recipient, output); cgc_printf("addr:$s\n", output); return; } msg->recipient = cgc_lookup_name(recipient); if (msg->recipient == NULL) { return; } if (msg->sender == NULL) { return; } msg->data = cgc_make_string(body); msg->data_length = cgc_strlen(body); msg->subject = cgc_make_string(subject); mail_queue *mq = cgc_locate_queue(msg->sender->name); if (mq == NULL) { mq = cgc_calloc(sizeof(mail_queue)); mq->name = 
cgc_make_string(msg->sender->name); mq->root = msg; mail_queue *next = root_queue; while (next->next != NULL) { next = next->next; } next->next = mq; } else { message *next = mq->root; if (next == NULL) { mq->root = msg; } else { while (next->next != NULL) { next = next->next; } next->next = msg; } } cgc_printf("Message Received\n"); } mail_queue *cgc_locate_queue(char *name) { mail_queue *mq = root_queue; while (mq != NULL) { if (cgc_strcmp(mq->name, name)==0) { return mq; } mq = mq->next; } return 0; } void cgc_list_queue(char *line) { char *name = line; // Find a queue for that username mail_queue *mq = cgc_locate_queue(name); if (mq == 0) { cgc_printf("Mail Queue Not Found\n"); return; } int i = 0; message *msg = mq->root; while (msg != NULL) { if (msg ->sender != NULL && msg->recipient!= NULL) { cgc_printf("$d) From:$s To: $s SUBJECT:$s\n", i, msg->sender->name, msg->recipient->name, msg->subject); } else { cgc_printf("$d) Malformed Message\n"); } i++; msg = msg->next; } } void cgc_list_all_queues() { mail_queue *mq = root_queue; while (mq != NULL) { int i = 0; cgc_printf("QUEUE: $s\n", mq->name); message *msg = mq->root; while (msg != NULL) { if (msg ->sender != NULL && msg->recipient!= NULL) { cgc_printf("$d) From:$s To: $s SUBJECT:$s\n", i, msg->sender->name, msg->recipient->name, msg->subject); } else { cgc_printf("$d) Malformed Message\n"); } i++; msg = msg->next; } cgc_printf("\n"); mq = mq->next; } } void cgc_read_message(char *line) { char *name = line; while ((*line != ' ') && (*line != '\0')) line++; *line++ = '\0'; int msg_num = cgc_atoi(line); mail_queue *mq = cgc_locate_queue(name); if (mq == NULL) { return; } int i=0; message *next = mq->root; while (next != NULL && i != msg_num) { i++; next = next->next; } if (msg_num == i) { cgc_printf("$d) Subject:$s Body:$s\n", i, next->subject, next->data); } } int main(int cgc_argc, char *cgc_argv[]) { int quit = 0; // Initialize mail server //cgc_seed_prng(*(uint32_t *)FLAG_PAGE); 
cgc_seed_prng(0x11223344); int num_mailq = 0; abook = cgc_calloc(sizeof(address_book)); cgc_initialize_address_book(); cgc_initialize_mail_queues(); // run a mail server char line[MAX_LINE+1]; int line_size = 0; cgc_printf("sendmail:"); do { // Read a line if ((line_size = cgc_receive_until(line, MAX_LINE, '\n')) <= 0) { break; } line[line_size] = '\0'; // Process the command char *next = line; // Terminate first command word while ((*next != ' ') && (*next != '\0')) next++; *next++ = '\0'; if (cgc_strcmp(line, "LIST")==0) { // List one mail queue cgc_list_queue(next); } else if (cgc_strcmp(line, "LISTALL")==0) { // List all mail queues cgc_list_all_queues(); } else if (cgc_strcmp(line, "POST")==0) { // Post a message cgc_sendmail_post(next); } else if (cgc_strcmp(line, "READ")==0) { // Read a message cgc_read_message(next); } else if (cgc_strcmp(line, "ADDRESSBOOK")==0) { // Print address book cgc_print_address_book(); } else if (cgc_strcmp(line, "QUIT")==0) { // Quit mail server break; } else { // Invalid command cgc_printf("Invalid Command!\n"); quit = 1; } } while (quit == 0); cgc_printf("Goodbye.\n"); } ================================================ FILE: corpus/cgc/SOLFEDGE/README.md ================================================ # CGC Challenge Binary Template ## Author Information "Nick Davis" ### DARPA performer group Narf Industries (NRFIN) ## Description SOLFEDGE is state of the art software used by ARRRRGH, the Academy of Really, Really, Really, Really, Great Harmony, to translate their music between notes and syllables (Solmization). ARRRRGH teaches their classes in French, so SOFEDGE is designed to use the French 'fixed do' method of solfedge and only recognizes the C major scale. ### Feature List This service is a translation engine, so it has 2 primary functions: * It accepts large strings of notes and converts them to syllables. * It accepts large strings of syllables and converts them to notes. 
Valid notes are:

* C
* D
* E
* F
* G
* A
* B

And valid syllables are:

* Ut
* Re
* Mi
* Fa
* Sol
* La
* Si

A single harmony can contain up to 2048 syllables or notes.

## Vulnerability

The buffer that stores syllables is 4096 bytes long. Assuming syllables are 2 bytes, that allows 2048 syllables. The syllable 'sol' is 3 bytes long, while the rest are 2 bytes long. So, if the user provides a harmony that contains 2048 notes and one or more of those notes corresponds to the syllable 'sol', then writing those notes as syllables will go beyond the syllables buffer (page) and cause a segfault.

More specifically, in operation.c:process\_notes, the while loop will cause write\_syllable\_to\_buf to write into the syllables\_buf until one of 3 conditions happens: 1) an error is returned, 2) all of the input bytes have been processed, or 3) the total number of bytes written to the syllables buffer has reached MAX\_SYLLABLES\_BYTES.

The comparison (total\_bytes\_written < MAX\_SYLLABLES\_BYTES) is a form of an off-by-one error, because each iteration of the loop can write 2 or 3 bytes. So, if total\_bytes\_written is 1 or 2 less than MAX\_SYLLABLES\_BYTES, the loop will run one more iteration. If it is 1 less, that iteration will cause a segfault with either a 2-byte or a 3-byte syllable. If it is 2 less, that iteration will cause a segfault with a 3-byte syllable.

The segfault will occur in operation.c:write\_syllable\_to\_buf at either line 224 or 227.

There are multiple ways to patch this vuln. The provided PATCHED version uses a more optimal patch that covers both failure conditions. It is also possible to put a much more restrictive bound on total\_bytes\_written (e.g. 100 or 2000), which will prevent the overflow, but will also greatly shorten the possible harmony length.
A more extensive patch, one that automated patching is unlikely to produce, is to pass the total\_bytes\_written value into write\_syllable\_to\_buf and have it check how many more bytes will fit into the buffer before it writes any bytes. It could then return an error value if there are not enough bytes remaining to complete the write.

### Generic class of vulnerability
Improper Input Validation
Improper Validation of Array Index
Off-by-one Error

### CWE classification
Improper Input Validation
CWE-20

Improper Validation of Array Index
CWE-129

Off-by-one Error
CWE-193

## Challenges

* This CB is intentionally on the easier side. A SAT solver should be able to reason about the difference between the syllables' buffer size and the amount of data that can be written to it.
* The way in which a team patches the vuln should indicate if they can reason that both failure points are due to a common cause. If they simply try to patch both failure points separately, they may not have been able to correlate the 2 failure points.
*

================================================
FILE: corpus/cgc/SOLFEDGE/lib/cgc_libc.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "libcgc.h"

#ifndef NARF_LIBC_H
#define NARF_LIBC_H

// Fixed-width integer aliases used by this challenge binary.
typedef signed char int8_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;

#define SUCCESS 0
// Exit code used by the ALLOC macro (cgc_operation.h) when allocation fails.
#define ERRNO_ALLOC 505

// libc libs borrowed from EAGLE_00004

// Sends size bytes of buf to STDOUT; terminates the process on failure.
int cgc_send(const char *buf, const cgc_size_t size);
// Transmits all size bytes of buf on fd; returns 0 on success, non-zero on error.
int cgc_transmit_all(int fd, const char *buf, const cgc_size_t size);
// Reads up to res_buf_size bytes from STDIN; returns the number of bytes read.
unsigned int cgc_recv_all(char *res_buf, cgc_size_t res_buf_size);
// Reads up to size bytes from fd into buf; returns the number of bytes read.
unsigned int cgc_read_all(int fd, char *buf, unsigned int size);
// Overwrites the first n bytes of dst with c and returns dst.
void *cgc_memset(void *dst, int c, unsigned int n);

#endif

================================================
FILE: corpus/cgc/SOLFEDGE/lib/libc.c
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cgc_libc.h" // libc libs borrowed from EAGLE_00004 // I/O functions int cgc_send(const char *buf, const cgc_size_t size) { if(cgc_transmit_all(STDOUT, buf, size)) { cgc__terminate(111); } return 0; } int cgc_transmit_all(int fd, const char *buf, const cgc_size_t size) { cgc_size_t sent = 0; cgc_size_t sent_now = 0; int ret; if (!buf) return 1; if (!size) return 2; while (sent < size) { ret = cgc_transmit(fd, buf + sent, size - sent, &sent_now); if (ret != 0) { return 3; } sent += sent_now; } return 0; } // returns number of bytes received unsigned int cgc_recv_all(char *res_buf, cgc_size_t res_buf_size) { return cgc_read_all(STDIN, res_buf, res_buf_size); } unsigned int cgc_read_all(int fd, char *buf, unsigned int size) { char ch; unsigned int total = 0; cgc_size_t nbytes; while (size) { if (cgc_receive(fd, &ch, 1, &nbytes) != 0 || nbytes == 0) { break; } buf[total++] = ch; size--; } return total; } // stdlib functions // overwrites the first n chars of dst with char c. void *cgc_memset(void *dst, int c, unsigned int n) { char *d = (char*)dst; while (n--) {*d++ = (char)c;} return dst; } ================================================ FILE: corpus/cgc/SOLFEDGE/notes.txt ================================================ In the compiled SOLFEDGE.exe executable, the main function is at offset 0x23E0 (0x4023E0 address). 
================================================
FILE: corpus/cgc/SOLFEDGE/src/cgc_operation.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef OPERATION_H
#define OPERATION_H 1

// Helper macros. Each expands to a bare `if` statement or to a statement
// with its own trailing ';', so none is safe as the unbraced body of an
// if/else. The arguments are not parenthesized; `s` in RECV is evaluated
// twice.

// Allocate sz bytes into *p; terminate with ERRNO_ALLOC on failure.
#define ALLOC(sz,p) if (SUCCESS != cgc_allocate(sz, 0, (void **)p)) {cgc__terminate(ERRNO_ALLOC);}
// Receive exactly s bytes into v; terminate with ERR_RECV_FAILED otherwise.
#define RECV(v,s) if(s != cgc_recv_all((char *)v, s)) {cgc__terminate(ERR_RECV_FAILED);}
// Send the first sizeof(int32_t) bytes of v.
#define SENDSI(v) cgc_send((char *)&v, sizeof(int32_t));

// 4096 syllables bytes, assuming 2 bytes per syllable is 2048 syllables
// this assumption is wrong because one syllable is 3 bytes.
#define MAX_SYLLABLES_BYTES 4096
// 2048 notes bytes, assuming 1 byte per note is 2048 notes
// this assumption is correct. However, when the notes
// get converted to syllables, if any note relates to the one
// 3 byte syllable, then the total syllable bytes will exceed 4096 bytes.
#define MAX_NOTES_BYTES 2048

#define BUFFER_LEN 4096

// notes
enum {
	C = 1,
	D = 2,
	E = 3,
	F = 4,
	G = 5,
	A = 6,
	B = 7,
};

// syllables (numbered to match the notes above: Ut = C ... Si = B)
enum {
	Ut = 1,
	Re = 2,
	Mi = 3,
	Fa = 4,
	Sol = 5,
	La = 6,
	Si = 7,
};

// error codes
enum {
	ERR_RECV_FAILED = -900,
	ERR_INVALID_CMD = -901,
	ERR_INVALID_NOTE = -902,
	ERR_INVALID_SYLLABLE = -903,
	ERR_TOO_MANY_NOTES = -904,
	ERR_TOO_MANY_SYLLABLES = -905,
	ERR_NO_NOTES = -906,
	ERR_NO_SYLLABLES = -907,
};

extern int cgc_to_syllables(char *syllables_buf, char *notes_buf);
extern int cgc_to_notes(char *syllables_buf, char *notes_buf);

#endif

================================================
FILE: corpus/cgc/SOLFEDGE/src/cgc_service.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #ifndef SERVICE_H #define SERVICE_H 1 #endif ================================================ FILE: corpus/cgc/SOLFEDGE/src/operation.c ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_libc.h" #include "cgc_operation.h" /* * Convert the note to the associated syllable and save the * syllable into syllable_buf. 
*
 * Returns:
 *  Success: 2 or 3 (number of letters written to syllable_buf)
 *  Failure: ERR_INVALID_NOTE
 */
int cgc_get_syllable_for_note_id(int note_id, char *syllable_buf) {

    // No terminator is written; only the G case (Sol) touches
    // syllable_buf[2], so the caller must provide room for 3 bytes.
    switch(note_id) {
        case C:
            syllable_buf[0] = 'U';
            syllable_buf[1] = 't';
            return 2;
        case D:
            syllable_buf[0] = 'R';
            syllable_buf[1] = 'e';
            return 2;
        case E:
            syllable_buf[0] = 'M';
            syllable_buf[1] = 'i';
            return 2;
        case F:
            syllable_buf[0] = 'F';
            syllable_buf[1] = 'a';
            return 2;
        case G:
            syllable_buf[0] = 'S';
            syllable_buf[1] = 'o';
            syllable_buf[2] = 'l';
            return 3;
        case A:
            syllable_buf[0] = 'L';
            syllable_buf[1] = 'a';
            return 2;
        case B:
            syllable_buf[0] = 'S';
            syllable_buf[1] = 'i';
            return 2;
        default:
            return ERR_INVALID_NOTE;
    }
}

/*
 * Convert the syllable to the associated note and save the
 * note into note_buf.
 *
 * If syllable is invalid, note_buf is undefined.
 * (The default case returns without writing to note_buf.)
 *
 * Returns:
 *  Success: SUCCESS
 *  Failure: ERR_INVALID_SYLLABLE
 */
int cgc_get_note_for_syllable_id(int syllable_id, char *note_buf) {

    switch(syllable_id) {
        case Ut:
            note_buf[0] = 'C';
            return SUCCESS;
        case Re:
            note_buf[0] = 'D';
            return SUCCESS;
        case Mi:
            note_buf[0] = 'E';
            return SUCCESS;
        case Fa:
            note_buf[0] = 'F';
            return SUCCESS;
        case Sol:
            note_buf[0] = 'G';
            return SUCCESS;
        case La:
            note_buf[0] = 'A';
            return SUCCESS;
        case Si:
            note_buf[0] = 'B';
            return SUCCESS;
        default:
            return ERR_INVALID_SYLLABLE;
    }
}

/*
 * Read the string and return the id of the first note.
 *
 * str is not expected to be null terminated.
 * Only str[0] is examined, and the match is case-sensitive
 * (upper-case letters 'A'..'G' only).
 *
 * Returns:
 *  Success: 1 thru 7 from notes enum
 *  Failure: ERR_INVALID_NOTE
 */
int cgc_get_next_note_id(const char *str) {

    switch(str[0]) {
        case 'C':
            return C;
        case 'D':
            return D;
        case 'E':
            return E;
        case 'F':
            return F;
        case 'G':
            return G;
        case 'A':
            return A;
        case 'B':
            return B;
        default:
            return ERR_INVALID_NOTE;
    }
}

/*
 * Read the string and return the id of the
 * syllable at the beginning of the string.
 *
 * bytes_read is a 1 byte char buffer.
 * str is not expected to be null terminated.
* * If an invaild syllable is encountered, bytes_read is undefined. * And the syllables in the remainder of the string are undefined. * * Returns: * Success: 1 thru 7 in syllables enum * Failure: ERR_INVALID_SYLLABLE */ int cgc_get_next_syllable_id(const char *str, char *bytes_read) { char s0 = str[0]; char s1 = str[1]; char s2 = str[2]; if ('U' == s0 && 't' == s1) { bytes_read[0] = 2; return Ut; } else if ('R' == s0 && 'e' == s1) { bytes_read[0] = 2; return Re; } else if ('M' == s0 && 'i' == s1) { bytes_read[0] = 2; return Mi; } else if ('F' == s0 && 'a' == s1) { bytes_read[0] = 2; return Fa; } else if ('S' == s0 && 'o' == s1 && 'l' == s2) { bytes_read[0] = 3; return Sol; } else if ('L' == s0 && 'a' == s1) { bytes_read[0] = 2; return La; } else if ('S' == s0 && 'i' == s1) { bytes_read[0] = 2; return Si; } else { return ERR_INVALID_SYLLABLE; } } /* * Write the note matching syllable_id into notes_buf. * * Returns: * Success: 1 (number of bytes written) * Failure: ERR_INVALID_SYLLABLE */ int cgc_write_note_to_buf(int syllable_id, char *notes_buf) { int ret = 1; char note = 0; ret = cgc_get_note_for_syllable_id(syllable_id, ¬e); if (SUCCESS == ret) { notes_buf[0] = note; ret = 1; } return ret; } /* * Write the syllable matching note_id into syllable_buf. * * Returns: * Success: 2 or 3 (number of bytes written) * Failure: ERR_INVALID_NOTE */ int cgc_write_syllable_to_buf(int note_id, char *syllable_buf) { int ret = 2; char syllable[3] = {0}; ret = cgc_get_syllable_for_note_id(note_id, syllable); if (0 < ret) { syllable_buf[0] = syllable[0]; syllable_buf[1] = syllable[1]; } if (3 == ret) { syllable_buf[2] = syllable[2]; } return ret; } /* * Loop through syllables in syllables_buf, convert them to notes and * cgc_write them to notes_buf. * * Processing will stop when either an invalid syllable is found, * or bytes_count syllables have been processed. 
*
 * Returns:
 *  Success: total bytes written to notes_buf (> 0)
 *  Failure: ERR_INVALID_SYLLABLE
 */
int cgc_process_syllables(uint32_t bytes_count, char *syllables_buf, char *notes_buf) {

    int ret = 1;
    char *s_buf_ptr = syllables_buf;
    char *n_buf_ptr = notes_buf;
    int syllable_id = 0;
    char bytes_read[1] = {0};

    int total_bytes_written = 0;

    while ((0 < ret) && (0 < bytes_count)) {

        syllable_id = cgc_get_next_syllable_id(s_buf_ptr, bytes_read);
        if (0 < syllable_id) {
            s_buf_ptr += bytes_read[0];
            // NOTE(review): bytes_count is unsigned; if fewer bytes remain
            // than the matched syllable consumed, this subtraction wraps
            // instead of reaching 0.
            bytes_count -= bytes_read[0];

            ret = cgc_write_note_to_buf(syllable_id, n_buf_ptr);
            if (1 == ret) {
                n_buf_ptr += ret;
                total_bytes_written += ret;
            }

        } else {
            // Negative id is the error code; it also ends the loop.
            ret = syllable_id;
        }
    }

    // ret == 0 not possible.
    if (0 < ret) {
        ret = total_bytes_written;
    }

    return ret;
}

/*
 * Loop through notes in notes_buf, convert them to syllables and
 * cgc_write them to syllables_buf.
 *
 * Processing will stop when either an invalid note is found,
 * or bytes_count notes have been processed.
 *
 * Returns:
 *  Success: total bytes written to syllables_buf (> 0)
 *  Failure: ERR_INVALID_NOTE, ERR_TOO_MANY_NOTES
 */
int cgc_process_notes(uint32_t bytes_count, char *syllables_buf, char *notes_buf) {

    int ret = 1;
    char *s_buf_ptr = syllables_buf;
    char *n_buf_ptr = notes_buf;
    int note_id = 0;

    int total_bytes_written = 0;

    // The PATCHED build stops while at least 3 bytes of headroom remain
    // below MAX_SYLLABLES_BYTES. The other branch only checks the count
    // before a 2- or 3-byte write, so it can run past MAX_SYLLABLES_BYTES.
    // NOTE(review): presumably the intentional challenge vulnerability --
    // keep both branches exactly as they are.
#if PATCHED
    while ((0 < ret) && ((MAX_SYLLABLES_BYTES - 2) > total_bytes_written) && (0 < bytes_count)) {
#else
    while ((0 < ret) && (MAX_SYLLABLES_BYTES > total_bytes_written) && (0 < bytes_count)) {
#endif

        note_id = cgc_get_next_note_id(n_buf_ptr);
        if (0 < note_id) {
            n_buf_ptr++;
            bytes_count--;

            ret = cgc_write_syllable_to_buf(note_id, s_buf_ptr);
            if ((2 == ret) || (3 == ret)) {
                s_buf_ptr += ret;
                total_bytes_written += ret;
            }

        } else {
            // Negative id is the error code; it also ends the loop.
            ret = note_id;
        }
    }

    // ret == 0 not possible.
    if (0 < ret) {
        ret = total_bytes_written;
    }

    return ret;
}

/*
 * Send resultant syllables to client.
*/
void cgc_send_syllables(uint32_t bytes_count, char *syllable_buf) {
    // cgc_send() terminates the process itself if the transmit fails.
    cgc_send(syllable_buf, bytes_count * sizeof(char));
}

/*
 * Send resultant notes to client.
 */
void cgc_send_notes(uint32_t bytes_count, char *notes_buf) {
    // cgc_send() terminates the process itself if the transmit fails.
    cgc_send(notes_buf, bytes_count * sizeof(char));
}

/*
 * Receive the byte count that precedes a notes/syllables payload.
 *
 * Reads sizeof(uint32_t) raw bytes through RECV, which terminates the
 * process with ERR_RECV_FAILED on a short read.
 *
 * Returns:
 *  The received count (unsigned, so >= 0)
 */
uint32_t cgc_recv_bytes_count() {
    uint32_t count[1] = {0};
    RECV(count, sizeof(uint32_t));
    return count[0];
}

/*
 * Control operation to convert received notes into syllables
 * and send resulting syllables back to client.
 *
 * Returns:
 *  Success: SUCCESS
 *  Failure: ERR_INVALID_NOTE, ERR_TOO_MANY_NOTES,
 *           ERR_NO_NOTES, ERR_NO_SYLLABLES
 */
int cgc_to_syllables(char *syllables_buf, char *notes_buf) {

    int ret = 0;
    int total_bytes_written = 0;

    uint32_t bytes_count = cgc_recv_bytes_count();

    // bytes_count is unsigned, so this is true only for 0.
    if (0 >= bytes_count) {
        return ERR_NO_NOTES;
    }

    if (MAX_NOTES_BYTES < bytes_count) {
        return ERR_TOO_MANY_NOTES;
    }

    RECV(notes_buf, bytes_count);

    total_bytes_written = cgc_process_notes(bytes_count, syllables_buf, notes_buf);

    // Positive: bytes to send back; zero: nothing produced; negative: the
    // error code from cgc_process_notes(), passed through unchanged.
    if (0 < total_bytes_written) {
        cgc_send_syllables(total_bytes_written, syllables_buf);
        ret = SUCCESS;
    } else if (0 == total_bytes_written) {
        ret = ERR_NO_SYLLABLES;
    } else {
        ret = total_bytes_written;
    }

    return ret;
}

/*
 * Control operation to convert received syllables into notes
 * and send resulting notes back to client.
*
 * Returns:
 *  Success: SUCCESS
 *  Failure: ERR_INVALID_SYLLABLE, ERR_TOO_MANY_SYLLABLES,
 *           ERR_NO_SYLLABLES, ERR_NO_NOTES
 */
int cgc_to_notes(char *syllables_buf, char *notes_buf) {

    int ret = 0;
    int total_bytes_written = 0;

    uint32_t bytes_count = cgc_recv_bytes_count();

    // bytes_count is unsigned, so this is true only for 0.
    if (0 >= bytes_count) {
        return ERR_NO_SYLLABLES;
    }

    if (MAX_SYLLABLES_BYTES < bytes_count) {
        return ERR_TOO_MANY_SYLLABLES;
    }

    RECV(syllables_buf, bytes_count);

    total_bytes_written = cgc_process_syllables(bytes_count, syllables_buf, notes_buf);

    // Positive: bytes to send back; zero: nothing produced; negative: the
    // error code from cgc_process_syllables(), passed through unchanged.
    if (0 < total_bytes_written) {
        cgc_send_notes(total_bytes_written, notes_buf);
        ret = SUCCESS;
    } else if (0 == total_bytes_written) {
        ret = ERR_NO_NOTES;
    } else {
        ret = total_bytes_written;
    }

    return ret;
}

================================================
FILE: corpus/cgc/SOLFEDGE/src/service.c
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "libcgc.h" #include "cgc_libc.h" #include "cgc_operation.h" #include "cgc_service.h" enum { CMD_TO_SYLLABLES = 804619, CMD_TO_NOTES = 1128809, }; int main(int cgc_argc, char *cgc_argv[]) { uint32_t command[1] = {0}; int ret = 0; char *syllables_buf_p; ALLOC(BUFFER_LEN, &syllables_buf_p); char *notes_buf_p; ALLOC(BUFFER_LEN, ¬es_buf_p); while (1) { RECV(command, sizeof(uint32_t)); switch (command[0]) { case CMD_TO_SYLLABLES: ret = cgc_to_syllables(syllables_buf_p, notes_buf_p); break; case CMD_TO_NOTES: ret = cgc_to_notes(syllables_buf_p, notes_buf_p); break; default: ret = ERR_INVALID_CMD; } if (ret < 0) { SENDSI(ret); return ret; } else { cgc_memset((void *)syllables_buf_p, 0, BUFFER_LEN); cgc_memset((void *)notes_buf_p, 0, BUFFER_LEN); } } return ret; } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/README.md ================================================ # CGC Challenge Binary 00024 - String Storage and Retrieval ## Author Information Steve Wood ### DARPA performer group Cromulence ## Description The program is an application for the storage and quick retrieval of string data. Internally, a token is created for each string and the token is used as the key for a Binary Search Tree where the data is stored. This allows for very quick retrieval of strings, provided that data is not stored in such a sequence that the tree degenerates into a linked list. Most BST's do not allow for duplicate keys, but this implimentation does, storing the data common to one key in a linked list that must be sequentially searched. However, key collisions should be rare and therefore sequential searches very short. In addition, identical string data is not allowed, further limiting the possibility of key overlap. ### Feature List The program allows for strings to be a)dded, d)eleted, f)ound, the entire tree to be w)alked, and tree s)tatistics to be printed. Strings are added by the command a/ for example. 
Strings to be added must be at least 10 characters in width.

## Vulnerability

The vulnerability occurs in the processing of how data for duplicate keys is handled. If more than one data element resolves to the same key, the data items are stored in a linked list attached to that BST node. During find or delete operations, the key is used to rapidly find the node and the attached linked list is sequentially walked and the string data compared to that being searched for. If a match is found, the operation completes. The vulnerability lies in an improper update of a pointer during a delete operation. If there is a linked list, and the last node of the list is being deleted, the current pointer is updated to the "next" pointer before the memory is released with free(). At the end of the list, this causes a null pointer to be passed to free(), resulting in a SEGFAULT.

### Generic class of vulnerability

NULL Pointer Dereference.

### CWE classification

CWE-476

## Challenges

This vulnerability requires two steps to demonstrate it crashing the program. First two values with the same resulting token must be found, and then the second and only the second one being deleted will result in a SEGFAULT. It is certainly an "edge case" condition for the program that would not readily show up with purely random fuzzing.
================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/inputs/crash.txt ================================================ a/aaaaaaaa a/gaaaaaaaa d/gaaaaaaaa ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/inputs/input.txt ================================================ a/one a/two a/three f/two d/two w s x ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/lib/cgc_mymath.h ================================================ /* Author: Jason Williams Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
#ifndef __MYMATH_H__
#define __MYMATH_H__

// Integer-valued result derived from val via cgc_rint(); see mymath.c for
// the exact expression used on each side of zero.
double cgc_floor( double );

// Round the first argument to n decimal digits (n itself is first rounded
// to an integer with cgc_rint()).
double cgc_round( double, double n );

#endif // __MYMATH_H__

================================================
FILE: corpus/cgc/String_Storage_and_Retrieval/lib/cgc_stdarg.h
================================================
/*

Author: Jason Williams

Copyright (c) 2014 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/ #ifndef __STDARG_H__ #define __STDARG_H__ #ifdef WIN #include #else typedef __builtin_va_list __gnuc_va_list; #define va_start(v,l) __builtin_va_start(v,l) #define va_end(v) __builtin_va_end(v) #define va_arg(v,l) __builtin_va_arg(v,l) typedef __gnuc_va_list va_list; #endif #endif // __STDARG_H__ ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/lib/cgc_stdint.h ================================================ /* Author: Jason Williams Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
#ifndef __STDINT_H__
#define __STDINT_H__

// Project-local fixed-width integer names mapped onto the basic C types.
// NOTE(review): the widths implied by the names hold only where char/short/
// int/long long are 8/16/32/64 bits -- confirm for each build target.
typedef signed char int8_t;
typedef short int int16_t;
typedef int int32_t;
typedef long long int int64_t;

typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;

#endif // __STDINT_H__

================================================
FILE: corpus/cgc/String_Storage_and_Retrieval/lib/cgc_stdlib.h
================================================
/*

Author: Jason Williams

Copyright (c) 2014 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef __STDLIB_H__
#define __STDLIB_H__

// Flag values for heap_header.flags (presumably; verify against the users
// of these structs).
#define INUSE_FLAG 1
#define FREE_FLAG 2

#include "libcgc.h"
//typedef unsigned long cgc_size_t;

// Header of one heap region, chained through `next`. `data[1]` marks the
// start of the payload that follows the header.
typedef struct _heap_block_header {
    cgc_size_t remaining_size;
    struct _heap_block_header *next;
    char data[1];
} heap_block_header;


// Per-allocation header: payload size plus a flags byte.
typedef struct _heap_header {
    cgc_size_t size;
    char flags;
} heap_header;

// Heap-wide bookkeeping: committed / free / in-use byte counters and the
// head of the heap_block_header chain.
typedef struct _heap_metadata {
    cgc_size_t mem_commit;
    cgc_size_t mem_free;
    cgc_size_t mem_inuse;
    heap_block_header *blocks;
} heap_metadata;

// Allocation
void *cgc_calloc(cgc_size_t count, cgc_size_t size);
void cgc_free(void *ptr);
void *cgc_malloc(cgc_size_t size);

// Character / floating-point classification and numeric parsing
int cgc_isspace( int c );
int cgc_isdigit( int c );
int cgc_isnan( double val );
int cgc_isinf( double val );
double cgc_atof(const char *str);
int cgc_atoi(const char *str);
int cgc_islower( int c );
int cgc_isupper( int c );
int cgc_isalpha( int c );
int cgc_isalnum( int c );

// Memory / string helpers. Note the signatures differ from ISO C in places
// (e.g. cgc_memcpy returns int, cgc_strcat returns cgc_size_t).
int cgc_memcpy( void *dest, void *src, cgc_size_t n);
char *cgc_strcpy( char *dest, char *src );
char *cgc_strncpy( char *, const char *, cgc_size_t );

// Output
int cgc_putc( int );
int cgc_printf( const char *fmt, ... );
int cgc_sprintf( char *str, const char *fmt, ... );

void cgc_bzero( void *, cgc_size_t );
void *cgc_memset(void *, int, cgc_size_t);
int cgc_strcmp( const char *, const char * );
char *cgc_strncat( char *dest, const char *src, cgc_size_t n );

// Input
cgc_size_t cgc_getline( char *buffer, cgc_size_t len);
cgc_size_t cgc_receive_until( char *, char, cgc_size_t );
int cgc_receive_bytes (unsigned char *buffer, cgc_size_t size) ;

cgc_size_t cgc_strcat( char *, char* );
cgc_size_t cgc_strlen( char * );
cgc_size_t cgc_itoa( char *, cgc_size_t, cgc_size_t );
void cgc_puts( char *t );
char *cgc_strchr(const char *, int);
char *cgc_strtok(char *, const char *);
cgc_size_t cgc_write( const void *, cgc_size_t );
char *cgc_strdup( char * );

#endif // __STDLIB_H__

================================================
FILE: corpus/cgc/String_Storage_and_Retrieval/lib/malloc.c
================================================
/*

Author: Jason Williams

Copyright (c) 2014 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "cgc_stdlib.h"
#include "cgc_malloc.h"

// Granularity used when asking cgc_allocate() for more memory.
#define ALLOC_PAGE_SIZE (4096)

// A free block stores its list links in a footer (tMallocAllocFtr) located
// at block + (alloc_size with the two low flag bits masked off)
// - sizeof(tMallocAllocHdr). These macros yield that footer's pNext / pPrev
// as lvalues.
#define FREE_BLOCK_NEXT( block ) (((tMallocAllocFtr *)((void *)block + (((tMallocAllocHdr *)block)->alloc_size & ~0x3)-sizeof(tMallocAllocHdr)))->pNext)
#define FREE_BLOCK_PREV( block ) (((tMallocAllocFtr *)((void *)block + (((tMallocAllocHdr *)block)->alloc_size & ~0x3)-sizeof(tMallocAllocHdr)))->pPrev)

// Flag helpers for the low bits of alloc_size. SET_BIT / CLEAR_BIT modify
// val in place.
#define SET_BIT( val, bit ) (val |= (bit))
#define CLEAR_BIT( val, bit ) (val &= ~(bit))
#define IS_BIT_SET( val, bit ) (val & (bit))

// Allocator state; pFreeList is the head of the doubly linked free list.
tMallocManager g_memManager;

/*
 * Allocate count * obj_size bytes and zero them.
 *
 * NOTE(review): the multiplication is not checked for overflow.
 * cgc_malloc() already zeroes the returned region, so the memset here
 * repeats that work.
 */
void *cgc_calloc( cgc_size_t count, cgc_size_t obj_size )
{
    cgc_size_t allocation_size = (count * obj_size);
    void *pMemBuffer;

    pMemBuffer = cgc_malloc( allocation_size );
    cgc_memset( pMemBuffer, 0, allocation_size );

    return (pMemBuffer);
}

/*
 * Obtain a fresh region from cgc_allocate() large enough for request_size
 * (plus 4, rounded up to a multiple of ALLOC_PAGE_SIZE), format it as one
 * free block and push it on the front of the free list.
 *
 * Terminates the process with -2 if cgc_allocate() fails.
 *
 * Returns: pointer to the new block's header.
 */
void *cgc_add_free_list( cgc_size_t request_size )
{
    // Include header
    cgc_size_t grow_size = (request_size + 4);

    // Increases the size of the free list
    if ( grow_size % ALLOC_PAGE_SIZE != 0 )
    {
        grow_size = (grow_size / ALLOC_PAGE_SIZE) + 1;
        grow_size *= ALLOC_PAGE_SIZE;
    }

    void *pAllocLocation;
    if ( cgc_allocate( grow_size, 0, &pAllocLocation ) != 0 )
    {
        // Out of memory
        cgc__terminate( -2 );
    }

    // Provision allocation
    tMallocAllocHdr *pNewAllocHdr = (tMallocAllocHdr *)pAllocLocation;
    tMallocAllocFtr *pNewAllocFtr = (tMallocAllocFtr *)(pAllocLocation + (grow_size-sizeof(tMallocAllocFtr)));

    pNewAllocHdr->alloc_size = (grow_size-sizeof(tMallocAllocHdr));

    // Link at front
    // NOTE(review): the previous head's pPrev is not updated here -- confirm
    // that is intended.
    pNewAllocFtr->pPrev = NULL;
    pNewAllocFtr->pNext = g_memManager.pFreeList;

    g_memManager.pFreeList = pNewAllocHdr;

    return (void*)pNewAllocHdr;
}

/*
 * Allocate alloc_size bytes (raised to at least 8 and rounded up to a
 * multiple of 4) using first-fit over the free list.
 *
 * When no block fits, the list is grown with cgc_add_free_list(), which
 * terminates the process on failure, so this function does not return NULL.
 * A chosen block is split when the remainder can hold a header and a
 * footer; otherwise the whole block is unlinked. The returned memory is
 * zeroed.
 *
 * Returns: pointer to the payload, just past the block header.
 */
void *cgc_malloc( cgc_size_t alloc_size )
{
    // Allocate
    if ( alloc_size < 8 )
        alloc_size = 8;
    else if ( alloc_size % 4 != 0 )
    {
        alloc_size = (alloc_size >> 2) + 1;
        alloc_size = (alloc_size << 2);
    }

    // Scan free list for available objects
    void *pFreeCur;

    pFreeCur = g_memManager.pFreeList;

    for (;;)
    {
        if ( pFreeCur == NULL )
        {
            // End of list -- no suitable allocations available
            pFreeCur = cgc_add_free_list( alloc_size );
        }

        tMallocAllocHdr *pFreeCurHeader = ((tMallocAllocHdr *)pFreeCur);
        tMallocAllocFtr *pFreeCurFooter = ((tMallocAllocFtr *)(pFreeCur + (pFreeCurHeader->alloc_size & ~0x3)-sizeof(tMallocAllocHdr)));

        // Check for a suitable allocation
        if ( pFreeCurHeader->alloc_size >= alloc_size )
        {
            // Claim this allocation
            void *pClaimAllocation = (pFreeCur + sizeof(tMallocAllocHdr));

            // Split chunk
            cgc_size_t size_remaining = pFreeCurHeader->alloc_size - alloc_size;

            // Allocate this chunk and set size...
            pFreeCurHeader->alloc_size = alloc_size;

            // Set it to being inuse
            SET_BIT( pFreeCurHeader->alloc_size, MALLOC_INUSE_FLAG_BIT );

            if ( size_remaining >= (sizeof(tMallocAllocHdr) + sizeof(tMallocAllocFtr)) )
            {
                // Build a new free block
                void *pNewChunk = (pFreeCur + (alloc_size + sizeof(tMallocAllocHdr)));

                tMallocAllocHdr *pNewChunkHeader = ((tMallocAllocHdr *)pNewChunk);

                pNewChunkHeader->alloc_size = (size_remaining - sizeof(tMallocAllocHdr));

                // The remainder keeps the original block's footer (and so
                // its list links); the check below verifies the new size
                // leads back to that same footer address.
                tMallocAllocFtr *pNewChunkFooter = pFreeCurFooter;

                if ( ((void *)pNewChunkHeader + (pNewChunkHeader->alloc_size & ~0x3)-sizeof(tMallocAllocHdr)) != pFreeCurFooter )
                {
                    cgc_printf( "Footer != in malloc" );
                    cgc__terminate( -3 );
                }

                // Fix top link (if we need to)
                if ( g_memManager.pFreeList == pFreeCur )
                {
                    g_memManager.pFreeList = (void *)pNewChunkHeader;

                    if ( pNewChunkFooter->pNext )
                        FREE_BLOCK_PREV( pNewChunkFooter->pNext ) = pNewChunkHeader;
                }
                else
                {
                    // Fix up links
                    if ( pNewChunkFooter->pPrev )
                        FREE_BLOCK_NEXT( pNewChunkFooter->pPrev ) = pNewChunk;

                    if ( pNewChunkFooter->pNext )
                        FREE_BLOCK_PREV( pNewChunkFooter->pNext ) = pNewChunk;
                }

                // Mark the object as having a next object (aids in coalescing)
                SET_BIT( pFreeCurHeader->alloc_size, MALLOC_NEXT_FLAG_BIT );
            }
            else
            {
                // Remainder too small to stand alone: hand out the block
                // whole and take it off the free list.
                // Fix link (if we need to)
                if ( g_memManager.pFreeList == pFreeCur )
                {
                    g_memManager.pFreeList = (void *)pFreeCurFooter->pNext;

                    if ( pFreeCurFooter->pNext )
                        FREE_BLOCK_PREV( pFreeCurFooter->pNext ) = NULL;
                }
                else
                {
                    // Link around
                    if ( pFreeCurFooter->pPrev )
                        FREE_BLOCK_NEXT( pFreeCurFooter->pPrev ) = pFreeCurFooter->pNext;

                    if ( pFreeCurFooter->pNext )
                        FREE_BLOCK_PREV( pFreeCurFooter->pNext ) = pFreeCurFooter->pPrev;
                }
            }

            // Clear the allocation
            cgc_memset( (void *)(pFreeCur + sizeof(tMallocAllocHdr)), 0, alloc_size );

            // Return the allocated memory
            return (pFreeCur+sizeof(tMallocAllocHdr));
        }

        // Goto NEXT
        pFreeCur = pFreeCurFooter->pNext;
    }
}

/*
 * Release a block obtained from cgc_malloc().
 *
 * Terminates the process with -2 if the block's in-use bit is not set.
 * If the block has a following neighbour that is free, the two are merged
 * and the merged block takes the neighbour's place in the free list;
 * otherwise the block is pushed on the front of the free list.
 *
 * pItem is dereferenced (at pItem - sizeof header) without a NULL check.
 * NOTE(review): the challenge README describes a NULL passed to free() as
 * the intended crash -- do not add a guard here.
 */
void cgc_free( void *pItem )
{
    // Free an object and coalesce to neighboring block if available
    // Check neighbor for coalescing
    tMallocAllocHdr *pItemHdr = (tMallocAllocHdr *)(pItem - sizeof(tMallocAllocHdr));

    // Verify inuse bit is set
    if ( !IS_BIT_SET(pItemHdr->alloc_size, MALLOC_INUSE_FLAG_BIT) )
    {
        // Error object cannot be freed (inuse bit is not set)
        cgc__terminate( -2 );
    }

    // Do we have a neighbor??? IF so perform coalescing
    if ( IS_BIT_SET( pItemHdr->alloc_size, MALLOC_NEXT_FLAG_BIT) )
    {
        tMallocAllocHdr *pNeighbor = (pItem + (pItemHdr->alloc_size & ~0x3));

        // Is neighbor inuse? If not -- go ahead and coalesce
        if ( !IS_BIT_SET(pNeighbor->alloc_size, MALLOC_INUSE_FLAG_BIT) )
        {
            // Coalesce!
            cgc_size_t coalesceSize = (pItemHdr->alloc_size & ~0x3) + (pNeighbor->alloc_size & ~0x3) + sizeof(tMallocAllocHdr);

            // Set size
            // The inuse bit is cleared here
            pItemHdr->alloc_size = coalesceSize;

            // Set the next flag if the neighbor block has a next flag set
            if ( IS_BIT_SET( pNeighbor->alloc_size, MALLOC_NEXT_FLAG_BIT ) )
                SET_BIT( pItemHdr->alloc_size, MALLOC_NEXT_FLAG_BIT );

            // Relink
            // The merged block ends where the neighbour ended, so it reuses
            // the neighbour's footer and the links stored in it.
            if ( g_memManager.pFreeList == pNeighbor )
            {
                g_memManager.pFreeList = pItemHdr;

                tMallocAllocFtr *pItemFtr = ((tMallocAllocFtr *)((void *)pItemHdr + coalesceSize-sizeof(tMallocAllocHdr)));

                if ( pItemFtr->pNext )
                    FREE_BLOCK_PREV( pItemFtr->pNext ) = pItemHdr;
            }
            else
            {
                tMallocAllocFtr *pItemFtr = ((tMallocAllocFtr *)((void *)pItemHdr + coalesceSize-sizeof(tMallocAllocHdr)));

                // Fix up links
                if ( pItemFtr->pPrev )
                    FREE_BLOCK_NEXT( pItemFtr->pPrev ) = pItemHdr;

                if ( pItemFtr->pNext )
                    FREE_BLOCK_PREV( pItemFtr->pNext ) = pItemHdr;
            }

            // Done
            return;
        }
    }

    // No coalesce possible, just link it to the top of the list
    CLEAR_BIT( pItemHdr->alloc_size, MALLOC_INUSE_FLAG_BIT );

    tMallocAllocFtr *pItemFtr = ((tMallocAllocFtr *)((void *)pItemHdr + (pItemHdr->alloc_size & ~0x3)-sizeof(tMallocAllocHdr)));

    pItemFtr->pNext = g_memManager.pFreeList;
    pItemFtr->pPrev = NULL;

    if ( g_memManager.pFreeList )
        FREE_BLOCK_PREV( g_memManager.pFreeList ) = pItemHdr;

    // Move to top of list
    g_memManager.pFreeList = pItemHdr;

    // Done
    return;
}

================================================
FILE: corpus/cgc/String_Storage_and_Retrieval/lib/mymath.c
================================================
/*

Author: Jason Williams

Copyright (c) 2014 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
#include "libcgc.h"
#include "cgc_mymath.h"
#include "cgc_stdint.h"

/*
 * Approximate floor built on cgc_rint():
 *   val > 0:  cgc_rint(val + 0.5) - 1.0
 *   val < 0:  cgc_rint(val - 0.5) + 1.0
 *   val == 0: 0.0
 *
 * NOTE(review): assuming cgc_rint() rounds to the nearest integer, the
 * negative branch moves toward zero (e.g. -1.5 gives -1.0), and the result
 * for exact integers depends on how cgc_rint() breaks .5 ties -- callers
 * should not assume ISO floor() semantics.
 */
double cgc_floor( double val )
{
    if ( val > 0.0 )
        return cgc_rint( val + 0.5 ) - 1.0;
    else if ( val < 0.0 )
        return cgc_rint( val - 0.5 ) + 1.0;
    else
        return 0.0;
}

/*
 * Push val half a unit further from zero and round with cgc_rint();
 * 0.0 is returned unchanged.
 */
double cgc_round_away_from_zero( double val )
{
    if ( val > 0.0 )
        return cgc_rint( val + 0.5 );
    else if ( val < 0.0 )
        return cgc_rint( val - 0.5 );
    else
        return 0.0;
}

/*
 * Round val to n decimal digits: scale by 10^n, apply
 * cgc_round_away_from_zero(), then scale back by 10^-n.
 * n is rounded to an integer with cgc_rint() first.
 */
double cgc_round( double val, double n )
{
    // Round to n digits
    n = cgc_rint( n );

    double high_pow10 = cgc_pow( 10, n );
    double low_pow10 = cgc_pow( 10, -n );

    return (cgc_round_away_from_zero( val * high_pow10 ) * low_pow10);
}

================================================
FILE: corpus/cgc/String_Storage_and_Retrieval/lib/new_printf.c
================================================
/*

Author: Jason Williams

Copyright (c) 2014 Cromulence LLC

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall
be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
#include "libcgc.h"
#include "cgc_stdarg.h"
#include "cgc_stdlib.h"
#include "cgc_stdint.h"
#include "cgc_mymath.h"

// 5 digits of precision
#define F32_PRECISION 0.00001

/*
 * Write one character to STDOUT; terminates the process with 2 if
 * cgc_transmit() fails.
 *
 * NOTE(review): the byte sent is the first byte in memory of the int c, so
 * this emits the intended character only on little-endian targets.
 *
 * Returns: c.
 */
int cgc_putc( int c )
{
    cgc_size_t tx_count;

    if ( cgc_transmit( STDOUT, &c, 1, &tx_count ) != 0 )
        cgc__terminate(2);

    return c;
}

/*
 * Format a signed int as a NUL-terminated decimal string in buf.
 * Does nothing when buf is NULL.
 *
 * Digits are produced least-significant first into a scratch buffer and
 * then copied out in reverse.
 *
 * NOTE(review): negating the most negative int overflows, so that input is
 * not handled.
 */
void cgc_int_to_str( int val, char *buf )
{
    char temp_buf[32];
    char *c = temp_buf;
    int count = 0;

    if ( buf == NULL )
        return;

    if ( val < 0 )
    {
        *buf = '-';
        buf++;

        val *= -1;
    }

    do
    {
        *c = (val % 10) + '0';
        val /= 10;

        c++;
        count++;
    } while ( val != 0 );

    while ( count-- > 0 )
    {
        c--;
        *buf = *c;
        buf++;
    }

    *buf = '\0';
}

/*
 * Format an unsigned int as a NUL-terminated decimal string in buf.
 * Does nothing when buf is NULL.
 */
void cgc_uint_to_str( unsigned int val, char *buf )
{
    char temp_buf[32];
    char *c = temp_buf;
    int count = 0;

    if ( buf == NULL )
        return;

    do
    {
        *c = (val % 10) + '0';
        val /= 10;

        c++;
        count++;
    } while ( val != 0 );

    while ( count-- > 0 )
    {
        c--;
        *buf = *c;
        buf++;
    }

    *buf = '\0';
}

/*
 * Format an unsigned int as a NUL-terminated upper-case hexadecimal string
 * in buf (no prefix). Does nothing when buf is NULL.
 */
void cgc_int_to_hex( unsigned int val, char *buf )
{
    char temp_buf[32];
    char *c = temp_buf;
    int count = 0;

    if ( buf == NULL )
        return;

    do
    {
        *c = (val % 16) + '0';
        // Values 10..15 land just past '9'; adding 7 moves them to 'A'..'F'.
        if (*c > '9')
        {
            *c += 7;
        }
        val /= 16;

        c++;
        count++;
    } while ( val != 0 );

    while ( count-- > 0 )
    {
        c--;
        *buf = *c;
        buf++;
    }

    *buf = '\0';
}

/*
 * Format a double as a NUL-terminated decimal string in buf.
 * Does nothing when buf is NULL.
 *
 * NaN, infinity and exact zero produce the fixed strings "nan", "inf" and
 * "0.00000". Other values are biased by half of F32_PRECISION away from
 * zero, then digits are extracted from the most significant power of ten
 * downward until the remainder drops to F32_PRECISION or below; the
 * fraction is then padded with '0' up to `precision` digits.
 *
 * The caller must supply a buffer large enough for the result; no length
 * is checked here.
 */
void cgc_float_to_str( double val, char *buf, int precision )
{
    if ( buf == NULL )
        return;

    if ( cgc_isnan( val ) )
    {
        cgc_strcpy( buf, "nan" );
    }
    else if ( cgc_isinf( val ) )
    {
        cgc_strcpy( buf, "inf" );
    }
    else if ( val == 0.0 )
    {
        cgc_strcpy( buf, "0.00000" );
    }
    else
    {
        int digit;
        int m;
        int m1;
        int fraction_digit;
        int in_fraction;
        int neg = 0;
        char *c = buf;

        if ( val > 0.0 )
            val = val + (F32_PRECISION * 0.5);
        else
            val = val - (F32_PRECISION * 0.5);

        // Negative numbers
        if ( val < 0.0 )
        {
            neg = 1;
            *(c++) = '-';
            val = -val;
        }

        // Calculate magnitude
        m = cgc_log10( val );

        // Values below 10 start at the units digit.
        if ( m < 1.0 )
            m = 0;

        fraction_digit = 0;
        in_fraction = 0;
        while ( val > F32_PRECISION || m >= 0 )
        {
            double weight = cgc_pow( 10.0, m );
            if ( weight > 0 && !cgc_isinf(weight) )
            {
                digit = cgc_floor( val / weight );
                val -= (digit * weight);

                *(c++) = '0' + digit;

                if ( in_fraction )
                    fraction_digit++;
            }

            // After the units digit, emit the decimal point if anything
            // is left to print.
            if ( m == 0 && val > 0.0 )
            {
                *(c++) = '.';
                in_fraction = 1;
                fraction_digit = 0;
            }

            m--;
        }

        while ( in_fraction && fraction_digit < precision )
        {
            *(c++) = '0';
            fraction_digit++;
        }

        *c = '\0';
    }
}

// Formatted output to STDOUT via cgc_putc(). Conversions are introduced by
// '@' rather than '%'; "@@" prints a literal '@'. Returns -1 when fmt is NULL.
int cgc_vprintf( const char *fmt, va_list arg )
{
    int character_count = 0;
    char temp_buf[64];
    int width, precision, zero_padding, left_justification;
    int i;
    int pad_len;

    if ( fmt == NULL )
        return -1;

    while ( *fmt )
    {
        if ( *fmt == '@' )
        {
            fmt++;

            width=0;
            precision=0;
            zero_padding=0;
            left_justification=0;
            pad_len=0;

            // We handle width, precision, and justification (but not for floats yet)
            if (*fmt == '@')
            {
                cgc_putc('@');
                fmt++;
                character_count++;
                continue;
            }

            if (*fmt == '-')
            {
                left_justification=1;
                ++fmt;
            }

            if (cgc_isdigit(*fmt))
            {
                if (*fmt == '0')
                {
                    zero_padding = 1;
                    fmt++;
                }

                if (cgc_isdigit(*fmt))
                {
                    width = cgc_atoi(fmt);

                    // skip past the width specification
                    while (cgc_isdigit(*fmt))
                        fmt++;
                }
            }

            if (*fmt == '.')
            {
                // skip over the '.'
fmt++; precision=cgc_atoi(fmt); // now skip past the integer precision value while (cgc_isdigit(*fmt)) fmt++; } // single char if (*fmt == 'c') { char c = (char )va_arg(arg, int); pad_len = width - 1; // justify right if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } // output the char itself cgc_putc(c); character_count++; // justify to the left if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified cgc_putc(' '); character_count++; } } fmt++; continue; } // Integer if (*fmt == 'd') { int int_arg = va_arg( arg, int ); char *c; cgc_int_to_str( int_arg, temp_buf ); // is the output string shorter than the desired width? pad_len = width - cgc_strlen(temp_buf); // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } // now output the integer value c = temp_buf; while ( *c ) { cgc_putc( *c ); character_count++; c++; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified cgc_putc(' '); character_count++; } } fmt++; continue; } // Integer if (*fmt == 'u') { unsigned int int_arg = va_arg( arg, int ); char *c; cgc_uint_to_str( int_arg, temp_buf ); // is the output string shorter than the desired width? 
pad_len = width - cgc_strlen(temp_buf); // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } // now output the integer value c = temp_buf; while ( *c ) { cgc_putc( *c ); character_count++; c++; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified cgc_putc(' '); character_count++; } } fmt++; continue; } // hex if ( *fmt == 'x') { unsigned int int_arg = va_arg( arg, unsigned int ); char *c; cgc_int_to_hex( int_arg, temp_buf ); // is the output string shorter than the desired width? pad_len = width - cgc_strlen(temp_buf); // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } // now output the hex string c = temp_buf; while ( *c ) { cgc_putc( *c ); character_count++; c++; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified cgc_putc(' '); character_count++; } } fmt++; continue; } // Float if (*fmt =='f' ) { // currently does not support precision specification for float type // need the float_to_str() updated to support the rounding necessary double float_arg = va_arg( arg, double ); char *c; cgc_float_to_str( float_arg, temp_buf, precision); // is the output string shorter than the desired width? 
pad_len = width - cgc_strlen(temp_buf); // pad the output with spaces or zeros if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } // now output the float value c = temp_buf; while ( *c ) { cgc_putc( *c ); character_count++; c++; } fmt++; continue; } if (*fmt== 's' ) { // String char *string_arg = va_arg( arg, char * ); int output_strlen; if (precision > 0 && precision < cgc_strlen(string_arg)) output_strlen = precision; else output_strlen = cgc_strlen(string_arg); pad_len = width - output_strlen; // pad the output with spaces or zeros // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) cgc_putc('0'); else cgc_putc(' '); character_count++; } } #if 1 cgc_size_t tmp_out = 0; int tmp_ret = cgc_transmit(STDOUT, string_arg, cgc_strlen(string_arg), &tmp_out); if (tmp_ret != 0) cgc__terminate(1); character_count += tmp_out; #else while ( *string_arg && output_strlen > 0 ) { putc( *string_arg ); character_count++; string_arg++; --output_strlen; } #endif // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified cgc_putc(' '); character_count++; } } fmt++; continue; } } else { cgc_putc( *fmt ); fmt++; character_count++; } } return (character_count); } int cgc_vsprintf( char *str, const char *fmt, va_list arg ) { int character_count = 0; char temp_buf[64]; int width, precision, zero_padding, left_justification; int i; int pad_len; if ( fmt == NULL ) return -1; while ( *fmt ) { if ( *fmt == '@' ) { fmt++; width=0; precision=0; zero_padding=0; left_justification=0; pad_len=0; if (*fmt == '@') { str[character_count]='@'; fmt++; character_count++; continue; } if (*fmt == '-') { left_justification=1; ++fmt; } if (cgc_isdigit(*fmt)) { if (*fmt == '0') { zero_padding = 1; fmt++; } if (cgc_isdigit(*fmt)) { width = cgc_atoi(fmt); while (cgc_isdigit(*fmt)) fmt++; } } if (*fmt == '.') { 
// skip over the '.' fmt++; precision=cgc_atoi(fmt); // now skip past the integer precision value while (cgc_isdigit(*fmt)) fmt++; } // single char if (*fmt == 'c') { char c = (char )va_arg(arg, int); pad_len = width - 1; // justify right if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) str[character_count]='0'; else str[character_count]=' '; character_count++; } } str[character_count]=c; character_count++; // justify to the left if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified str[character_count]=' '; character_count++; } } fmt++; continue; } // Integer if (*fmt == 'd') { int int_arg = va_arg( arg, int ); char *c; cgc_int_to_str( int_arg, temp_buf ); // is the output string shorter than the desired width? pad_len = width - cgc_strlen(temp_buf); // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) str[character_count]='0'; else str[character_count]=' '; character_count++; } } // now output the integer value c = temp_buf; while ( *c ) { str[character_count]=*c; character_count++; c++; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified str[character_count]= ' '; character_count++; } } fmt++; continue; } // hex if ( *fmt == 'x') { unsigned int int_arg = va_arg( arg, unsigned int ); char *c; cgc_int_to_hex( int_arg, temp_buf ); // is the output string shorter than the desired width? 
pad_len = width - cgc_strlen(temp_buf); // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) str[character_count] = '0'; else str[character_count] = ' '; character_count++; } } // now output the hex string c = temp_buf; while ( *c ) { str[character_count]=*c; character_count++; c++; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified str[character_count]=' '; character_count++; } } fmt++; continue; } // Float if (*fmt =='f' ) { double float_arg = va_arg( arg, double ); char *c; cgc_float_to_str( float_arg, temp_buf, precision); // is the output string shorter than the desired width? pad_len = width - cgc_strlen(temp_buf); // pad the output with spaces or zeros if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) str[character_count]='0'; else str[character_count]=' '; character_count++; } } // now output the float value c = temp_buf; while ( *c ) { str[character_count]= *c; character_count++; c++; } fmt++; continue; } if (*fmt== 's' ) { // String char *string_arg = va_arg( arg, char * ); int output_strlen; if (precision > 0 && precision < cgc_strlen(string_arg)) output_strlen = precision; else output_strlen = cgc_strlen(string_arg); pad_len = width - output_strlen; // pad the output with spaces or zeros // right justification if (!left_justification) { for (i=0; i< pad_len; ++i) { if (zero_padding) str[character_count]='0'; else str[character_count]=' '; character_count++; } } while ( *string_arg && output_strlen > 0 ) { str[character_count]= *string_arg; character_count++; string_arg++; --output_strlen; } // left justification if (left_justification) { for (i=0; i< pad_len; ++i) { // the option to pad with 0 is ignored when left justified str[character_count]=' '; character_count++; } } fmt++; continue; } } else { str[character_count]= *fmt; fmt++; character_count++; } } return (character_count); } int cgc_printf( 
const char *fmt, ... ) { va_list arg; int done; char large_buff[4096]; cgc_size_t tx_count; va_start( arg, fmt ); // done = vsprintf(large_buff, fmt, arg); // transmit( STDOUT, large_buff, done, &tx_count ); done = cgc_vprintf( fmt, arg ); va_end( arg ); return done; } int cgc_sprintf( char *str, const char *fmt, ... ) { va_list arg; int done; va_start( arg, fmt ); done = cgc_vsprintf( str, fmt, arg ); va_end( arg ); return done; } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/lib/stdlib.c ================================================ /* Author: Jason Williams Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * Character and floating-point classification helpers.
 * Every predicate returns 1 for a match and 0 otherwise, except
 * cgc_isnan()/cgc_isinf(), which forward the compiler builtin's result.
 */

/* 1 when c lies in 0x61..0x7a ('a'..'z' in ASCII). */
int cgc_islower( int c )
{
    return ( c >= 0x61 && c <= 0x7a );
}

/* 1 when c lies in 0x41..0x5a ('A'..'Z' in ASCII). */
int cgc_isupper( int c )
{
    return ( c >= 0x41 && c <= 0x5a );
}

/* 1 when c is a decimal digit. */
int cgc_isdigit( int c )
{
    return ( c >= '0' && c <= '9' );
}

/* 1 when c is a lower- or upper-case letter. */
int cgc_isalpha( int c )
{
    return cgc_islower( c ) | cgc_isupper( c );
}

/* 1 when c is a letter or a decimal digit. */
int cgc_isalnum( int c )
{
    return cgc_isalpha( c ) | cgc_isdigit( c );
}

/* 1 when c is a space, tab, newline, vertical tab, form feed or CR. */
int cgc_isspace( int c )
{
    switch ( c )
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}

/* Non-zero when val is a NaN. */
int cgc_isnan( double val )
{
    return __builtin_isnan( val );
}

/* Non-zero when val is an infinity. */
int cgc_isinf( double val )
{
    return __builtin_isinf( val );
}
) { if ( part == 0 || part == 1 ) part = 2; else return 0.0; } else if ( cgc_isdigit( *str ) ) { if ( part == 0 || part == 1 ) { // In integer part part = 1; val = (val * 10.0) + (*str - '0'); } else if ( part == 2 ) { val += ((*str - '0') * scale); scale /= 10.0; } else { // part invalid return 0.0; } } else break; str++; } return (sign * val); } int cgc_atoi(const char* str) { if ( str == NULL ) return 0; int integer_part = 0; int sign = 1; int part; int digit_count = 0; // Skip whitespace while ( cgc_isspace( str[0] ) ) str++; part = 0; // First part (+/-/number is acceptable) while( str[0] != '\0' ) { if ( str[0] == '-' ) { if ( part != 0 ) return 0; sign = -1; part++; } else if ( str[0] == '+' ) { if ( part != 0 ) return 0; part++; } else if ( cgc_isdigit( *str ) ) { if ( part == 0 || part == 1 ) { // In integer part part = 1; integer_part = (integer_part * 10) + (*str - '0'); digit_count++; if ( digit_count == 10 ) break; } else { // part invalid return 0; } } else break; str++; } return (sign * integer_part); } char *cgc_strcpy( char *dest, char *src ) { cgc_size_t i; for ( i = 0; ; i++ ) { if ( src[i] == '\0' ) break; dest[i] = src[i]; } dest[i] = '\0'; return (dest); } char *cgc_strncpy( char *dest, const char *src, cgc_size_t n ) { cgc_size_t i; for ( i = 0; i < n && src[i] != '\0'; i++) dest[i] = src[i]; for ( ; i < n; i++) dest[i] = '\0'; return (dest); } void cgc_bzero( void *buff, cgc_size_t len ) { cgc_size_t index = 0; unsigned char *c = buff; if ( buff == NULL ) { goto end; } if ( len == 0 ) { goto end; } for ( index = 0; index < len; index++ ) { c[index] = 0x00; } end: return; } void *cgc_memset(void *s, int c, cgc_size_t n) { unsigned char *t = (unsigned char *)s; while (--n) t[n] = (unsigned char)c; t[n] = (unsigned char)c; return(s); } int cgc_strcmp( const char *s1, const char *s2 ) { while ( *s1 && (*s1 == *s2) ) { s1++,s2++; } return (*(const unsigned char *)s1 - *(const unsigned char *)s2); } char *cgc_strncat ( char *dest, const char *src, 
/*
 * Drains whatever input is currently pending on fd.
 * Returns 0 once nothing more is pending, or the cgc_fdwait() error code.
 */
int cgc_flush_input( int fd )
{
    cgc_fd_set read_fds;
    struct cgc_timeval tv;
    char buffer[1024];
    cgc_size_t rcv_cnt;
    int ready_fd;
    int err;

    for ( ;; )
    {
        cgc_bzero( (char *)&read_fds, sizeof(read_fds) );
        FD_SET( fd, &read_fds );

        tv.tv_sec = 0;
        tv.tv_usec = 10;

        err = cgc_fdwait( fd + 1, &read_fds, NULL, &tv, &ready_fd );
        if ( err != 0 )
            return err;

        if ( !FD_ISSET( fd, &read_fds ) )
            break;

        /* Stop on a failed or zero-byte read; otherwise a closed descriptor
         * that keeps reporting "readable" would spin here forever. */
        rcv_cnt = 0;
        if ( cgc_receive( fd, buffer, sizeof(buffer), &rcv_cnt ) != 0 || rcv_cnt == 0 )
            break;
    }

    return 0;
}

/*
 * Reads one '\n'-terminated line from STDIN into buffer (at most len bytes)
 * and NUL-terminates it.  Returns the count reported by cgc_receive_until();
 * when that equals len the stored line is cut to len - 1 characters.
 * A zero-length buffer is left untouched and 0 is returned.
 */
cgc_size_t cgc_getline( char *buffer, cgc_size_t len )
{
    cgc_size_t count;

    if ( len == 0 )
        return 0;

    count = cgc_receive_until( buffer, '\n', len );

    if ( count == len )
        buffer[len - 1] = 0;
    else
        buffer[count] = 0;

    return count;
}

/*
 * Reads bytes from STDIN into dst until delim is seen (the delimiter is not
 * stored) or max bytes have been stored.  Returns the number of bytes
 * stored, or 0 when a read fails or delivers no data.  When the limit is
 * reached, the rest of the pending input is discarded.
 */
cgc_size_t cgc_receive_until( char *dst, char delim, cgc_size_t max )
{
    cgc_size_t len = 0;
    cgc_size_t rx = 0;
    char c = 0;

    while ( len < max )
    {
        dst[len] = 0x00;
        rx = 0;

        /* A read that returns no byte would otherwise store a stale value. */
        if ( cgc_receive( STDIN, &c, 1, &rx ) != 0 || rx == 0 )
        {
            len = 0;
            break;
        }

        if ( c == delim )
            break;

        dst[len++] = c;
    }

    if ( len == max )
        cgc_flush_input( STDIN );

    return len;
}

/*
 * Reads exactly size bytes from STDIN into buffer.
 * Returns 0 on success, or -1 when a read fails or the input ends early
 * (a zero-byte read previously made this loop spin forever).
 */
int cgc_receive_bytes( unsigned char *buffer, cgc_size_t size )
{
    cgc_size_t count = 0;
    cgc_size_t remaining = size;
    cgc_size_t rxbytes;

    while ( remaining )
    {
        rxbytes = 0;

        if ( cgc_receive( STDIN, buffer + count, remaining, &rxbytes ) != 0 )
            return -1;
        if ( rxbytes == 0 )
            return -1;

        remaining -= rxbytes;
        count += rxbytes;
    }

    return 0;
}

/*
 * Appends src to dest and NUL-terminates the result.
 * Returns the length of the combined string, or 0 when either argument is
 * NULL.
 */
cgc_size_t cgc_strcat( char *dest, char *src )
{
    cgc_size_t out;
    cgc_size_t in;

    if ( dest == NULL || src == NULL )
        return 0;

    out = cgc_strlen( dest );
    for ( in = 0; src[in] != 0x00; in++, out++ )
        dest[out] = src[in];

    /* The copy loop stops before the terminator, so write it explicitly. */
    dest[out] = 0x00;

    return out;
}

/* Returns the length of str, or 0 when str is NULL. */
cgc_size_t cgc_strlen( char *str )
{
    cgc_size_t length = 0;

    if ( str != NULL )
    {
        while ( str[length] )
            length++;
    }

    return length;
}
char *out, cgc_size_t val, cgc_size_t max ) { cgc_size_t length = 0; cgc_size_t end = 0; cgc_size_t temp = 0; if ( out == NULL ) { goto end; } // Calculate the needed length temp = val; do { end++; temp /= 10; } while ( temp ); // ensure we have enough room if ( end >= max ) { goto end; } length = end; // Subtract one to skip the null end--; do { out[end] = (val % 10) + 0x30; val /= 10; end--; } while ( val ); out[length] = 0x00; end: return length; } void cgc_puts( char *t ) { cgc_size_t size; cgc_transmit(STDOUT, t, cgc_strlen(t), &size); cgc_transmit(STDOUT, "\n", 1, &size); } char *cgc_strchr(const char *s, int c) { while (*s != '\0') { if (*s == c) { return((char *)s); } s++; } if (*s == c) { return((char *)s); } return(NULL); } char *cgc_token = NULL; char *cgc_prev_str = NULL; unsigned int cgc_prev_str_len = 0; char *cgc_prev_str_ptr = NULL; char *cgc_strtok(char *str, const char *delim) { char *start; char *end; char *t; int i; // invalid input if (delim == NULL) { return(NULL); } // called on existing string if (str == NULL) { if (cgc_prev_str == NULL) { return(NULL); } // already parsed through end of original str if (cgc_prev_str_ptr >= cgc_prev_str+cgc_prev_str_len) { return(NULL); } } else { // called with new string, so free the old one if (cgc_prev_str) { cgc_deallocate(cgc_prev_str, cgc_prev_str_len); cgc_prev_str = NULL; cgc_prev_str_len = 0; cgc_prev_str_ptr = NULL; } } // not been called before, so make a copy of the string if (cgc_prev_str == NULL) { if (cgc_strlen(str) > 4096) { // too big return(NULL); } cgc_prev_str_len = cgc_strlen(str); if (cgc_allocate(cgc_prev_str_len, 0, (void *)&cgc_prev_str)) { return(NULL); } cgc_strcpy(cgc_prev_str, str); cgc_prev_str_ptr = cgc_prev_str; } str = cgc_prev_str_ptr; // make sure the string isn't starting with a delimeter while (cgc_strchr(delim, str[0]) && str < cgc_prev_str+cgc_prev_str_len) { str++; } if (str >= cgc_prev_str+cgc_prev_str_len) { return(NULL); } // find the earliest next delimiter start 
= str; end = str+cgc_strlen(str); for (i = 0; i < cgc_strlen((char *)delim); i++) { if ((t = cgc_strchr(start, delim[i]))) { if (t != NULL && t < end) { end = t; } } } // populate the new token cgc_token = start; *end = '\0'; cgc_prev_str_ptr = end+1; return(cgc_token); } cgc_size_t cgc_write( const void *buf, cgc_size_t count ) { cgc_size_t size; cgc_transmit(STDOUT, buf, count, &size); if (count != size) return(-1); return(size); } char *cgc_strdup(char *s) { char *retval; if (!s) { return(NULL); } if (cgc_allocate(cgc_strlen(s)+1, 0, (void *)&retval)) { return(NULL); } cgc_bzero(retval, cgc_strlen(s)+1); cgc_strcpy(retval, s); return(retval); } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/notes.txt ================================================ In the compiled String_Storage_and_Retrieval.exe executable, the main function is at offset 0x6ac0 (0x406ac0 address). ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/bst.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_bst.h" #include "cgc_malloc.h" #include "cgc_stdlib.h" #include "cgc_service.h" int cgc_insert_node(bst_node_type **head, data_item_type *data, unsigned long make_key()) { bst_node_type *tmp_node; unsigned long key_val=0; data_item_type *next_ptr; // if no data was passed in, return error if (data==0) return(-1); // make sure the tree already exists if (!*head) { *head=(bst_node_type *)cgc_calloc(1, sizeof(bst_node_type)); // memory allocation failed if (!*head) { return(-1); } // it does exist now, so just stick our data in it since no searching necessary key_val=make_key(data); if (key_val > 0) { (*head)->data=data; (*head)->data_count = 1; (*head)->key=key_val; return(0); } else { return(-1); } } // if (!head) key_val=make_key(data); tmp_node=*head; while (1) { if (key_val < tmp_node->key) { if (tmp_node->left != 0) { tmp_node=tmp_node->left; } else { tmp_node->left=(bst_node_type *)cgc_calloc(1, sizeof(bst_node_type)); if (!tmp_node->left) { return(-1); } tmp_node->left->data=data; tmp_node->left->data_count=1; tmp_node->left->key=key_val; return(0); } } else if (key_val > tmp_node->key) { if (tmp_node->right !=0) { tmp_node=tmp_node->right; } else { tmp_node->right=(bst_node_type *)cgc_calloc(1, sizeof(bst_node_type)); if (!tmp_node->right) { return(-1); } tmp_node->right->data=data; tmp_node->right->data_count=1; tmp_node->right->key=key_val; return(0); } } // should only happen if the keys are equal else { // if the strings don't match, its a key collision if (cgc_strcmp(tmp_node->data->name, data->name)!=0) { // so move to the end of the linked list and add the new data next_ptr=tmp_node->data; while (next_ptr->next != 0) next_ptr=next_ptr->next; next_ptr->next = 
data; // increment the count of the number in this linked list tmp_node->data_count++; } return(0); } } } // insert_node() int cgc_delete_node(bst_node_type **head, char *str, unsigned long key) { bst_node_type *tmp_node, *promote_node, *previous_node; data_item_type *prev_ptr, *next_ptr; if (!*head) { return(0); } // first locate the node tmp_node=*head; while(tmp_node->key != key) { previous_node=tmp_node; if (key < tmp_node->key) { tmp_node=tmp_node->left; } else { tmp_node=tmp_node->right; } // didn't find the key, just return if (tmp_node==0) { return(0); } } // a new special case since this implementation allows for multiple values stored for the same key // if there is more than one data element in the matching node, the node won't get deleted if (tmp_node->data_count > 1) { prev_ptr=tmp_node->data; next_ptr=prev_ptr->next; // check the first entry because if its the match we need to update the pointer stored in the BST node if (cgc_strcmp(tmp_node->data->name, str)==0) { cgc_free(prev_ptr); tmp_node->data=next_ptr; tmp_node->data_count--; return (0); } // otherwise, cycle through the remainder of the linked list looking for a match while (next_ptr!= 0) { // cgc_printf("looking for appended element\n"); if (cgc_strcmp(next_ptr->name, str)==0) { prev_ptr->next = next_ptr->next; tmp_node->data_count--; #ifdef PATCHED #else next_ptr=next_ptr->next; #endif cgc_free(next_ptr); break; } else { prev_ptr=next_ptr; next_ptr=next_ptr->next; } } return(0); } // if the node has no children, delete it if (tmp_node->left == 0 && tmp_node->right == 0) { //cgc_printf("only one child node on this delete\n"); if (tmp_node==*head) { cgc_free((*head)->data); cgc_free(*head); *head=0; return 0; } if (previous_node->key > key) previous_node->left=0; else previous_node->right=0; cgc_free(tmp_node->data); cgc_free(tmp_node); } // if the node has two children, promote its in-order predecessor and then delete the old node. 
else if (tmp_node->left != 0 && tmp_node->right !=0 ) { // first find the node to be promoted. Go left in the tree, and then all the way right. previous_node=tmp_node; promote_node=tmp_node->left; while (promote_node->right != 0) { previous_node=promote_node; promote_node=promote_node->right; } // Now delete the data from the node to be deleted and move the promote node's data there cgc_free(tmp_node->data); tmp_node->data=promote_node->data; // now delete the old location of the promote node if (promote_node->key > previous_node->key) { previous_node->right=promote_node->left; } else { previous_node->left=promote_node->left; } tmp_node->key=promote_node->key; tmp_node->data_count=promote_node->data_count; cgc_free(promote_node); } // if the node has one child, promote that child else { if (tmp_node==*head) { cgc_free((*head)->data); if (tmp_node->left !=0) *head=(*head)->left; else *head=(*head)->right; cgc_free(tmp_node); return 0; } if (tmp_node->left != 0) promote_node=tmp_node->left; else promote_node=tmp_node->right; if (previous_node->key > key) previous_node->left=promote_node; else previous_node->right=promote_node; cgc_free(tmp_node->data); cgc_free(tmp_node); } return(0); } // delete_node() void *cgc_find_node_by_key(bst_node_type *head, unsigned long key) { bst_node_type *tmp_node; if (!head) { return(0); } tmp_node=head; while(tmp_node->key != key) { if (key < tmp_node->key) { tmp_node=tmp_node->left; } else { tmp_node=tmp_node->right; } // key not found, return 0 if (tmp_node==0) { return(0); } } // otherwise, return the address of the node return(tmp_node->data); } // find_node() int cgc_walk_tree(bst_node_type *node) { data_item_type *tmp_ptr; if (node==0) return 0; // recursively walk the tree and print out the data from each node // classically called in-order traversal // first go left if (node->left != 0) cgc_walk_tree(node->left); // now process this node // since this can now be a linked list of data, set a pointer to the head 
tmp_ptr=node->data; // walk the linked list while (tmp_ptr!= 0) { cgc_printf("@s\n", tmp_ptr->name); tmp_ptr=tmp_ptr->next; } // now walk the right side of the node if (node->right !=0) cgc_walk_tree(node->right); return(0); } // walk_tree() // this is used to make a key for the BST based on the input string unsigned long cgc_make_key_from_name(void *data_item) { unsigned long key_val; char *tmp; key_val = 0; tmp=((data_item_type *)data_item)->name; key_val=cgc_str_token(tmp); return(key_val); } // count the nodes in the tree, noting how many are in the left vs right of the tree int cgc_count_bst_stats(bst_node_type *head, bst_stats_type *stats) { unsigned int total_node_count; unsigned int left_node_count; unsigned int right_node_count; unsigned int percent_left; unsigned int percent_right; bst_stats_type left_stats, right_stats; if (head == 0 || stats == 0) return -1; stats->node_count = 0; left_stats.node_count = 0; right_stats.node_count = 0; // first count the left side of the tree if (head->left != 0) { cgc_count_nodes(head->left, & left_stats); } left_node_count= left_stats.node_count; // then count the right side if (head->right != 0) { cgc_count_nodes(head->right, &right_stats); } right_node_count= right_stats.node_count; total_node_count = left_node_count + right_node_count; // include the head node in the total count if there's data if (head->data) total_node_count++; stats->node_count = total_node_count; // if there are less than 2 nodes, there can't be any in the left or right subtrees. 
if (total_node_count < 2) { stats->percent_left = 0; stats->percent_right = 0; } else { // but don't include it when calculating the balance of the tree so sub 1 stats->percent_left = left_node_count * 100 / (total_node_count - 1); stats->percent_right = right_node_count * 100 / (total_node_count - 1); } return 0; } // recursive function to count the number of nodes in the tree int cgc_count_nodes(bst_node_type *node, bst_stats_type *stats) { if (node->left != 0) { cgc_count_nodes(node->left, stats); } stats->node_count++; if (node->right != 0) { cgc_count_nodes(node->right, stats); } return 0; } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/delete_matches.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_bst.h" #include "cgc_stdlib.h" void cgc_delete_matches(bst_node_type **head, int arg_count, char **args) { cgc_delete_node(head, args[1], cgc_make_key_from_name(args[1])); } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/find_matches.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_bst.h" #include "cgc_stdlib.h" void cgc_find_matches(bst_node_type *head, int arg_count, char **args) { data_item_type *item; item=0; //use the built-in functionality of a BST and find by the key value item=cgc_find_node_by_key(head, cgc_make_key_from_name(args[1])); if (item) { while (item!= 0) { if (cgc_strcmp(args[1], item->name)==0 ) { cgc_printf("@s\n", item->name); break; } else item=item->next; } } } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/parse.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "cgc_stdlib.h" int cgc_parse_command(char *buffer, char separator, char ***args) { int arg_count; char *tmp; int i; if (*buffer != 0) { // there is at least 1 because this function was called with a string arg_count = 1; tmp=buffer; } else return -1; // count how many fields are on the command so memory can be allocated for the pointers while(*tmp++ != 0) { if (*tmp==separator) ++arg_count; } // while // allocate one char pointer for each field in the command *args=cgc_calloc(arg_count, sizeof(char *)); // failed to allocate the memory if (*args==0) { return -1; } // start back from the beginning of the buffer tmp=buffer; // for each field, store a pointer to the first char for (i=0;i< arg_count; ++i) { (*args)[i]=tmp; while (*tmp!= separator && *tmp != 0) ++tmp; // if we found the separator, overwrite it with a null so our args will be properly terminated if (*tmp !=0) { *tmp=0; ++tmp; } } //for return (arg_count); } ================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/service.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_stdlib.h" #include "cgc_service.h" #include "cgc_bst.h" int main(int cgc_argc, char *cgc_argv[]) { bst_node_type *head; data_item_type *item; bst_stats_type stats; long value; char buffer[2048]; cgc_size_t size; char **args; int arg_count; int i; // loop forever receiving command strings from STDIN. while (1) { size=cgc_getline(buffer, sizeof(buffer)); // ignore that a blank line was entered if (size==0) continue; // break the command into an array arg_count=cgc_parse_command(buffer, '/', &args); if (arg_count == -1) continue; // add a new element to the database if (args[0][0]=='a' && arg_count==2 ) { // if the string to add is less than 10 chars, ignore the add command if (cgc_strlen(args[1]) < 10) { cgc_free(args); continue; } item=cgc_calloc(1, sizeof(data_item_type)); if (!item) { cgc_printf("Unable to allocate memory\n"); return(-1); } cgc_strncpy(item->name, args[1], sizeof(item->name)-1); item->next = 0; cgc_insert_node(&head, item, cgc_make_key_from_name); } // search for elements of the database else if (args[0][0]=='f' && arg_count==2) { cgc_find_matches(head, arg_count, args); } // delete an element from the database else if (args[0][0]=='d' && arg_count==2) { cgc_delete_matches(&head, arg_count, args); } // time to cgc_exit else if (args[0][0]=='x') { return(0); } // walk the database and show all entries else if (args[0][0]=='w' ) { cgc_walk_tree(head); } else if (args[0][0]=='s' ) { // clear the stats cgc_bzero(&stats, sizeof(stats)); cgc_count_bst_stats(head, &stats); cgc_printf("# of Nodes: @d\n", stats.node_count); cgc_printf("\% left nodes: @d\n", stats.percent_left); cgc_printf("\% right nodes: @d\n", stats.percent_right); } cgc_free(args); } } // main 
================================================ FILE: corpus/cgc/String_Storage_and_Retrieval/src/string_token.c ================================================ /* Author: Steve Wood Copyright (c) 2014 Cromulence LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cgc_stdlib.h" unsigned long cgc_str_token( char *str) { unsigned long tmp_token; int i; if (str==0) return 0; // seed the token algorithm tmp_token = 5381; while (*str) { tmp_token = ((tmp_token << 5)+ tmp_token) + *str; ++str; } return tmp_token; } ================================================ FILE: corpus/cgc/UTF-late/README.md ================================================ # CGC Challenge Binary Template ## Author Information "Maxwell Koo" ### DARPA performer group Narf Industries (NRFIN) ## Description I'm so UTF-8, you so UTF-late. This is a network-enabled file server, which keeps all user files in memory for blazing-quick access. The API exposes a public dropbox folder, while preventing access to the protected admin area. 
The filesystem is UTF-8 aware, allowing arbitrary unicode filenames of a fixed length. Commands take the form of a 4-byte command identifier, followed by optional arguments. Commands may produce output, but will always end in a 4-byte success code, 0 on success or -1 on failure.

### Feature List

The following features are available:

- Read file, accepts a filename and returns the contents of that file
- Write file, accepts a filename and a size, writes the contents to a file
- List files, lists all files in the dropbox folder

## Vulnerability

The UTF-8 decoding function does not properly handle overlong encodings, so it is possible to defeat the input validation protecting the admin area and write to files in that area. Files in the admin area contain a user-controlled pointer value, which allows arbitrary write access to a large portion of the process' address space.

### Generic class of vulnerability
Improper handling of unicode encoding

### CWE classification
CWE-20 Improper input validation
CWE-22 Improper limitation of a pathname to a restricted directory
CWE-176 Improper handling of unicode encoding
CWE-822 Untrusted pointer dereference

## Challenges

This CB is designed to test the ability of a CRS to comprehend complex string functions and stateful processes spanning multiple invocations of a function. UTF-8 is a complex variable-width encoding with many possible invalid byte sequences. The decoder implemented in this CB handles this by running each character through a state machine, invoking a function on each byte which records its state into an output variable and its return code. Reasoning over separate invocations of the decoder function is necessary in order to determine an input which will lead to triggering the vulnerability.

This CB is inspired by several real-world path traversal and unicode vulnerabilities, including CVE-2000-0884 in Microsoft IIS.

================================================ FILE: corpus/cgc/UTF-late/lib/cgc_libc.h ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * @file libc.h * * Reusable standard C functions. */ #ifndef LIBC_H_ #define LIBC_H_ #include "libcgc.h" #define EXIT_SUCCESS 0 #define EXIT_FAILURE -1 #define PAGE_SIZE (1 << 12) /** * Return the lesser of a and b * * @param a The first value * @param b The second value * @return a if a < b else b */ #define MIN(a, b) ((a) < (b) ? (a) : (b)) /** * Return the greater of a and b * * @param a The first value * @param b The second value * @return a if a > b else b */ #define MAX(a, b) ((a) > (b) ? 
(a) : (b))

/**
 * Find the offset of a struct member
 *
 * @param type The struct type to examine
 * @param member The member to calculate the offset of
 * @return The offset of member in type
 */
#define OFFSETOF(type, member) ((cgc_size_t)(&((type *)NULL)->member))

/**
 * Find the container structure from a pointer to a member.
 *
 * Written as a statement expression so that ptr is evaluated exactly once.
 * A NULL ptr yields NULL instead of a pointer offset backwards from NULL.
 *
 * @param type The struct type to examine
 * @param member The member ptr points to
 * @param ptr A pointer to a member
 * @return A pointer to the containing structure, or NULL if ptr is NULL
 */
#define CONTAINEROF(type, member, ptr) ({ \
    char *__ptr = (char *)(ptr); \
    __ptr ? ((type *)(__ptr - OFFSETOF(type, member))) : NULL; \
})

/**
 * Read exactly n bytes from fd to a buffer.
 *
 * @param fd A file descriptor
 * @param buf The destination buffer
 * @param n The number of bytes to read
 * @return The number of bytes read, or negative on failure
 */
cgc_ssize_t cgc_read_all(int fd, void *buf, cgc_size_t n);

/**
 * Write exactly n bytes to an fd from a buffer.
 *
 * @param fd A file descriptor
 * @param buf The source buffer
 * @param n The number of bytes to write
 * @return The number of bytes written, or negative on failure
 */
cgc_ssize_t cgc_write_all(int fd, void *buf, cgc_size_t n);

/**
 * Allocate a chunk of memory on the heap.
 *
 * @param size The size of the chunk to allocate
 * @return A pointer to the new chunk, or NULL if allocation failed
 */
void *cgc_malloc(cgc_size_t size);

/**
 * Free a chunk of memory allocated with malloc().
 *
 * @param ptr The chunk to free
 */
void cgc_free(void *ptr);

/**
 * Allocate a zeroed chunk of memory on the heap.
 *
 * Note: This differs from standard libc calloc by taking the full size of the
 * chunk to allocate as its only parameter.
 *
 * @param size The size of the chunk to allocate
 * @return A pointer to the new chunk, or NULL if allocation failed
 */
void *cgc_calloc(cgc_size_t size);

/**
 * Resize a chunk of memory allocated with malloc().
*
 * @param ptr The chunk to resize
 * @param size The new size of the chunk
 * @return A pointer to the new chunk, or NULL if allocation failed
 */
void *cgc_realloc(void *ptr, cgc_size_t size);

/**
 * Set the first n bytes of a block of memory to a value.
 *
 * @param ptr_ A pointer to a block of memory
 * @param val The value to set each byte to, interpreted as an unsigned char
 * @param n The number of bytes to set
 * @return ptr_
 */
void *cgc_memset(void *ptr_, int val, cgc_size_t n);

/**
 * Copy n bytes from src to dst.
 *
 * @param dst_ The destination buffer
 * @param src_ The source buffer
 * @param n The number of bytes to copy
 * @return dst_
 */
void *cgc_memcpy(void *dst_, const void *src_, cgc_size_t n);

/**
 * Copy at most the first n characters of a null-terminated string from src to
 * dst.
 *
 * @param dst The destination buffer
 * @param src The source buffer
 * @param n The maximum number of bytes to copy
 * @return dst
 */
char *cgc_strncpy(char *dst, const char *src, cgc_size_t n);

/**
 * Return the length of a null-terminated string.
 *
 * @param s The string
 * @return The length of s
 */
cgc_size_t cgc_strlen(const char *s);

/**
 * Compare two null-terminated strings
 *
 * @param a The first string
 * @param b The second string
 * @return negative if a < b, 0 if a == b, positive if a > b
 */
int cgc_strcmp(const char *a, const char *b);

/**
 * Compare the first n bytes of two null-terminated strings
 *
 * @param a The first string
 * @param b The second string
 * @param n The number of bytes to compare
 * @return negative if a < b, 0 if a == b, positive if a > b
 */
int cgc_strncmp(const char *a, const char *b, cgc_size_t n);

/**
 * Find the first instance of character c in s.
 *
 * @param s The string to search
 * @param c The character to find
 * @return A pointer to the first instance of c in s or NULL if not found
 */
char *cgc_strchr(const char *s, char c);

/**
 * Find the last instance of character c in s.
*
 * @param s The string to search
 * @param c The character to find
 * @return A pointer to the last instance of c in s or NULL if not found
 */
char *cgc_strrchr(const char *s, char c);

/**
 * Break up a string into tokens separated by a character.
 *
 * NOTE: This differs from standard strtok by only accepting one delimiter
 * character.
 *
 * @param s The string to tokenize or NULL to continue, may be modified
 * @param d The delimiter character
 * @return The next token
 */
char *cgc_strtok(char *s, char d);

/* The following is verbatim from EAGLE_00004, but isn't included in the
 * released binary (DEBUG is not defined), so this reuse shouldn't be a concern.
 */
#ifdef DEBUG

/* Bit flags stored in FILE.state */
#define _FILE_STATE_OPEN  1
#define _FILE_STATE_ERROR 2
#define _FILE_STATE_EOF   4
#define _FILE_HAVE_LAST   8

/* Minimal stream object: a descriptor plus state flags and one saved value */
typedef struct _FILE {
   int fd;
   int state;
   int last;
} FILE;

extern FILE *cgc_stdin;
extern FILE *cgc_stdout;
extern FILE *cgc_stderr;

/**
 * Formatted output to a stream.
 *
 * @param stream The stream to write to
 * @param format The format specifier
 * @return number of bytes written
 */
int fprintf(FILE * stream, const char *format, ...);

/**
 * Formatted output to cgc_stdout.
*
 * @param format The format specifier
 * @return number of bytes written
 */
int cgc_printf(const char *format, ...);

#endif /* DEBUG */

#endif /* LIBC_H_ */


================================================
FILE: corpus/cgc/UTF-late/lib/cgc_list.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * @file list.h
 *
 * Routines for manipulating intrusive doubly-linked lists, based on Linux kernel list.h
 * interface
 */

#ifndef LIST_H_
#define LIST_H_

#include "cgc_libc.h"

/**
 * Link fields embedded in every entry that lives on a list.
 * The first node has prev == NULL and the last node has next == NULL.
 */
struct list_node {
    struct list_node *next, *prev;
};

/**
 * A list, tracked by its first and last nodes. Both are NULL when the list
 * is empty.
 */
struct list {
    struct list_node *head, *tail;
};

/**
 * Initialize a list
 *
 * @param list A pointer to the list
 */
#define list_init(list) do { (list)->head = (list)->tail = NULL; } while (0)

/**
 * Get the entry associated with a particular node.
* * @param type The type of the list entry * @param member The member of the entry containing the list node * @param node The list node * @return The entry associated with the node */ #define list_entry(type, member, node) CONTAINEROF(type, member, node) /** * Traverse a list in the forward direction. * * @param list A pointer to the list * @param cur The variable to use as an iterator */ #define list_for_each(list, cur) \ for (cur = (list)->head; cur != NULL; cur = (cur)->next) /** * Traverse a list in the backwards direction. * * @param list A pointer to the list * @param cur The variable to use as an iterator */ #define list_for_each_reverse(list, cur) \ for (cur = (list)->tail; cur != NULL; cur = (cur)->prev) /** * Traverse a list's entries in the forward direction. * * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param cur The variable to use as an iterator */ #define list_for_each_entry(type, member, list, cur) \ for (cur = list_entry(type, member, (list)->head); \ cur != NULL; \ cur = list_entry(type, member, (cur)->member.next)) /** * Traverse a list's entries in the backwards direction. * * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param cur The variable to use as an iterator */ #define list_for_each_entry_reverse(type, member, list, cur) \ for (cur = list_entry(type, member, (list)->tail); \ cur != NULL; \ cur = list_entry(type, member, (cur)->member.prev)) /** * Safely traverse a list in the forward direction using a temporary. * * @param list A pointer to the list * @param n Temporary variable * @param cur The variable to use as an iterator */ #define list_for_each_safe(list, n, cur) \ for (cur = (list)->head, n = cur ? cur->next : NULL; \ cur != NULL; \ cur = n, n = cur ? 
(cur)->next : NULL) /** * Safely traverse a list in the backwards direction using a temporary. * * @param list A pointer to the list * @param n Temporary variable * @param cur The variable to use as an iterator */ #define list_for_each_safe_reverse(list, n, cur) \ for (cur = (list)->tail, n = cur ? cur->prev : NULL; \ cur != NULL; \ cur = n, n = cur ? (cur)->prev : NULL) /** * Safely traverse a list's entries in the forward direction using a temporary. * * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param n Temporary variable * @param cur The variable to use as an iterator */ #define list_for_each_entry_safe(type, member, list, n, cur) \ for (cur = list_entry(type, member, (list)->head), \ n = cur ? list_entry(type, member, (cur)->member.next) : NULL; \ cur != NULL; \ cur = n, \ n = cur ? list_entry(type, member, (cur)->member.next) : NULL) /** * Safely traverse a list's entries in the backwards direction using a * temporary. * * @param type the type of each list entry * @param member the member of each entry containing the list's nodes * @param list a pointer to the list * @param n Temporary variable * @param cur The variable to use as an iterator */ #define list_for_each_entry_safe_reverse(type, member, list, n, cur) \ for (cur = list_entry(type, member, (list)->tail), \ n = cur ? list_entry(type, member, (cur)->member.prev) : NULL; \ cur != NULL; \ cur = n, \ n = cur ? list_entry(type, member, (cur)->member.prev) : NULL) /** * Get the length of a list. * * @param list The list * @return The length of the list */ static inline unsigned int list_length(struct list *list) { unsigned int ret = 0; struct list_node *cur; list_for_each(list, cur) ret++; return ret; } /** * Insert a new node before a specific node. 
* * @param list The list * @param succ The node to insert before * @param toadd The node to add */ static inline void cgc_list_insert_before(struct list *list, struct list_node *succ, struct list_node *toadd) { if (toadd == NULL) return; toadd->next = toadd->prev = NULL; if (list->head == NULL && list->tail == NULL) { // Singleton list list->head = list->tail = toadd; } else if (succ == NULL) { // Insert at tail toadd->prev = list->tail; list->tail->next = toadd; toadd->next = NULL; list->tail = toadd; } else if (succ == list->head) { // Insert at head toadd->next = list->head; list->head->prev = toadd; toadd->prev = NULL; list->head = toadd; } else { // General case toadd->prev = succ->prev; toadd->next = succ; succ->prev->next = toadd; succ->prev = toadd; } } /** * Insert a new entry before a specific entry. * * @param type The type of the list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param succ The entry to insert before * @param toadd The entry to add */ #define list_insert_entry_before(type, member, list, succ, toadd) \ list_insert_before(list, succ ? &((type *)(succ))->member : NULL, \ &(toadd)->member) /** * Insert a new node after a specific node. * * @param list The list * @param pred The node to insert after * @param toadd The node to add */ static inline void cgc_list_insert_after(struct list *list, struct list_node *pred, struct list_node *toadd) { if (pred == NULL) cgc_list_insert_before(list, list->head, toadd); else cgc_list_insert_before(list, pred->next, toadd); } /** * Insert a new entry after a specific entry. * * @param type The type of the list entry * @param member The member of each entry containing the list's nodes * @param list The list * @param pred The entry to insert after * @param toadd The entry to add */ #define list_insert_entry_after(type, member, list, pred, toadd) \ list_insert_after(list, pred ? 
&((type *)(pred))->member : NULL, \ &(toadd)->member) /** * Insert a new node at the head of a list. * * @param list The list * @param toadd The node to add */ static inline void cgc_list_push_front(struct list *list, struct list_node *toadd) { cgc_list_insert_before(list, list->head, toadd); } /** * Insert a new entry at the head of a list. * * @param type The type of the list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param toadd The entry to add */ #define list_push_entry_front(type, member, list, toadd) \ cgc_list_push_front(list, &(toadd)->member) /** * Insert a new node at the tail of a list. * * @param list The list * @param toadd The node to add */ static inline void cgc_list_push_back(struct list *list, struct list_node *toadd) { cgc_list_insert_after(list, list->tail, toadd); } /** * Insert a new entry at the tail of a list. * * @param type The type of the list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param toadd The entry to add */ #define list_push_entry_back(type, member, list, toadd) \ list_push_back(list, &(toadd)->member) /** * Insert a new node in order as specified by a comparison function. * * @param list The list * @param toadd The node to add * @param cmp The comparison function */ static inline void cgc_list_insert_in_order(struct list *list, struct list_node *toadd, int (*cmp)(const struct list_node *, const struct list_node *)) { struct list_node *cur; list_for_each(list, cur) if (cmp(toadd, cur) >= 0) { cgc_list_insert_before(list, cur, toadd); return; } cgc_list_push_back(list, toadd); } /** * Insert a new entry in order as specified by a comparison function. * * Note: the comparision function still operates on struct list_nodes. 
* * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param toadd The entry to add * @param cmp The comparison function */ #define list_insert_entry_in_order(type, member, list, toadd, cmp) \ cgc_list_insert_in_order(list, &(toadd)->member, cmp) /** * Find a node in the list based on a predicate function. * * @param list The list * @param pred The predicate function * @param data Data to pass to the predicate function * @return The first list_node that matches from the head of the list, or NULL * if not found */ static inline struct list_node * cgc_list_find(const struct list *list, int (*pred)(const struct list_node *, void *), void *data) { struct list_node *cur; list_for_each(list, cur) if (pred(cur, data)) return cur; return NULL; } /** * Find an entry in the list based on a predicate function. * * Note: the predicate function still operates on struct list_nodes. * * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param pred The predicate function * @param data Data to pass to the predicate function * @return The first entry that matches from the head of the list, or NULL if * not found */ #define list_find_entry(type, member, list, pred, data) \ list_entry(type, member, cgc_list_find(list, pred, data)) /** * Remove a node from a list. * * @param list The list * @param torem The node to remove */ static inline void cgc_list_remove(struct list *list, struct list_node *torem) { if (torem == NULL) return; if (torem->prev == NULL) list->head = torem->next; else torem->prev->next = torem->next; if (torem->next == NULL) list->tail = torem->prev; else torem->next->prev = torem->prev; } /** * Remove an entry from a list. 
* * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @param torem The entry to remove */ #define list_remove_entry(type, member, list, torem) ({ \ type *__torem = (torem); \ cgc_list_remove(list, __torem ? &__torem->member : NULL); \ }) /** * Remove and return the node at the head of the list. * * @param list The list * @return The node at the head of the list */ static inline struct list_node * list_pop_front(struct list *list) { struct list_node *ret = list->head; cgc_list_remove(list, ret); return ret; } /** * Remove and return the entry at the head of the list. * * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @return The entry at the head of the list */ #define list_pop_entry_front(type, member, list) \ list_entry(type, member, list_pop_front(list)) /** * Remove and return the node at the tail of the list. * * @param list The list * @return The node at the tail of the list */ static inline struct list_node * list_pop_back(struct list *list) { struct list_node *ret = list->tail; cgc_list_remove(list, ret); return ret; } /** * Remove and return the entry at the tail of the list. 
* * @param type The type of each list entry * @param member The member of each entry containing the list's nodes * @param list A pointer to the list * @return The entry at the tail of the list */ #define list_pop_entry_back(type, member, list) \ list_entry(type, member, list_pop_back(list)) #endif /* LIST_H_ */ ================================================ FILE: corpus/cgc/UTF-late/lib/cgc_utf8.h ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * @file utf8.h * * Encode and decode UTF-8 encoded strings. 
*/ #ifndef UTF8_H_ #define UTF8_H_ #include "libcgc.h" enum utf8_decode_state { ACCEPT, REJECT, TWO_BYTE, THREE_BYTE, FOUR_BYTE }; typedef unsigned char utf8char; typedef unsigned int ucscodepoint; enum utf8_decode_state cgc_utf8_decode(enum utf8_decode_state state, utf8char c, ucscodepoint *out); cgc_ssize_t cgc_utf8_encode(ucscodepoint in, utf8char *out, cgc_size_t size); cgc_size_t cgc_utf8_canonicalize(utf8char *dst, const utf8char *src, cgc_size_t n); #endif /* UTF8_H_ */ ================================================ FILE: corpus/cgc/UTF-late/lib/cgc_vfs.h ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * @file vfs.h * * In-memory virtual file system. 
*/ #ifndef VFS_H_ #define VFS_H_ #include "cgc_libc.h" #include "cgc_list.h" #include "cgc_utf8.h" #define MAX_FILE_NAME_LENGTH 16 struct directory { utf8char name[MAX_FILE_NAME_LENGTH]; struct directory *parent; struct list subdirectories; struct list files; struct list_node list; }; struct file { utf8char name[MAX_FILE_NAME_LENGTH]; struct directory *parent; cgc_size_t size; unsigned char *contents; struct list_node list; }; struct vfs { struct directory *root; }; int cgc_vfs_init(struct vfs *vfs); void cgc_vfs_destroy(struct vfs *vfs); struct directory *cgc_lookup_dir(const struct vfs *vfs, const utf8char *path); struct file *cgc_lookup_file(const struct vfs *vfs, const utf8char *path); struct directory *cgc_create_dir(struct vfs *vfs, const utf8char *path); struct directory *cgc_create_dir_in_dir(struct vfs *vfs, struct directory *dir, const utf8char *name); struct file *cgc_create_file(struct vfs *vfs, const utf8char *path); struct file *cgc_create_file_in_dir(struct vfs *vfs, struct directory *dir, const utf8char *name); void cgc_delete_file(struct vfs *vfs, struct file *file); #ifdef DEBUG void dump_vfs(const struct vfs *vfs); #endif #endif /* VFS_H_ */ ================================================ FILE: corpus/cgc/UTF-late/lib/libc.c ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_libc.h" cgc_ssize_t cgc_read_all(int fd, void *buf, cgc_size_t n) { cgc_ssize_t ret = 0; cgc_size_t cgc_read; while (n) { if (cgc_receive(fd, (char *)(buf + ret), n, &cgc_read) != 0) return -1; n -= cgc_read; ret += cgc_read; } return ret; } cgc_ssize_t cgc_write_all(int fd, void *buf, cgc_size_t n) { cgc_ssize_t ret = 0; cgc_size_t written; while (n) { if (cgc_transmit(fd, (char *)(buf + ret), n, &written) != 0) return -1; n -= written; ret += written; } return ret; } void * cgc_memset(void *ptr_, int val, cgc_size_t n) { unsigned char *ptr = ptr_; while (n--) *ptr++ = (unsigned char)val; return ptr_; } void * cgc_memcpy(void *dst_, const void *src_, cgc_size_t n) { unsigned char *dst = dst_; const unsigned char *src = src_; while (n--) *dst++ = *src++; return dst_; } cgc_size_t cgc_strlen(const char *s) { cgc_size_t ret = 0; while (*s++) ret++; return ret; } int cgc_strcmp(const char *a, const char *b) { for (; *a && *a == *b; a++, b++) ; return *(const unsigned char *)a - *(const unsigned char *)b; } int cgc_strncmp(const char *a, const char *b, cgc_size_t n) { for (; --n && *a && *a == *b; a++, b++) ; return *(const unsigned char *)a - *(const unsigned char *)b; } char * cgc_strncpy(char *dst, const char *src, cgc_size_t n) { cgc_size_t i = 0; for (; i < n && src[i]; i++) dst[i] = src[i]; for (; i < n; i++) dst[i] = '\0'; return dst; } char * cgc_strchr(const char *s, char c) { for (; *s; s++) if (*s == c) return (char *)s; return NULL; } char * 
cgc_strrchr(const char *s, char c) { const char *orig_s = s; for (; *s; s++) ; for (; s >= orig_s; s--) if (*s == c) return (char *)s; return NULL; } char * cgc_strtok(char *s, char d) { static char *prev = NULL; char *token, *ret; if (s == NULL && prev == NULL) return NULL; if (prev == NULL) prev = s; ret = prev; while (*ret == d) ret++; if ((token = cgc_strchr(prev, d)) != NULL) { *token = '\0'; prev = token + 1; } else { prev = NULL; } return ret; } /* The following is verbatim from EAGLE_00004, but isn't included in the * released binary (DEBUG is not defined), so this reuse shouldn't be a concern. */ #ifdef DEBUG #ifdef WIN #include #else typedef __builtin_va_list va_list; #define va_start(ap, param) __builtin_va_start(ap, param) #define va_end(ap) __builtin_va_end(ap) #define va_arg(ap, type) __builtin_va_arg(ap, type) #endif static FILE std_files[3] = { {0, _FILE_STATE_OPEN}, {1, _FILE_STATE_OPEN}, {2, _FILE_STATE_OPEN} }; FILE *cgc_stdin = &std_files[0]; FILE *cgc_stdout = &std_files[1]; FILE *cgc_stderr = &std_files[2]; int vfprintf(FILE *stream, const char *format, va_list ap); int vdprintf(int fd, const char *format, va_list ap); #define IS_DIGIT 1 #define IS_UPPER 2 #define IS_LOWER 4 #define IS_SPACE 8 #define IS_XDIGIT 16 #define IS_CTRL 32 #define IS_BLANK 64 #define IS_ALPHA (IS_LOWER | IS_UPPER) #define IS_ALNUM (IS_ALPHA | IS_DIGIT) static unsigned char type_flags[256] = { 0, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_SPACE | IS_BLANK, IS_SPACE, IS_SPACE, IS_SPACE, IS_SPACE, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_SPACE | IS_BLANK, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, 
0, 0, 0, 0, 0, 0, 0, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, 0, 0, 0, 0, 0, 0, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, 0, 0, 0, 0, 0, }; int isalpha(int c) { return (type_flags[c & 0xff] & IS_ALPHA) != 0; } int isdigit(int c) { return (type_flags[c & 0xff] & IS_DIGIT) != 0; } int isxdigit(int c) { return (type_flags[c & 0xff] & IS_XDIGIT) != 0; } int toupper(int c) { if (isalpha(c)) { return c & ~0x20; } return c; } int vfprintf(FILE * stream, const char *format, va_list ap) { return vdprintf(stream->fd, format, ap); } int fprintf(FILE * stream, const char *format, ...) { va_list va; va_start(va, format); return vfprintf(stream, format, va); } int cgc_printf(const char *format, ...) 
{ va_list va; va_start(va, format); return vfprintf(cgc_stdout, format, va); } struct _fd_printer { int fd; int err; unsigned int count; }; //if flag != 0 return number of chars output so far static unsigned int fd_printer(char ch, void *_fp, int flag) { struct _fd_printer *fp = (struct _fd_printer *)_fp; if (flag) { return fp->count; } else { fp->count++; cgc_transmit(fp->fd, &ch, 1, NULL); } return 0; } #define STATE_NORMAL 0 #define STATE_ESCAPE 1 #define STATE_PERCENT 2 #define STATE_OCTAL 3 #define STATE_HEX 4 #define STATE_FLAGS 5 #define STATE_WIDTH 6 #define STATE_PRECISION 7 #define STATE_LENGTH 8 #define STATE_CONVERSION 9 #define STATE_WIDTH_ARG 10 #define STATE_WIDTH_VAL 11 #define STATE_PRECISION_ARG 12 #define STATE_PRECISION_VAL 13 #define STATE_NARG 15 #define FLAGS_TICK 1 #define FLAGS_LEFT 2 #define FLAGS_SIGN 4 #define FLAGS_SPACE 8 #define FLAGS_HASH 16 #define FLAGS_ZERO 32 #define LENGTH_H 1 #define LENGTH_HH 2 #define LENGTH_L 3 #define LENGTH_J 5 #define LENGTH_Z 6 #define LENGTH_T 7 #define LENGTH_CAPL 8 static char *r_utoa(unsigned int val, char *outbuf) { char *p = outbuf; *p = '0'; while (val) { *p++ = (val % 10) + '0'; val /= 10; } return p != outbuf ? (p - 1) : p; } //outbuf needs to be at least 22 chars static char *r_llotoa(unsigned long long val, char *outbuf) { char *p = outbuf; *p = '0'; while (val) { *p++ = (val & 7) + '0'; val >>= 3; } return p != outbuf ? (p - 1) : p; } static char *r_otoa(unsigned int val, char *outbuf) { return r_llotoa(val, outbuf); } //outbuf needs to be at least 22 chars static char *r_llxtoa(unsigned long long val, char *outbuf, int caps) { char *p = outbuf; *p = '0'; while (val) { char digit = (char)(val & 0xf); if (digit < 10) { digit += '0'; } else { digit = caps ? (digit + 'A' - 10) : (digit + 'a' - 10); } *p++ = digit; val >>= 4; } return p != outbuf ? 
(p - 1) : p;
}

/* 32-bit wrapper around r_llxtoa(). */
static char *r_xtoa(unsigned int val, char *outbuf, int caps) {
    return r_llxtoa(val, outbuf, caps);
}

/*
 * Numeric value of the digit or letter `ch` ('0'-'9' -> 0-9, letters -> 10
 * upwards, case-insensitive), or -1 for any other character. Letters past
 * 'f' are not rejected here.
 */
static int hex_value_of(char ch) {
    if (isdigit(ch)) {
        return ch - '0';
    }
    else if (isalpha(ch)) {
        return toupper(ch) - 'A' + 10;
    }
    return -1;
}

/*
 * Core of the printf family: a character-driven state machine over `format`.
 *
 * States run NORMAL -> PERCENT -> NARG -> FLAGS -> WIDTH -> PRECISION ->
 * LENGTH -> CONVERSION and back to NORMAL. A state that must re-examine the
 * current character does `format--`, so the next loop iteration sees it
 * again in the new state. Backslash escapes inside the format string are
 * interpreted as well (ESCAPE / OCTAL / HEX states).
 *
 * Arguments are fetched by indexing `ap` as an array of pointer-sized slots
 * (`args`), which also implements the positional "%N$" form. This assumes
 * va_list is a plain pointer to contiguous argument slots - presumably true
 * for the 32-bit target this was written for; it is not portable.
 *
 * Numbers are converted by the r_*toa helpers into num_buf in reverse order
 * and emitted by walking num_ptr backwards.
 */
//func is responsible for outputing the given character
//user is a pointer to data required by func
static void printf_core(unsigned int (*func)(char, void *, int), void *user, const char *format, va_list ap) {
    int state = STATE_NORMAL;
    int flags;
    int digit_count = 0;
    int value = 0;
    char ch;
    int arg_count = 0;
    int width_value;
    int prec_value;
    int field_arg;
    int length;
    char **args = (char**)ap;
    for (ch = *format++; ch; ch = *format++) {
        switch (state) {
            case STATE_NORMAL:
                if (ch == '%') {
                    state = STATE_PERCENT;
                }
                else if (ch == '\\') {
                    state = STATE_ESCAPE;
                }
                else {
                    func(ch, user, 0);
                }
                break;
            case STATE_ESCAPE:
                switch (ch) {
                    case 'n':
                        func('\n', user, 0);
                        break;
                    case 't':
                        func('\t', user, 0);
                        break;
                    case 'r':
                        func('\r', user, 0);
                        break;
                    case 'b':
                        func('\b', user, 0);
                        break;
                    case 'f':
                        func('\f', user, 0);
                        break;
                    case 'v':
                        func('\v', user, 0);
                        break;
                    case '\\': case '\'': case '"':
                        func(ch, user, 0);
                        break;
                    case 'x':
                        state = STATE_HEX;
                        digit_count = 0;
                        value = 0;
                        break;
                    default:
                        /* NOTE(review): '0' is excluded from the octal digits here
                         * and in STATE_OCTAL - confirm whether that is intended. */
                        if (ch > '0' && ch < '8') {
                            state = STATE_OCTAL;
                            digit_count = 1;
                            value = ch - '0';
                        }
                        else {
                            /* NOTE(review): emits the character AFTER the unknown
                             * escape (*format), not the escape character itself. */
                            func(*format, user, 0);
                        }
                        break;
                }
                if (state == STATE_ESCAPE) {
                    state = STATE_NORMAL;
                }
                break;
            case STATE_PERCENT:
                if (ch == '%') {
                    func(ch, user, 0);
                    state = STATE_NORMAL;
                }
                else {
                    state = STATE_NARG;
                    flags = 0;
                    format--;
                }
                break;
            case STATE_OCTAL:
                if (ch > '0' && ch < '8' && digit_count < 3) {
                    digit_count++;
                    value = value * 8 + (ch - '0');
                    if (digit_count == 3) {
                        func(value, user, 0);
                        state = STATE_NORMAL;
                    }
                }
                else {
                    func(value, user, 0);
                    state = STATE_NORMAL;
                    format--;
                }
                break;
            case STATE_HEX:
                if (isxdigit(ch) && digit_count < 2) {
                    digit_count++;
                    value = value * 16 + hex_value_of(ch);
                    if (digit_count == 2) {
                        func(value, user, 0);
                        state = STATE_NORMAL;
                    }
                }
                else {
                    func(value, user, 0);
                    state = STATE_NORMAL;
                    format--;
                }
                break;
            case STATE_NARG:
                /* Start of a conversion: reset the per-conversion settings. */
                width_value = -1;
                prec_value = -1;
                flags = 0;
                length = 0;
                field_arg = -1;
                if (ch == '0') {
                    format--;
                    state = STATE_FLAGS;
                    break;
                }
                if (isdigit(ch)) {
                    //could be width or could be arg specifier or a 0 flag
                    //width and arg values don't start with 0
                    width_value = 0;
                    while (isdigit(ch)) {
                        width_value = width_value * 10 + (ch - '0');
                        ch = *format++;
                    }
                    if (ch == '$') {
                        field_arg = width_value - 1;
                        width_value = 0;
                        state = STATE_FLAGS;
                    }
                    else {
                        //this was a width
                        format--;
                        state = STATE_PRECISION;
                    }
                }
                else {
                    format--;
                    state = STATE_FLAGS;
                }
                break;
            case STATE_FLAGS:
                switch (ch) {
                    case '\'':
                        flags |= FLAGS_TICK;
                        break;
                    case '-':
                        flags |= FLAGS_LEFT;
                        break;
                    case '+':
                        flags |= FLAGS_SIGN;
                        break;
                    case ' ':
                        flags |= FLAGS_SPACE;
                        break;
                    case '#':
                        flags |= FLAGS_HASH;
                        break;
                    case '0':
                        flags |= FLAGS_ZERO;
                        break;
                    default:
                        format--;
                        if ((flags & (FLAGS_ZERO | FLAGS_LEFT)) == (FLAGS_ZERO | FLAGS_LEFT)) {
                            //if both '-' and '0' appear, '0' is ignored
                            flags &= ~FLAGS_ZERO;
                        }
                        state = STATE_WIDTH;
                        break;
                }
                break;
            case STATE_WIDTH:
                if (ch == '*') {
                    /* Width comes from an argument: "*" or positional "*N$". */
                    ch = *format++;
                    int width_arg = 0;
                    if (isdigit(ch)) {
                        while (isdigit(ch)) {
                            width_arg = width_arg * 10 + (ch - '0');
                            ch = *format++;
                        }
                        width_arg--;
                        if (ch != '$') {
                            //error
                        }
                    }
                    else {
                        width_arg = arg_count++;
                        format--;
                    }
                    width_value = (int)args[width_arg];
                }
                else if (isdigit(ch)) {
                    width_value = 0;
                    while (isdigit(ch)) {
                        width_value = width_value * 10 + (ch - '0');
                        ch = *format++;
                    }
                    format--;
                }
                else {
                    //no width specified
                    format--;
                }
                state = STATE_PRECISION;
                break;
            case STATE_PRECISION:
                if (ch == '.') {
                    //have a precision
                    ch = *format++;
                    if (ch == '*') {
                        /* Precision comes from an argument: ".*" or ".*N$". */
                        ch = *format++;
                        int prec_arg = 0;
                        if (isdigit(ch)) {
                            while (isdigit(ch)) {
                                prec_arg = prec_arg * 10 + (ch - '0');
                                ch = *format++;
                            }
                            prec_arg--;
                            if (ch != '$') {
                                //error
                            }
                        }
                        else {
                            prec_arg = arg_count++;
                            format--;
                        }
                        prec_value = (int)args[prec_arg];
                    }
                    else if (isdigit(ch)) {
                        prec_value = 0;
                        while (isdigit(ch)) {
                            prec_value = prec_value * 10 + (ch - '0');
                            ch = *format++;
                        }
                        format--;
                    }
                    else {
                        //no precision specified
                        format--;
                    }
                }
                else {
                    //no precision specified
                    format--;
                }
                state = STATE_LENGTH;
                break;
            case STATE_LENGTH:
                switch (ch) {
                    case 'h':
                        length = LENGTH_H;
                        if (*format == 'h') {
                            /* LENGTH_H + 1 == LENGTH_HH */
                            length++;
                            format++;
                        }
                        break;
                    case 'l':
                        length = LENGTH_L;
                        if (*format == 'l') {
                            // length++;
                            format++;
                        }
                        break;
                    case 'j':
                        length = LENGTH_J;
                        break;
                    case 'z':
                        length = LENGTH_Z;
                        break;
                    case 't':
                        length = LENGTH_T;
                        break;
                    case 'L':
                        length = LENGTH_CAPL;
                        break;
                    default:
                        format--;
                        break;
                }
                state = STATE_CONVERSION;
                break;
            case STATE_CONVERSION: {
                char num_buf[32];
                char *num_ptr;
                int use_caps = 1;
                int sign;
                int val;
                if (field_arg == -1) {
                    field_arg = arg_count++;
                }
                switch (ch) {
                    case 'd': case 'i': {
                        int len;
                        /* NOTE(review): val = -val overflows for the most
                         * negative int. */
                        switch (length) {
                            case LENGTH_H:
                                val = (short)(int)args[field_arg];
                                sign = val < 0;
                                if (sign) {
                                    val = -val;
                                }
                                num_ptr = r_utoa(val, num_buf);
                                break;
                            case LENGTH_HH:
                                val = (char)(int)args[field_arg];
                                sign = val < 0;
                                if (sign) {
                                    val = -val;
                                }
                                num_ptr = r_utoa(val, num_buf);
                                break;
                            case LENGTH_L:
                            default:
                                val = (long)args[field_arg];
                                sign = val < 0;
                                if (sign) {
                                    val = -val;
                                }
                                num_ptr = r_utoa(val, num_buf);
                                break;
                        }
                        len = num_ptr - num_buf + 1;
                        if (width_value == -1) {
                            //by default min length is the entire value
                            width_value = len;
                            if (sign || (flags & FLAGS_SIGN)) {
                                width_value++;
                            }
                        }
                        if (prec_value == -1) {
                            //by default max is entire value
                            prec_value = len;
                            if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) {
                                //widen precision if necessary to pad to width with '0'
                                if (sign || (flags & FLAGS_SIGN)) {
                                    prec_value = width_value - 1;
                                }
                                else {
                                    prec_value = width_value;
                                }
                            }
                        }
                        else {
                            if (prec_value < len) {
                                prec_value = len;
                            }
                            //number won't need leading zeros
                            flags &= ~FLAGS_ZERO;
                        }
                        if (flags & FLAGS_LEFT) {
                            if (sign) {
                                func('-', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            else if ((flags & FLAGS_SIGN) != 0) {
                                func('+', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > (prec_value + 1)) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            if (sign) {
                                func('-', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            else if ((flags & FLAGS_SIGN) != 0) {
                                func('+', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            if (width_value > prec_value) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    case 'o': {
                        int len;
                        switch (length) {
                            case LENGTH_H:
                                num_ptr = r_otoa((unsigned short)(unsigned int)args[field_arg], num_buf);
                                break;
                            case LENGTH_HH:
                                num_ptr = r_otoa((unsigned char)(unsigned int)args[field_arg], num_buf);
                                break;
                            case LENGTH_L:
                            default:
                                num_ptr = r_otoa((unsigned long)args[field_arg], num_buf);
                                break;
                        }
                        if (flags & FLAGS_HASH) {
                            /* '#': make sure the number starts with a '0' */
                            if (*num_ptr != '0') {
                                num_ptr++;
                                *num_ptr = '0';
                            }
                        }
                        len = num_ptr - num_buf + 1;
                        if (width_value == -1) {
                            //by default min length is the entire value
                            width_value = len;
                        }
                        if (prec_value == -1) {
                            //by default max is entire value
                            prec_value = len;
                            if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) {
                                //widen precision if necessary to pad to width with '0'
                                prec_value = width_value;
                            }
                        }
                        else {
                            if (prec_value < len) {
                                prec_value = len;
                            }
                            flags &= ~FLAGS_ZERO;
                        }
                        if (flags & FLAGS_LEFT) {
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > prec_value) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    case 'u': {
                        int len;
                        switch (length) {
                            case LENGTH_H:
                                num_ptr = r_utoa((unsigned short)(unsigned int)args[field_arg], num_buf);
                                break;
                            case LENGTH_HH:
                                num_ptr = r_utoa((unsigned char)(unsigned int)args[field_arg], num_buf);
                                break;
                            case LENGTH_L:
                            default:
                                num_ptr = r_utoa((unsigned long)args[field_arg], num_buf);
                                break;
                        }
                        len = num_ptr - num_buf + 1;
                        if (width_value == -1) {
                            //by default min length is the entire value
                            width_value = len;
                        }
                        if (prec_value == -1) {
                            //by default max is entire value
                            prec_value = len;
                            if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) {
                                //widen precision if necessary to pad to width with '0'
                                prec_value = width_value;
                            }
                        }
                        else {
                            if (prec_value < len) {
                                prec_value = len;
                            }
                            flags &= ~FLAGS_ZERO;
                        }
                        if (flags & FLAGS_LEFT) {
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > prec_value) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    case 'x':
                        use_caps = 0;
                        //now fall into X case
                    case 'X': {
                        int len;
                        switch (length) {
                            case LENGTH_H:
                                num_ptr = r_xtoa((unsigned short)(unsigned int)args[field_arg], num_buf, use_caps);
                                break;
                            case LENGTH_HH:
                                num_ptr = r_xtoa((unsigned char)(unsigned int)args[field_arg], num_buf, use_caps);
                                break;
                            case LENGTH_L:
                            default:
                                num_ptr = r_xtoa((unsigned long)args[field_arg], num_buf, use_caps);
                                break;
                        }
                        len = num_ptr - num_buf + 1;
                        if (width_value == -1) {
                            //by default min length is the entire value
                            width_value = len;
                        }
                        if (prec_value == -1) {
                            //by default max is entire value
                            prec_value = len;
                            if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) {
                                //widen precision if necessary to pad to width with '0'
                                prec_value = width_value;
                            }
                        }
                        else {
                            if (prec_value < len) {
                                prec_value = len;
                            }
                            flags &= ~FLAGS_ZERO;
                        }
                        if (flags & FLAGS_LEFT) {
                            /* '#' adds a 0x / 0X prefix, except for the value 0 */
                            if (flags & FLAGS_HASH && (len != 1 || *num_ptr != '0')) {
                                func('0', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                                func(use_caps ? 'X' : 'x', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > (prec_value + 2)) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            if (flags & FLAGS_HASH && (len != 1 || *num_ptr != '0')) {
                                func('0', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                                func(use_caps ? 'X' : 'x', user, 0);
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            else {
                                while (width_value > prec_value) {
                                    func(' ', user, 0);
                                    width_value--;
                                }
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    /* Floating-point conversions are accepted but produce no output. */
                    case 'f': case 'F':
                        break;
                    case 'e': case 'E':
                        break;
                    case 'g': case 'G':
                        break;
                    case 'a': case 'A':
                        break;
                    case 'c': {
                        unsigned char ch = (unsigned char)(unsigned int)args[field_arg];
                        if (width_value == -1) {
                            width_value = 1;
                        }
                        if (flags & FLAGS_LEFT) {
                            func((char)ch, user, 0);
                            if (width_value > 0) {
                                width_value--;
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > 1) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            func(ch, user, 0);
                        }
                        break;
                    }
                    case 's': {
                        const char *s_arg = (const char *)args[field_arg];
                        int len = cgc_strlen(s_arg);
                        if (width_value == -1) {
                            //by default min length is the entire string
                            width_value = len;
                        }
                        if (prec_value == -1 || prec_value > len) {
                            //by default max is entire string but no less than width
                            prec_value = len;
                        }
                        if (flags & FLAGS_LEFT) {
                            while (prec_value != 0) {
                                func(*s_arg++, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > prec_value) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            while (prec_value != 0) {
                                func(*s_arg++, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    case 'p': {
                        int len;
                        flags |= FLAGS_HASH;
                        num_ptr = r_xtoa((unsigned int)args[field_arg], num_buf, 0);
                        len = num_ptr - num_buf + 1;
                        if (prec_value == -1) {
                            //by default max is entire value
                            prec_value = len;
                        }
                        else {
                            if (prec_value < len) {
                                prec_value = len;
                            }
                            flags &= ~FLAGS_ZERO;
                        }
                        if (width_value == -1) {
                            //by default min length is the entire value
                            width_value = prec_value + 2;
                        }
                        if (flags & FLAGS_LEFT) {
                            func('0', user, 0);
                            if (width_value > 0) {
                                width_value--;
                            }
                            func('x', user, 0);
                            if (width_value > 0) {
                                width_value--;
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                                if (width_value > 0) {
                                    width_value--;
                                }
                            }
                            while (width_value != 0) {
                                func(' ', user, 0);
                                width_value--;
                            }
                        }
                        else {
                            while (width_value > (prec_value + 2)) {
                                func(' ', user, 0);
                                width_value--;
                            }
                            func('0', user, 0);
                            if (width_value > 0) {
                                width_value--;
                            }
                            func('x', user, 0);
                            if (width_value > 0) {
                                width_value--;
                            }
                            while (prec_value > len) {
                                func('0', user, 0);
                                prec_value--;
                            }
                            while (prec_value != 0) {
                                func(*num_ptr--, user, 0);
                                prec_value--;
                            }
                        }
                        break;
                    }
                    case 'n': {
                        /* Store the number of characters emitted so far
                         * (obtained by calling func with flag 1) through the
                         * pointer argument. */
                        void *np = (void*)args[field_arg];
                        unsigned int len = func(0, user, 1);
                        switch (length) {
                            case LENGTH_HH:
                                *(unsigned char*)np = (unsigned char)len;
                                break;
                            case LENGTH_H:
                                *(unsigned short*)np = (unsigned short)len;
                                break;
                            case LENGTH_L:
                            default:
                                *(unsigned int*)np = len;
                                break;
                        }
                        break;
                    }
                    case 'C':
                        break;
                    case 'S':
                        break;
                    default:
                        break;
                }
                state = STATE_NORMAL;
                break;
            }
        }
    }
}

/*
 * Format to the descriptor `fd`, sending each output character through
 * fd_printer(). Returns the number of characters emitted.
 */
int vdprintf(int fd, const char *format, va_list ap) {
    struct _fd_printer fp;
    fp.fd = fd;
    fp.err = 0;
    fp.count = 0;
    printf_core(fd_printer, &fp, format, ap);
    return fp.count;
}

#endif /* DEBUG */
================================================
FILE: corpus/cgc/UTF-late/lib/malloc.c
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "libcgc.h" #include "cgc_libc.h" #include "cgc_list.h" struct chunk { cgc_size_t header; struct list_node list; } __attribute__((packed)); static cgc_size_t size_class_sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048 }; #define NUM_SIZE_CLASSES (sizeof(size_class_sizes) / sizeof(cgc_size_t)) static struct list freelists[NUM_SIZE_CLASSES] = {}; #define ALIGN(x, a) (((x) + (a - 1)) & ~(a - 1)) #define IS_PAGE_ALIGNED(a) ((((cgc_size_t)a) & (PAGE_SIZE - 1)) == 0) #define CHUNK_OVERHEAD (2 * sizeof(cgc_size_t)) #define MIN_SIZE (size_class_sizes[0]) #define MAX_SIZE (PAGE_SIZE - 1) #define SIZE_CLASS_INDEX(size) (cgc_log_base_two(size) - cgc_log_base_two(MIN_SIZE)) #define CHUNK_SIZE(chunk) ((chunk)->header & ~1) #define IS_ALLOCATED(chunk) ((chunk)->header & 1) #define FOOTER(chunk) ((cgc_size_t *)((char *)chunk + CHUNK_SIZE(chunk) - sizeof(cgc_size_t))) #define NEXT_CHUNK(chunk) ((struct chunk *)((char *)chunk + CHUNK_SIZE(chunk))) #define PREV_SIZE(chunk) (CHUNK_SIZE(((struct chunk *)((cgc_size_t *)chunk - 1)))) #define PREV_CHUNK(chunk) ((struct chunk *)((char *)chunk - PREV_SIZE(chunk))) static inline unsigned int cgc_log_base_two(unsigned int x) { unsigned long ret = 0; while (x >>= 1) ret++; return ret; } static inline void * cgc_chunk_to_ptr(struct chunk *chunk) { return (char *)chunk + OFFSETOF(struct chunk, list); } static inline struct chunk * cgc_ptr_to_chunk(void *ptr) { return CONTAINEROF(struct chunk, list, ptr); } static void cgc_mark_allocated(struct chunk *chunk) { chunk->header |= 1; *FOOTER(chunk) = chunk->header; } static void cgc_mark_free(struct chunk *chunk) { chunk->header &= ~1; *FOOTER(chunk) = chunk->header; } static int cgc_find_fit(const struct list_node *chunk_, void *size_) { cgc_size_t size = (cgc_size_t)size_; struct chunk *chunk = list_entry(struct chunk, list, chunk_); return CHUNK_SIZE(chunk) >= size; } static int cgc_size_cmp(const struct list_node *a_, const struct list_node *b_) { const struct chunk *a = 
list_entry(struct chunk, list, a_); const struct chunk *b = list_entry(struct chunk, list, b_); return CHUNK_SIZE(b) - CHUNK_SIZE(a); } static void * cgc_allocate_large_chunk(cgc_size_t size) { struct chunk *chunk; if (cgc_allocate(size, 0, (void **)&chunk) != 0) return NULL; chunk->header = size; cgc_mark_allocated(chunk); return cgc_chunk_to_ptr(chunk); } static struct chunk * cgc_grow_heap(void) { struct chunk *chunk; if (cgc_allocate(PAGE_SIZE, 0, (void **)&chunk) != 0) return NULL; chunk->header = PAGE_SIZE; cgc_mark_free(chunk); return chunk; } static struct chunk * cgc_split_chunk(struct chunk *chunk, cgc_size_t size) { struct chunk *new = (struct chunk *)((char *)chunk + size); cgc_size_t orig_size = CHUNK_SIZE(chunk); if (CHUNK_SIZE(chunk) <= size + MIN_SIZE) return chunk; chunk->header = size; cgc_mark_allocated(chunk); new->header = orig_size - size; cgc_mark_allocated(new); cgc_free(cgc_chunk_to_ptr(new)); return chunk; } static struct chunk * cgc_coalesce(struct chunk *chunk) { struct chunk *next = NULL, *prev = NULL; unsigned int size_class; if (!IS_PAGE_ALIGNED(chunk)) prev = PREV_CHUNK(chunk); if (!IS_PAGE_ALIGNED(NEXT_CHUNK(chunk))) next = NEXT_CHUNK(chunk); if (prev && !IS_ALLOCATED(prev)) { size_class = SIZE_CLASS_INDEX(CHUNK_SIZE(prev)); if (size_class < NUM_SIZE_CLASSES) { list_remove_entry(struct chunk, list, &freelists[size_class], prev); prev->header = CHUNK_SIZE(chunk) + CHUNK_SIZE(prev); cgc_mark_free(prev); chunk = prev; } } if (next && !IS_ALLOCATED(next)) { size_class = SIZE_CLASS_INDEX(CHUNK_SIZE(next)); if (size_class < NUM_SIZE_CLASSES) { list_remove_entry(struct chunk, list, &freelists[size_class], next); chunk->header = CHUNK_SIZE(chunk) + CHUNK_SIZE(next); cgc_mark_free(chunk); } } return chunk; } void * cgc_malloc(cgc_size_t size) { struct chunk *chunk = NULL; unsigned int size_class; if (ALIGN(size, sizeof(cgc_size_t)) + CHUNK_OVERHEAD < size) return NULL; size = ALIGN(size, sizeof(cgc_size_t)) + CHUNK_OVERHEAD; if (size < 
MIN_SIZE) size = MIN_SIZE; if (size > MAX_SIZE) return cgc_allocate_large_chunk(size); size_class = SIZE_CLASS_INDEX(size); if (size_class >= NUM_SIZE_CLASSES) return NULL; while (chunk == NULL && size_class < NUM_SIZE_CLASSES) chunk = list_find_entry(struct chunk, list, &freelists[size_class++], cgc_find_fit, (void *)size); if (chunk == NULL) chunk = cgc_grow_heap(); else list_remove_entry(struct chunk, list, &freelists[size_class - 1], chunk); cgc_mark_allocated(chunk); if (CHUNK_SIZE(chunk) > size) chunk = cgc_split_chunk(chunk, size); return cgc_chunk_to_ptr(chunk); } void cgc_free(void *ptr) { struct chunk *chunk = cgc_ptr_to_chunk(ptr); unsigned int size_class; if (ptr == NULL) return; // This should only happen if allocated through allocate_large_chunk(). If // it's not page_aligned, something bad happened and we just leak the chunk. if (CHUNK_SIZE(chunk) > MAX_SIZE) { if (IS_PAGE_ALIGNED(chunk)) cgc_deallocate(chunk, ALIGN(CHUNK_SIZE(chunk), PAGE_SIZE)); return; } cgc_mark_free(chunk); chunk = cgc_coalesce(chunk); size_class = SIZE_CLASS_INDEX(CHUNK_SIZE(chunk)); if (size_class >= NUM_SIZE_CLASSES) return; list_insert_entry_in_order(struct chunk, list, &freelists[size_class], chunk, cgc_size_cmp); } void * cgc_calloc(cgc_size_t size) { void *ret = cgc_malloc(size); if (ret) cgc_memset(ret, '\0', size); return ret; } void * cgc_realloc(void *ptr, cgc_size_t size) { cgc_size_t orig_size; void *ret; if (ptr == NULL) return cgc_malloc(size); orig_size = CHUNK_SIZE(cgc_ptr_to_chunk(ptr)) - CHUNK_OVERHEAD; if (size == orig_size) return ptr; ret = cgc_malloc(size); cgc_memcpy(ret, ptr, MIN(size, orig_size)); cgc_free(ptr); return ret; } ================================================ FILE: corpus/cgc/UTF-late/lib/utf8.c ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal 
in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "libcgc.h" #include "cgc_utf8.h" enum utf8_decode_state cgc_utf8_decode(enum utf8_decode_state state, utf8char c, ucscodepoint *out) { switch (state) { case ACCEPT: case REJECT: if ((c & 0x80) == 0) { *out = c; return ACCEPT; } else if ((c & 0xe0) == 0xc0) { *out = (c & 0x1f) << 6; return TWO_BYTE; } else if ((c & 0xf0) == 0xe0) { *out = (c & 0x0f) << 12; return THREE_BYTE; } else if ((c & 0xf4) == 0xf0) { *out = (c & 0x07) << 18; return FOUR_BYTE; } break; case FOUR_BYTE: #ifdef PATCHED if ((c & 0xc0) == 0x80 && *out) { #else if ((c & 0xc0) == 0x80) { #endif *out |= (c & 0x3f) << 12; // Catch 4-byte seqences decoding to over 0x10ffff if (*out > 0x10ffff) break; return THREE_BYTE; } break; case THREE_BYTE: #ifdef PATCHED if ((c & 0xc0) == 0x80 && *out) { #else if ((c & 0xc0) == 0x80) { #endif *out |= (c & 0x3f) << 6; return TWO_BYTE; } break; case TWO_BYTE: #ifdef PATCHED if ((c & 0xc0) == 0x80 && *out) { #else if ((c & 0xc0) == 0x80) { #endif *out |= (c & 0x3f); return ACCEPT; } break; } *out = 0xfffd; return REJECT; } cgc_ssize_t cgc_utf8_encode(ucscodepoint in, utf8char 
*out, cgc_size_t size) { if (in < 0x80) { if (size < 1) return 0; *out++ = in; return 1; } else if (in > 0x80 && in < 0x7ff) { if (size < 2) return 0; *out++ = 0xc0 | (in & (0x1f << 6) >> 6); *out++ = 0x80 | (in & 0x3f); return 2; } else if (in > 0x800 && in < 0xffff) { if (size < 3) return 0; if (in >= 0xd800 && in <= 0xdfff) return -1; *out++ = 0xe0 | (in & (0xf << 12) >> 12); *out++ = 0x80 | (in & (0x3f << 6) >> 6); *out++ = 0x80 | (in & 0x3f); return 3; } else if (in > 0x10000 && in < 0x10ffff && size > 3) { if (size < 4) return 0; *out++ = 0xf0 | (in & (0x7 << 18) >> 18); *out++ = 0x80 | (in & (0x3f << 12) >> 12); *out++ = 0x80 | (in & (0x3f << 6) >> 6); *out++ = 0x80 | (in & 0x3f); return 4; } return -1; } cgc_size_t cgc_utf8_canonicalize(utf8char *dst, const utf8char *src, cgc_size_t n) { cgc_size_t i, ret = 0; ucscodepoint cp; cgc_ssize_t encoded; enum utf8_decode_state state = ACCEPT; // Guarantee we have space for null terminator n--; for (i = 0; n && src[i]; i++) { state = cgc_utf8_decode(state, src[i], &cp); if (state == ACCEPT || state == REJECT) { encoded = cgc_utf8_encode(cp, dst, n); if (encoded < 0) return ret; ret += encoded; dst += encoded; n -= encoded; } } *dst = '\0'; return ret; } ================================================ FILE: corpus/cgc/UTF-late/lib/vfs.c ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "cgc_libc.h"
#include "cgc_list.h"
#include "cgc_vfs.h"

/*
 * Initializes `vfs` by allocating its root directory.
 * Returns 0 on success, -1 if the allocation returned NULL.
 */
int
cgc_vfs_init(struct vfs *vfs)
{
    return (vfs->root = cgc_calloc(sizeof(struct directory))) ? 0 : -1;
}

/*
 * Frees `dir` and everything below it: each file's contents and the file
 * itself, then every subdirectory recursively, then `dir`.
 */
static void
cgc_directory_destroy(struct directory *dir)
{
    struct file *cur_file, *n_file;
    struct directory *cur_dir, *n_dir;

    list_for_each_entry_safe(struct file, list, &dir->files, n_file, cur_file) {
        cgc_free(cur_file->contents);
        cgc_free(cur_file);
    }

    list_for_each_entry_safe(struct directory, list, &dir->subdirectories, n_dir, cur_dir)
        cgc_directory_destroy(cur_dir);

    cgc_free(dir);
}

/* Releases the whole tree rooted at `vfs->root`. */
void
cgc_vfs_destroy(struct vfs *vfs)
{
    cgc_directory_destroy(vfs->root);
}

/*
 * List predicate: non-zero when the directory containing list node `dir_`
 * has a name equal to `name_` within the first MAX_FILE_NAME_LENGTH bytes.
 */
static int
cgc_dir_eq(const struct list_node *dir_, void *name_)
{
    const struct directory *dir = list_entry(struct directory, list, dir_);
    const utf8char *name = (const utf8char *)name_;

    return cgc_strncmp(dir->name, name, MAX_FILE_NAME_LENGTH) == 0;
}

/* Same as cgc_dir_eq, but for list nodes embedded in `struct file`. */
static int
cgc_file_eq(const struct list_node *file_, void *name_)
{
    const struct file *file = list_entry(struct file, list, file_);
    const utf8char *name = (const utf8char *)name_;

    return cgc_strncmp(file->name, name, MAX_FILE_NAME_LENGTH) == 0;
}

/*
 * Resolves `path` to a directory, starting from the root.
 *
 * Leading '/' characters are skipped; an empty remainder resolves to the
 * root. The path is copied and split on '/'. Every component except the
 * last is walked in the loop, where "." stays in place and ".." moves to
 * `dir->parent`. The last component is then looked up among the
 * subdirectories of the directory reached.
 *
 * Returns the directory, or NULL when a component is missing or the copy
 * cannot be allocated.
 *
 * NOTE(review): the last component is not special-cased for "." or "..",
 * and the final lookup dereferences `dir` even if ".." left it NULL.
 */
struct directory *
cgc_lookup_dir(const struct vfs *vfs, const utf8char *path)
{
    struct directory *ret = NULL;
    struct directory *dir = vfs->root;
    cgc_size_t path_len;
    utf8char *path_dup, *cur, *next;

    while (*path == '/')
        path++;

    path_len = cgc_strlen(path);
    if (path_len == 0)
        return vfs->root;

    /* Work on a zero-filled copy because tokenizing modifies the string. */
    if ((path_dup = cgc_calloc(path_len + 1)) == NULL)
        return NULL;

    cgc_strncpy(path_dup, path, path_len);

    for (cur = cgc_strtok(path_dup, '/'), next = cgc_strtok(NULL, '/');
            dir && next != NULL;
            cur = next, next = cgc_strtok(NULL, '/')) {
        if (cgc_strcmp(cur, ".") == 0)
            continue;

        if (cgc_strcmp(cur, "..") == 0) {
            dir = dir->parent;
            continue;
        }

        if ((dir = list_find_entry(struct directory, list, &dir->subdirectories,
                cgc_dir_eq, (void *)cur)) == NULL) {
            goto free_path;
        }
    }

    ret = list_find_entry(struct directory, list, &dir->subdirectories,
            cgc_dir_eq, (void *)cur);

free_path:
    cgc_free(path_dup);
    return ret;
}

/*
 * Resolves `path` to a file.
 *
 * A path ending in '/' is rejected. Otherwise the path is split at its last
 * '/': the left part is resolved with cgc_lookup_dir, the right part is the
 * file name. Without any '/', the name is looked up in the root.
 *
 * Returns the file, or NULL if it (or its directory) does not exist or the
 * copy cannot be allocated.
 *
 * NOTE(review): `path[path_len - 1]` is evaluated even when `path` is empty.
 */
struct file *
cgc_lookup_file(const struct vfs *vfs, const utf8char *path)
{
    struct file *ret = NULL;
    struct directory *dir = vfs->root;
    cgc_size_t path_len;
    utf8char *path_dup, *name;

    path_len = cgc_strlen(path);
    if (path[path_len - 1] == '/')
        return NULL;

    if ((path_dup = cgc_calloc(path_len + 1)) == NULL)
        return NULL;

    cgc_strncpy(path_dup, path, path_len + 1);

    if ((name = cgc_strrchr(path_dup, '/')) != NULL) {
        /* Cut the copy in two: directory part and file name. */
        *name++ = '\0';
        dir = cgc_lookup_dir(vfs, path_dup);
    } else {
        name = path_dup;
    }

    if (dir != NULL)
        ret = list_find_entry(struct file, list, &dir->files,
                cgc_file_eq, (void *)name);

    cgc_free(path_dup);
    return ret;
}

/*
 * Creates (or returns the existing) directory named by `path`.
 *
 * Trailing '/' characters are stripped from a copy of the path, which is
 * then split at its last '/'; the parent is resolved with cgc_lookup_dir and
 * the new directory is created in it via cgc_create_dir_in_dir.
 *
 * Returns the directory, or NULL if the parent does not exist, allocation
 * fails, or cgc_create_dir_in_dir refuses the name.
 *
 * NOTE(review): the stripping loop reads `path_dup[path_len - 1]` without
 * checking that `path_len` is still non-zero.
 */
struct directory *
cgc_create_dir(struct vfs *vfs, const utf8char *path)
{
    struct directory *ret = NULL;
    struct directory *dir = vfs->root;
    cgc_size_t path_len;
    utf8char *path_dup, *name;

    path_len = cgc_strlen(path);
    if ((path_dup = cgc_calloc(path_len + 1)) == NULL)
        return NULL;

    cgc_strncpy(path_dup, path, path_len + 1);

    while (path_dup[path_len - 1] == '/')
        path_dup[(path_len--) - 1] = '\0';

    if ((name = cgc_strrchr(path_dup, '/')) != NULL) {
        *name++ = '\0';
        dir = cgc_lookup_dir(vfs, path_dup);
    } else {
        name = path_dup;
    }

    if (dir != NULL)
        ret = cgc_create_dir_in_dir(vfs, dir, name);

    cgc_free(path_dup);
    return ret;
}

/*
 * Creates subdirectory `name` inside `dir`.
 *
 * Returns NULL if a file of that name already exists in `dir` or the
 * allocation fails; returns the existing subdirectory if there is one;
 * otherwise returns the newly created directory, linked at the front of
 * `dir->subdirectories` with `parent` set to `dir`.
 */
struct directory *
cgc_create_dir_in_dir(struct vfs *vfs, struct directory *dir, const utf8char *name)
{
    struct directory *ret;

    // Suppress unused warning
    (void)(vfs);

    // Ensure we can't create a directory with the same name as a file
    if (cgc_list_find(&dir->files, cgc_file_eq, (void *)name) != NULL)
        return NULL;

    // If the directory already exists, return it
    if ((ret = list_find_entry(struct directory, list, &dir->subdirectories,
            cgc_dir_eq, (void *)name)) != NULL) {
        return ret;
    }

    if ((ret = cgc_calloc(sizeof(struct directory))) != NULL) {
        cgc_strncpy(ret->name, name, MAX_FILE_NAME_LENGTH);
        ret->parent = dir;
        list_push_entry_front(struct directory, list, &dir->subdirectories, ret);
    }

    return ret;
}

/*
 * Creates (or returns the existing) file named by `path`.
 *
 * A path ending in '/' is rejected. The path is split at its last '/'; the
 * directory part is resolved with cgc_lookup_dir (root when there is no
 * '/') and the file is created there via cgc_create_file_in_dir.
 *
 * Returns the file, or NULL on any failure.
 */
struct file *
cgc_create_file(struct vfs *vfs, const utf8char *path)
{
    struct file *ret = NULL;
    struct directory *dir = vfs->root;
    cgc_size_t path_len;
    utf8char *path_dup, *name;

    path_len = cgc_strlen(path);
    if (path[path_len - 1] == '/')
        return NULL;

    if ((path_dup = cgc_calloc(path_len + 1)) == NULL)
        return NULL;

    cgc_strncpy(path_dup, path, path_len + 1);

    if ((name = cgc_strrchr(path_dup, '/')) != NULL) {
        *name++ = '\0';
        dir = cgc_lookup_dir(vfs, path_dup);
    } else {
        name = path_dup;
    }

    if (dir != NULL)
        ret = cgc_create_file_in_dir(vfs, dir, name);

    cgc_free(path_dup);
    return ret;
}

/*
 * Creates file `name` inside `dir`.
 *
 * Returns NULL if a subdirectory of that name exists or the allocation
 * fails; returns the existing file if there is one; otherwise returns the
 * new zero-filled file, linked at the front of `dir->files` with `parent`
 * set to `dir`.
 */
struct file *
cgc_create_file_in_dir(struct vfs *vfs, struct directory *dir, const utf8char *name)
{
    struct file *ret;

    // Suppress unused warning
    (void)vfs;

    // Ensure we can't create a file with the same name as a directory
    if (cgc_list_find(&dir->subdirectories, cgc_dir_eq, (void *)name) != NULL)
        return NULL;

    // If file already exists, return it
    if ((ret = list_find_entry(struct file, list, &dir->files,
            cgc_file_eq, (void *)name)) != NULL) {
        return ret;
    }

    if ((ret = cgc_calloc(sizeof(struct file))) != NULL) {
        cgc_strncpy(ret->name, name, MAX_FILE_NAME_LENGTH);
        ret->parent = dir;
        list_push_entry_front(struct file, list, &dir->files, ret);
    }

    return ret;
}

/*
 * Unlinks `file` from its parent's file list and frees both its contents
 * and the file object.
 */
void
cgc_delete_file(struct vfs *vfs, struct file *file)
{
    // Suppress unused warning
    (void)vfs;

    list_remove_entry(struct file, list, &file->parent->files, file);
    cgc_free(file->contents);
    cgc_free(file);
}

#ifdef DEBUG
/*
 * Prints the tree below `dir`, one entry per line, indented with `level`
 * tab characters. Subdirectories (suffixed with '/') are printed and
 * descended into before the files of `dir`.
 */
static void
dump_directory(const struct directory *dir, unsigned int level)
{
    unsigned int i;
    struct file *cur_file;
    struct directory *cur_dir;

    list_for_each_entry(struct directory, list, &dir->subdirectories, cur_dir) {
        for (i = 0; i < level; i++)
            cgc_printf("\t");
        cgc_printf("%s/\n", cur_dir->name);

        dump_directory(cur_dir, level + 1);
    }

    list_for_each_entry(struct file, list, &dir->files, cur_file) {
        for (i = 0; i < level; i++)
            cgc_printf("\t");
        cgc_printf("%s\n", cur_file->name);
    }
}

/* Prints the whole file system, starting with "/" for the root. */
void
dump_vfs(const struct vfs *vfs)
{
    cgc_printf("/\n");
    dump_directory(vfs->root, 1);
}
#endif

================================================
FILE: corpus/cgc/UTF-late/notes.txt
================================================
In the compiled UTF-late.exe executable, the main function is at offset 0x4540 (0x404540 address).

================================================
FILE: corpus/cgc/UTF-late/src/service.c
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "libcgc.h"
#include "cgc_libc.h"
#include "cgc_utf8.h"
#include "cgc_vfs.h"

/* Directory under which every client-supplied file name is placed. */
static const utf8char pubroot_path[] = "/public/";
static struct vfs vfs;
static struct directory *pubroot = NULL;
static struct directory *admin = NULL;

/* Largest file size a client may request in WRITE_FILE. */
#define MAX_FILE_SIZE (PAGE_SIZE - 2 * sizeof(cgc_size_t))
/* Size of the buffers that hold "/public/" plus a canonicalized name. */
#define MAX_PATH_LENGTH (sizeof(ucscodepoint) * (sizeof(pubroot_path) + MAX_FILE_NAME_LENGTH))

/* Command codes read from the client in main(). */
enum fileserver_ops {
    READ_FILE,
    WRITE_FILE,
    LIST_FILES
};

/*
 * Builds the full path for client file name `src` in `dst`: the
 * "/public/" prefix followed by the UTF-8-canonicalized name.
 *
 * Returns 0 on success, -1 if the name contains '/'. The PATCHED build
 * looks for '/' in the canonicalized output; the unpatched build looks for
 * it in the raw input, before canonicalization has run.
 */
static int
cgc_canonicalize_path(utf8char *dst, const utf8char *src)
{
#ifdef PATCHED
    cgc_strncpy(dst, pubroot_path, sizeof(pubroot_path));
    cgc_utf8_canonicalize(dst + sizeof(pubroot_path) - 1, src, MAX_PATH_LENGTH);
    if (cgc_strchr(dst + sizeof(pubroot_path) - 1, '/') != NULL)
        return -1;
#else
    if (cgc_strchr(src, '/') != NULL)
        return -1;

    cgc_strncpy(dst, pubroot_path, sizeof(pubroot_path));
    cgc_utf8_canonicalize(dst + sizeof(pubroot_path) - 1, src, MAX_PATH_LENGTH);
#endif

    return 0;
}

/*
 * READ_FILE handler: reads a fixed MAX_FILE_NAME_LENGTH-byte name from
 * STDIN, resolves it under "/public/", and writes the file contents to
 * STDOUT. Returns 0 on success, -1 on any failure.
 */
static int
cgc_read_file(void)
{
    struct file *file;
    utf8char filename[MAX_FILE_NAME_LENGTH + 1];
    utf8char path[MAX_PATH_LENGTH];

    if (cgc_read_all(STDIN, filename, MAX_FILE_NAME_LENGTH) != MAX_FILE_NAME_LENGTH)
        return -1;
    filename[MAX_FILE_NAME_LENGTH] = '\0';

    if (cgc_canonicalize_path(path, filename) != 0)
        return -1;

    if ((file = cgc_lookup_file(&vfs, path)) == NULL)
        return -1;

    if (cgc_write_all(STDOUT, file->contents, file->size) != file->size)
        return -1;

    return 0;
}

/*
 * WRITE_FILE handler: reads a fixed-length name and a size from STDIN,
 * creates the file (it must not already exist), then reads `size` bytes of
 * contents from STDIN. Returns 0 on success, -1 on any failure; a file
 * created before the failure is deleted again.
 */
static int
cgc_write_file(void)
{
    struct file *file;
    cgc_size_t size;
    utf8char filename[MAX_FILE_NAME_LENGTH + 1];
    utf8char path[MAX_PATH_LENGTH];

    if (cgc_read_all(STDIN, filename, MAX_FILE_NAME_LENGTH) != MAX_FILE_NAME_LENGTH)
        return -1;
    filename[MAX_FILE_NAME_LENGTH] = '\0';

    if (cgc_read_all(STDIN, &size, sizeof(size)) != sizeof(size))
        return -1;

    if (size > MAX_FILE_SIZE)
        return -1;

    if (cgc_canonicalize_path(path, filename) != 0)
        return -1;

    if (cgc_lookup_file(&vfs, path) != NULL)
        return -1;

    if ((file = cgc_create_file(&vfs, path)) == NULL)
        return -1;

    file->size = size;

    // Special admin files
    if (file->parent == admin) {
        /* The contents pointer is taken from the first pointer-sized
         * bytes of the client-supplied name; nothing is allocated. */
        file->contents = *(unsigned char **)filename;
    } else {
        if ((file->contents = cgc_calloc(size)) == NULL) {
            cgc_delete_file(&vfs, file);
            return -1;
        }
    }

    if (cgc_read_all(STDIN, file->contents, file->size) != file->size) {
        cgc_delete_file(&vfs, file);
        return -1;
    }

    return 0;
}

/*
 * LIST_FILES handler: writes the MAX_FILE_NAME_LENGTH-byte name field of
 * every file directly inside the public root to STDOUT.
 * Returns 0 on success, -1 if a write comes up short.
 */
static int
cgc_list_files(void)
{
    struct file *cur;

    list_for_each_entry(struct file, list, &pubroot->files, cur)
        if (cgc_write_all(STDOUT, cur->name, MAX_FILE_NAME_LENGTH) != MAX_FILE_NAME_LENGTH)
            return -1;

    return 0;
}

/*
 * Service loop: creates "/public/" and "/admin", then repeatedly reads an
 * int command from STDIN, dispatches it, and writes the handler's int
 * result to STDOUT. A command of -1 ends the loop; unknown commands and
 * short command reads are skipped without a reply.
 */
int main(int cgc_argc, char *cgc_argv[]) {
    int cmd, cmd_ret;

    // Set up filesystem
    cgc_vfs_init(&vfs);
    pubroot = cgc_create_dir(&vfs, pubroot_path);
    admin = cgc_create_dir(&vfs, "/admin");

    while (1) {
        if (cgc_read_all(STDIN, &cmd, sizeof(cmd)) != sizeof(cmd))
            continue;

        if (cmd == -1)
            break;

        switch (cmd) {
        case READ_FILE:
            cmd_ret = cgc_read_file();
            break;
        case WRITE_FILE:
            cmd_ret = cgc_write_file();
            break;
        case LIST_FILES:
            cmd_ret = cgc_list_files();
            break;
        default:
            continue;
        }

        cgc_write_all(STDOUT, &cmd_ret, sizeof(cmd_ret));
    }

    cgc_vfs_destroy(&vfs);

    return 0;
}

================================================
FILE: corpus/cgc/cotton_swab_arithmetic/README.md
================================================
# INTERPRETTHIS Challenge Binary

## Author Information

"Narf Industries"

### DARPA performer group
Narf Industries (NRFIN)

## Description

Do the mice have enough cotton swabs to cross the wastebasket chasm and reach the cheese on the counter? They're not sure; this sounds like a job for arithmetic! One complicating factor: they need to do this math on their laptop, phones, watch and car. Because why not?
What better way to achieve cross-platform compatibility than to write arithmetic expressions in machine-agnostic bytecode? There's probably a bunch of better ways, but this is what they settled on. You're the cat and you must exploit the mice's cross-platform arithmetic machine. Good luck.

### Feature List

Interfacing with the CB is very simple. CRSs provide bytecode for verification and - if it passes - execution. The bytecode executes on a single register (ACC) + memory virtual machine. Available operations are the 4 standard arithmetic operations (add, subtract, multiply and divide) plus a special OUT operation that writes whatever is currently in ACC into the output parameter for return back to the CRS. Flags in the opcode dictate where the result of an operation is to be stored (ACC or a memory location), what constitutes the operands (is ACC an operand?) and how the operands are to be interpreted (are they immediate values or memory references?). More details are provided in service.h.

Here's a summary:

#) CB: allocate() memory for bytecode and scratch space (temporary storage)
#) CRS -> CB: bytecode instructions
#) CB: validate bytecode
#) CB: execute bytecode
#) CB -> CRS: output
- Output is the contents of a 32bit output register, populated with the contents of ACC each time OPCODE_OUT is executed.

## Vulnerabilities

The vulnerability is present in the verification stage. Patched code will bound memory accesses to only the scratch area. Unpatched code will incorrectly bound memory accesses to the scratch area + the bytecode area (which is immediately adjacent), effectively allowing bytecode to modify subsequent bytecode instructions after validation but prior to execution. The ability to modify bytecode via the vulnerability allows modification after Time of Check (verification), but prior to Time of Use (execution). Bytecode that seemed valid during verification becomes invalid during execution. This is a classic TOCTOU vulnerability.
Invalid bytecode provides CRSs with an arbitrary, relative RW primitive across the entire address space.

### CWE classification

20: Improper Input Validation
119: Improper Restriction of Operations within the Bounds of a Memory Buffer
125: Out-of-Bounds Read
367: Time-of-Check Time-of-Use (TOCTOU)
476: NULL Pointer Dereference
787: Out-of-Bounds Write

## Challenges

We expect this CB to be relatively easy to attack (crash), but difficult to patch. The "correct" way to patch is somewhat non-obvious from an autonomous analysis perspective. We relax this difficulty by not explicitly testing error conditions in our poller, improving patch flexibility. Among many possibilities, we expect competitors may try to:

#) Disallow outside-of-scratch-area reads/writes during execution phase (pushdown from verification phase).
#) Make scratch memory disjoint from bytecode memory.
#) (Our patching strategy) disallow bytecode self-modification by tightening verification phase checks.

In all cases, CBs must:

#) Handle dynamic dispatch, complex paths based on input inherent to an interpreter
#) Handle self-modifying bytecode; this implies accurately modeling memory

================================================
FILE: corpus/cgc/cotton_swab_arithmetic/lib/cgc_libc.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef LIBC_H
#define LIBC_H

#include "libcgc.h"

// Status codes shared by the library and the service; SUCCESS is 0 and
// every error code is negative.
#define SUCCESS 0
#define ERRNO_RECV -63
#define ERRNO_TRANSMIT -88
#define ERRNO_INSUFFICIENT_BYTES -32
#define ERRNO_VFRY_REJECT_OFF -27
#define ERRNO_INST_DECODE -67
#define ERRNO_INVALID_OPCODE -62

// Fixed-width integer names, provided as macros over the basic types.
#define uint8_t unsigned char
#define uint16_t unsigned short
#define uint32_t unsigned int

// Fragmentation related
// Both functions loop until `count` bytes have been transferred; if the
// byte-count pointer is non-NULL it receives the number of bytes moved.
int cgc_receive_all(int fd, void *buf, cgc_size_t count, cgc_size_t *rx_bytes);
int cgc_transmit_all(int fd, const void *buf, cgc_size_t count, cgc_size_t *tx_bytes);

////
// The following is verbatim from EAGLE_00004, but isn't included in the
// released binary (DEBUG is not defined), so this reuse shouldn't be a concern.
////
#ifdef DEBUG

// State bits stored in FILE.state.
#define _FILE_STATE_OPEN  1
#define _FILE_STATE_ERROR 2
#define _FILE_STATE_EOF   4
#define _FILE_HAVE_LAST   8

// Minimal stream object used by the debug-only fprintf below.
typedef struct _FILE {
   int fd;
   int state;
   int last;
} FILE;

extern FILE *stdin;
extern FILE *stdout;
extern FILE *stderr;

int fprintf(FILE * stream, const char *format, ...);

#endif
// ^ DEBUG

#endif
// ^ LIBC_H

================================================
FILE: corpus/cgc/cotton_swab_arithmetic/lib/libc.c
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "cgc_libc.h"

// Address fragmentation issue.
// Keep looping until we've receive'd count bytes.
// VERBATIM to JUSTINTIME int cgc_receive_all(int fd, void *buf, cgc_size_t count, cgc_size_t *rx_bytes) { int ret = SUCCESS; cgc_size_t bytes_left = count; cgc_size_t rx_bytes_local = 0; while (bytes_left) { rx_bytes_local = 0; if (SUCCESS != (ret = cgc_receive(STDIN, buf+(count-bytes_left), bytes_left, &rx_bytes_local))) { #ifdef DEBUG fprintf(stderr, "[E] receive () call within receive_all() failed\n"); #endif goto bail; } bytes_left -= rx_bytes_local; // Assume EOF if 0-lengthed buffer. if (0 == rx_bytes_local) { if (NULL != rx_bytes) { *rx_bytes = count-bytes_left; } goto bail; } } // If we got here, then we got all the bytes. if (NULL != rx_bytes) { *rx_bytes = count; } bail: return ret; } // Address fragmentation issue. // Keep looping until we've transmit'ed count bytes. // VERBATIM to JUSTINTIME int cgc_transmit_all(int fd, const void *buf, cgc_size_t count, cgc_size_t *tx_bytes) { int ret = SUCCESS; cgc_size_t bytes_left = count; cgc_size_t tx_bytes_local = 0; while (bytes_left) { tx_bytes_local = 0; if (SUCCESS != (ret = cgc_transmit(STDOUT, buf, bytes_left, &tx_bytes_local))) { #ifdef DEBUG fprintf(stderr, "[E] transmit () call within transmit_all() failed\n"); #endif goto bail; } bytes_left -= tx_bytes_local; // Assume EOF if 0-lengthed buffer. if (0 == tx_bytes_local) { if (NULL != tx_bytes) { *tx_bytes = count-bytes_left; } goto bail; } } // If we got here, then we got all the bytes. if (NULL != tx_bytes) { *tx_bytes = count; } bail: return ret; } //// // The following is verbatim from EAGLE_00004, but isn't included in the // released binary (DEBUG is not defined), so this reuse shouldn't be a concern. 
//// #ifdef DEBUG #ifdef WIN #include #else typedef __builtin_va_list va_list; #define va_start(ap, param) __builtin_va_start(ap, param) #define va_end(ap) __builtin_va_end(ap) #define va_arg(ap, type) __builtin_va_arg(ap, type) #endif static FILE std_files[3] = { {0, _FILE_STATE_OPEN}, {1, _FILE_STATE_OPEN}, {2, _FILE_STATE_OPEN} }; FILE *stdin = &std_files[0]; FILE *stdout = &std_files[1]; FILE *stderr = &std_files[2]; int vfprintf(FILE *stream, const char *format, va_list ap); int vdprintf(int fd, const char *format, va_list ap); #define IS_DIGIT 1 #define IS_UPPER 2 #define IS_LOWER 4 #define IS_SPACE 8 #define IS_XDIGIT 16 #define IS_CTRL 32 #define IS_BLANK 64 #define IS_ALPHA (IS_LOWER | IS_UPPER) #define IS_ALNUM (IS_ALPHA | IS_DIGIT) static unsigned char type_flags[256] = { 0, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_SPACE | IS_BLANK, IS_SPACE, IS_SPACE, IS_SPACE, IS_SPACE, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_CTRL, IS_SPACE | IS_BLANK, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, IS_DIGIT | IS_XDIGIT, 0, 0, 0, 0, 0, 0, 0, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER | IS_XDIGIT, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, IS_UPPER, 0, 0, 0, 0, 0, 0, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER | IS_XDIGIT, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, 
IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, IS_LOWER, 0, 0, 0, 0, 0, }; int isalpha(int c) { return (type_flags[c & 0xff] & IS_ALPHA) != 0; } int isdigit(int c) { return (type_flags[c & 0xff] & IS_DIGIT) != 0; } int isxdigit(int c) { return (type_flags[c & 0xff] & IS_XDIGIT) != 0; } int toupper(int c) { if (isalpha(c)) { return c & ~0x20; } return c; } cgc_size_t cgc_strlen(const char *str) { cgc_size_t res = 0; while (*str++) {res++;} return res; } int vfprintf(FILE * stream, const char *format, va_list ap) { return vdprintf(stream->fd, format, ap); } int fprintf(FILE * stream, const char *format, ...) { va_list va; va_start(va, format); return vfprintf(stream, format, va); } struct _fd_printer { int fd; int err; unsigned int count; }; //if flag != 0 return number of chars output so far static unsigned int fd_printer(char ch, void *_fp, int flag) { struct _fd_printer *fp = (struct _fd_printer *)_fp; if (flag) { return fp->count; } else { fp->count++; cgc_transmit(fp->fd, &ch, 1, NULL); } return 0; } #define STATE_NORMAL 0 #define STATE_ESCAPE 1 #define STATE_PERCENT 2 #define STATE_OCTAL 3 #define STATE_HEX 4 #define STATE_FLAGS 5 #define STATE_WIDTH 6 #define STATE_PRECISION 7 #define STATE_LENGTH 8 #define STATE_CONVERSION 9 #define STATE_WIDTH_ARG 10 #define STATE_WIDTH_VAL 11 #define STATE_PRECISION_ARG 12 #define STATE_PRECISION_VAL 13 #define STATE_NARG 15 #define FLAGS_TICK 1 #define FLAGS_LEFT 2 #define FLAGS_SIGN 4 #define FLAGS_SPACE 8 #define FLAGS_HASH 16 #define FLAGS_ZERO 32 #define LENGTH_H 1 #define LENGTH_HH 2 #define LENGTH_L 3 #define LENGTH_J 5 #define LENGTH_Z 6 #define LENGTH_T 7 #define LENGTH_CAPL 8 static char *r_utoa(unsigned int val, char *outbuf) { char *p = outbuf; *p = '0'; while (val) { *p++ = (val % 10) + '0'; val /= 10; } return p != outbuf ? 
(p - 1) : p; } //outbuf needs to be at least 22 chars static char *r_llotoa(unsigned long long val, char *outbuf) { char *p = outbuf; *p = '0'; while (val) { *p++ = (val & 7) + '0'; val >>= 3; } return p != outbuf ? (p - 1) : p; } static char *r_otoa(unsigned int val, char *outbuf) { return r_llotoa(val, outbuf); } //outbuf needs to be at least 22 chars static char *r_llxtoa(unsigned long long val, char *outbuf, int caps) { char *p = outbuf; *p = '0'; while (val) { char digit = (char)(val & 0xf); if (digit < 10) { digit += '0'; } else { digit = caps ? (digit + 'A' - 10) : (digit + 'a' - 10); } *p++ = digit; val >>= 4; } return p != outbuf ? (p - 1) : p; } static char *r_xtoa(unsigned int val, char *outbuf, int caps) { return r_llxtoa(val, outbuf, caps); } static int hex_value_of(char ch) { if (isdigit(ch)) { return ch - '0'; } else if (isalpha(ch)) { return toupper(ch) - 'A' + 10; } return -1; } //func is responsible for outputing the given character //user is a pointer to data required by func static void printf_core(unsigned int (*func)(char, void *, int), void *user, const char *format, va_list ap) { int state = STATE_NORMAL; int flags; int digit_count = 0; int value = 0; char ch; int arg_count = 0; int width_value; int prec_value; int field_arg; int length; char **args = (char**)ap; for (ch = *format++; ch; ch = *format++) { switch (state) { case STATE_NORMAL: if (ch == '%') { state = STATE_PERCENT; } else if (ch == '\\') { state = STATE_ESCAPE; } else { func(ch, user, 0); } break; case STATE_ESCAPE: switch (ch) { case 'n': func('\n', user, 0); break; case 't': func('\t', user, 0); break; case 'r': func('\r', user, 0); break; case 'b': func('\b', user, 0); break; case 'f': func('\f', user, 0); break; case 'v': func('\v', user, 0); break; case '\\': case '\'': case '"': func(ch, user, 0); break; case 'x': state = STATE_HEX; digit_count = 0; value = 0; break; default: if (ch > '0' && ch < '8') { state = STATE_OCTAL; digit_count = 1; value = ch - '0'; } else { 
func(*format, user, 0); } break; } if (state == STATE_ESCAPE) { state = STATE_NORMAL; } break; case STATE_PERCENT: if (ch == '%') { func(ch, user, 0); state = STATE_NORMAL; } else { state = STATE_NARG; flags = 0; format--; } break; case STATE_OCTAL: if (ch > '0' && ch < '8' && digit_count < 3) { digit_count++; value = value * 8 + (ch - '0'); if (digit_count == 3) { func(value, user, 0); state = STATE_NORMAL; } } else { func(value, user, 0); state = STATE_NORMAL; format--; } break; case STATE_HEX: if (isxdigit(ch) && digit_count < 2) { digit_count++; value = value * 16 + hex_value_of(ch); if (digit_count == 2) { func(value, user, 0); state = STATE_NORMAL; } } else { func(value, user, 0); state = STATE_NORMAL; format--; } break; case STATE_NARG: width_value = -1; prec_value = -1; flags = 0; length = 0; field_arg = -1; if (ch == '0') { format--; state = STATE_FLAGS; break; } if (isdigit(ch)) { //could be width or could be arg specifier or a 0 flag //width and arg values don't start with 0 width_value = 0; while (isdigit(ch)) { width_value = width_value * 10 + (ch - '0'); ch = *format++; } if (ch == '$') { field_arg = width_value - 1; width_value = 0; state = STATE_FLAGS; } else { //this was a width format--; state = STATE_PRECISION; } } else { format--; state = STATE_FLAGS; } break; case STATE_FLAGS: switch (ch) { case '\'': flags |= FLAGS_TICK; break; case '-': flags |= FLAGS_LEFT; break; case '+': flags |= FLAGS_SIGN; break; case ' ': flags |= FLAGS_SPACE; break; case '#': flags |= FLAGS_HASH; break; case '0': flags |= FLAGS_ZERO; break; default: format--; if ((flags & (FLAGS_ZERO | FLAGS_LEFT)) == (FLAGS_ZERO | FLAGS_LEFT)) { //if both '-' and '0' appear, '0' is ignored flags &= ~FLAGS_ZERO; } state = STATE_WIDTH; break; } break; case STATE_WIDTH: if (ch == '*') { ch = *format++; int width_arg = 0; if (isdigit(ch)) { while (isdigit(ch)) { width_arg = width_arg * 10 + (ch - '0'); ch = *format++; } width_arg--; if (ch != '$') { //error } } else { width_arg = 
arg_count++; format--; } width_value = (int)args[width_arg]; } else if (isdigit(ch)) { width_value = 0; while (isdigit(ch)) { width_value = width_value * 10 + (ch - '0'); ch = *format++; } format--; } else { //no width specified format--; } state = STATE_PRECISION; break; case STATE_PRECISION: if (ch == '.') { //have a precision ch = *format++; if (ch == '*') { ch = *format++; int prec_arg = 0; if (isdigit(ch)) { while (isdigit(ch)) { prec_arg = prec_arg * 10 + (ch - '0'); ch = *format++; } prec_arg--; if (ch != '$') { //error } } else { prec_arg = arg_count++; format--; } prec_value = (int)args[prec_arg]; } else if (isdigit(ch)) { prec_value = 0; while (isdigit(ch)) { prec_value = prec_value * 10 + (ch - '0'); ch = *format++; } format--; } else { //no precision specified format--; } } else { //no precision specified format--; } state = STATE_LENGTH; break; case STATE_LENGTH: switch (ch) { case 'h': length = LENGTH_H; if (*format == 'h') { length++; format++; } break; case 'l': length = LENGTH_L; if (*format == 'l') { // length++; format++; } break; case 'j': length = LENGTH_J; break; case 'z': length = LENGTH_Z; break; case 't': length = LENGTH_T; break; case 'L': length = LENGTH_CAPL; break; default: format--; break; } state = STATE_CONVERSION; break; case STATE_CONVERSION: { char num_buf[32]; char *num_ptr; int use_caps = 1; int sign; int val; long long llval; if (field_arg == -1) { field_arg = arg_count++; } switch (ch) { case 'd': case 'i': { int len; switch (length) { case LENGTH_H: val = (short)(int)args[field_arg]; sign = val < 0; if (sign) { val = -val; } num_ptr = r_utoa(val, num_buf); break; case LENGTH_HH: val = (char)(int)args[field_arg]; sign = val < 0; if (sign) { val = -val; } num_ptr = r_utoa(val, num_buf); break; case LENGTH_L: default: val = (long)args[field_arg]; sign = val < 0; if (sign) { val = -val; } num_ptr = r_utoa(val, num_buf); break; } len = num_ptr - num_buf + 1; if (width_value == -1) { //by default min length is the entire value 
width_value = len; if (sign || (flags & FLAGS_SIGN)) { width_value++; } } if (prec_value == -1) { //by default max is entire value prec_value = len; if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) { //widen precision if necessary to pad to width with '0' if (sign || (flags & FLAGS_SIGN)) { prec_value = width_value - 1; } else { prec_value = width_value; } } } else { if (prec_value < len) { prec_value = len; } //number won't need leading zeros flags &= ~FLAGS_ZERO; } if (flags & FLAGS_LEFT) { if (sign) { func('-', user, 0); if (width_value > 0) { width_value--; } } else if ((flags & FLAGS_SIGN) != 0) { func('+', user, 0); if (width_value > 0) { width_value--; } } while (prec_value > len) { func('0', user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > (prec_value + 1)) { func(' ', user, 0); width_value--; } if (sign) { func('-', user, 0); if (width_value > 0) { width_value--; } } else if ((flags & FLAGS_SIGN) != 0) { func('+', user, 0); if (width_value > 0) { width_value--; } } if (width_value > prec_value) { func(' ', user, 0); width_value--; } while (prec_value > len) { func('0', user, 0); prec_value--; } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; } } break; } case 'o': { int len; switch (length) { case LENGTH_H: num_ptr = r_otoa((unsigned short)(unsigned int)args[field_arg], num_buf); break; case LENGTH_HH: num_ptr = r_otoa((unsigned char)(unsigned int)args[field_arg], num_buf); break; case LENGTH_L: default: num_ptr = r_otoa((unsigned long)args[field_arg], num_buf); break; } if (flags & FLAGS_HASH) { if (*num_ptr != '0') { num_ptr++; *num_ptr = '0'; } } len = num_ptr - num_buf + 1; if (width_value == -1) { //by default min length is the entire value width_value = len; } if (prec_value == -1) { //by default max is 
entire value prec_value = len; if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) { //widen precision if necessary to pad to width with '0' prec_value = width_value; } } else { if (prec_value < len) { prec_value = len; } flags &= ~FLAGS_ZERO; } if (flags & FLAGS_LEFT) { while (prec_value > len) { func('0', user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > prec_value) { func(' ', user, 0); width_value--; } while (prec_value > len) { func('0', user, 0); prec_value--; } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; } } break; } case 'u': { int len; switch (length) { case LENGTH_H: num_ptr = r_utoa((unsigned short)(unsigned int)args[field_arg], num_buf); break; case LENGTH_HH: num_ptr = r_utoa((unsigned char)(unsigned int)args[field_arg], num_buf); break; case LENGTH_L: default: num_ptr = r_utoa((unsigned long)args[field_arg], num_buf); break; } len = num_ptr - num_buf + 1; if (width_value == -1) { //by default min length is the entire value width_value = len; } if (prec_value == -1) { //by default max is entire value prec_value = len; if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) { //widen precision if necessary to pad to width with '0' prec_value = width_value; } } else { if (prec_value < len) { prec_value = len; } flags &= ~FLAGS_ZERO; } if (flags & FLAGS_LEFT) { while (prec_value > len) { func('0', user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > prec_value) { func(' ', user, 0); width_value--; } while (prec_value > len) { func('0', user, 0); prec_value--; } while (prec_value != 0) { 
func(*num_ptr--, user, 0); prec_value--; } } break; } case 'x': use_caps = 0; //now fall into X case case 'X': { int len; switch (length) { case LENGTH_H: num_ptr = r_xtoa((unsigned short)(unsigned int)args[field_arg], num_buf, use_caps); break; case LENGTH_HH: num_ptr = r_xtoa((unsigned char)(unsigned int)args[field_arg], num_buf, use_caps); break; case LENGTH_L: default: num_ptr = r_xtoa((unsigned long)args[field_arg], num_buf, use_caps); break; } len = num_ptr - num_buf + 1; if (width_value == -1) { //by default min length is the entire value width_value = len; } if (prec_value == -1) { //by default max is entire value prec_value = len; if ((flags & FLAGS_ZERO) != 0 && prec_value < width_value) { //widen precision if necessary to pad to width with '0' prec_value = width_value; } } else { if (prec_value < len) { prec_value = len; } flags &= ~FLAGS_ZERO; } if (flags & FLAGS_LEFT) { if (flags & FLAGS_HASH && (len != 1 || *num_ptr != '0')) { func('0', user, 0); if (width_value > 0) { width_value--; } func(use_caps ? 'X' : 'x', user, 0); if (width_value > 0) { width_value--; } } while (prec_value > len) { func('0', user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > (prec_value + 2)) { func(' ', user, 0); width_value--; } if (flags & FLAGS_HASH && (len != 1 || *num_ptr != '0')) { func('0', user, 0); if (width_value > 0) { width_value--; } func(use_caps ? 
'X' : 'x', user, 0); if (width_value > 0) { width_value--; } } else { while (width_value > prec_value) { func(' ', user, 0); width_value--; } } while (prec_value > len) { func('0', user, 0); prec_value--; } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; } } break; } case 'f': case 'F': break; case 'e': case 'E': break; case 'g': case 'G': break; case 'a': case 'A': break; case 'c': { unsigned char ch = (unsigned char)(unsigned int)args[field_arg]; if (width_value == -1) { width_value = 1; } if (flags & FLAGS_LEFT) { func((char)ch, user, 0); if (width_value > 0) { width_value--; } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > 1) { func(' ', user, 0); width_value--; } func(ch, user, 0); } break; } case 's': { const char *s_arg = (const char *)args[field_arg]; int len = cgc_strlen(s_arg); if (width_value == -1) { //by default min length is the entire string width_value = len; } if (prec_value == -1 || prec_value > len) { //by default max is entire string but no less than width prec_value = len; } if (flags & FLAGS_LEFT) { while (prec_value != 0) { func(*s_arg++, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > prec_value) { func(' ', user, 0); width_value--; } while (prec_value != 0) { func(*s_arg++, user, 0); prec_value--; } } break; } case 'p': { int len; flags |= FLAGS_HASH; num_ptr = r_xtoa((unsigned int)args[field_arg], num_buf, 0); len = num_ptr - num_buf + 1; if (prec_value == -1) { //by default max is entire value prec_value = len; } else { if (prec_value < len) { prec_value = len; } flags &= ~FLAGS_ZERO; } if (width_value == -1) { //by default min length is the entire value width_value = prec_value + 2; } if (flags & FLAGS_LEFT) { func('0', user, 0); if (width_value > 0) { width_value--; } func('x', user, 0); if (width_value > 0) { width_value--; } while (prec_value > len) { 
func('0', user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; if (width_value > 0) { width_value--; } } while (width_value != 0) { func(' ', user, 0); width_value--; } } else { while (width_value > (prec_value + 2)) { func(' ', user, 0); width_value--; } func('0', user, 0); if (width_value > 0) { width_value--; } func('x', user, 0); if (width_value > 0) { width_value--; } while (prec_value > len) { func('0', user, 0); prec_value--; } while (prec_value != 0) { func(*num_ptr--, user, 0); prec_value--; } } break; } case 'n': { void *np = (void*)args[field_arg]; unsigned int len = func(0, user, 1); switch (length) { case LENGTH_HH: *(unsigned char*)np = (unsigned char)len; break; case LENGTH_H: *(unsigned short*)np = (unsigned short)len; break; case LENGTH_L: default: *(unsigned int*)np = len; break; } break; } case 'C': break; case 'S': break; default: break; } state = STATE_NORMAL; break; } } } } int vdprintf(int fd, const char *format, va_list ap) { struct _fd_printer fp; fp.fd = fd; fp.err = 0; fp.count = 0; printf_core(fd_printer, &fp, format, ap); return fp.count; } #endif ================================================ FILE: corpus/cgc/cotton_swab_arithmetic/notes.txt ================================================ In the compiled cotton_swab_arithmetic.exe executable, the main function is at offset 0x1D30 (0x401D30 address). 
================================================
FILE: corpus/cgc/cotton_swab_arithmetic/src/cgc_service.h
================================================
/*
 * Copyright (C) Narf Industries
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef SERVICE_H
#define SERVICE_H

#include "libcgc.h"
#include "cgc_libc.h"

// Sizes (in bytes) of the two regions of the service's single allocation:
// the scratch memory comes first and the received bytecode follows it.
#define SCRATCH_SZ 2048
#define BYTECODE_SZ 2048
#define FALSE 0
#define TRUE 1

// One decoded instruction.
typedef struct instruction {
    uint8_t len;     // total encoded length in bytes (opcode byte + both operands)
    uint8_t opcode;  // raw first byte: INST_MASK_* flag bits plus the opcode bits
    uint32_t op1;    // operand 1: an immediate, or an offset into scratch memory
    uint32_t op2;    // operand 2: an immediate, or an offset into scratch memory
} inst_t;

// Walks `len` bytes of bytecode and checks every offset operand against the
// allowed bound. Returns SUCCESS or an ERRNO_* status.
int cgc_bytecode_vrfy(uint8_t *bytecode, uint16_t len);
// Executes `len` bytes of bytecode against `scratch`; an OUT instruction
// stores the accumulator into *out. Returns SUCCESS or an ERRNO_* status.
int cgc_bytecode_exec(uint8_t *bytes, uint16_t len, uint8_t *scratch, uint32_t *out);

// Bytecode is (single) register-based with memory accesses available.
// Only a single register is used, which can be thought of as ACC (accumulator).
// Results of arithmetic operations are stored in this register, which can be
// accessed with the appropriate opcodes.
// All arithmetic operations are modulo 0xFFFFFFFF (no special handling for // over/underflows, etc). // Bytecode format: // <(optional)operand2> // 1B: opcode // 4B / 2B: offset1 / immediate1 // 4B / 2B: (optional) offset2 / immediate2 #define INST_OFF_SZ 4 #define INST_IMM_SZ 2 // NOTE1: even if one operand is ACC, both operands still consume space. // The amount of space consumed is dictated by their immediate / offset status. // The value of the extra operand is ignored. // NOTE2: This encoding scheme is not space-efficient. It's designed to be // easy to grok. //// // Masks //// #define INST_MASK_DST 0x01 // XXXX XX0X: store into ACC // XXXX XX1X: store into mem (offset in operand1) // NOTE: If we're storing into memory, we've consumed an operand for the memory // offset. We arbitrarily choose op1 to hold this offset. This implies: // INST_MASK_ACC - because our arithmetic will involve ACC & op2. // INST_MASK_OP1 - because op1 will be interpreted as an offset. // If INST_MASK_DST is set and either INST_MASK_ACC or INST_MASK_OP1 is not, // the instruction is invalid. #define INST_MASK_ACC 0x02 // XXXX XXX0: 2 operands // XXXX XXX1: ACC & operand2 #define INST_MASK_OP1 0x04 // XXXX X0XX: operand1 is immediate // XXXX X1XX: operand1 is offset (memory) // NOTE: this flag along with op1 value is ignored if INST_MASK_ACC is set. 
#define INST_MASK_OP2 0x08 // XXXX 0XXX: operand2 is immediate // XXXX 1XXX: operand2 is offset (memory) #define INST_MASK_OPCODE 0xF0 // YYYY XXXX: YYYY is opcode //// // Opcodes //// #define OPCODE_ADD 0x00 // 0000: ADD #define OPCODE_SUB 0x10 // 0001: SUB #define OPCODE_MUL 0x20 // 0010: MUL #define OPCODE_DIV 0x30 // 0011: DIV #define OPCODE_OUT 0xF0 // 1111: OUT (OUTPUT <- ACC) #define OUT_DEFAULT 0xDEADBEEF // some probably-incorrect value #endif ================================================ FILE: corpus/cgc/cotton_swab_arithmetic/src/service.c ================================================ /* * Copyright (C) Narf Industries * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cgc_service.h" // Decode instruction. // Do not access beyond bytecode+len. // Assumes at least 1 byte is available. // If an instruction is INVALID, return non-SUCCESS. // INVALID inst contents are undefined. 
// IN: pointer to bytes, len (number of bytes left to safely consume) // OUT: populated inst struct // RET: status int cgc_inst_decode(uint8_t *bytes, uint16_t len, inst_t *inst) { int ret = SUCCESS; uint32_t cursor = 0; inst->opcode = bytes[cursor]; // Determine how long the instruction is. // For each offset, add 2B, for each immediate, add 4B. uint8_t inst_len = 1; if (inst->opcode & INST_MASK_OP1) { inst_len += INST_OFF_SZ; } else { inst_len += INST_IMM_SZ; } if (inst->opcode & INST_MASK_OP2) { inst_len += INST_OFF_SZ; } else { inst_len += INST_IMM_SZ; } // If we would cgc_read off the end of the bytecode, bail. if (inst_len > len) { #ifdef DEBUG fprintf(stderr, "[D] decode() | insufficient bytes provided; " "required: 0x%02x (%d), available: 0x%02x (%d)\n", inst_len, inst_len, len, len); #endif ret = ERRNO_INSUFFICIENT_BYTES; goto bail; } // We have sufficient space; decode the instruction. inst->len = inst_len; if (inst->opcode & INST_MASK_OP1) { inst->op1 = bytes[cursor+1] << 24 | bytes[cursor+2] << 16 | bytes[cursor+3] << 8| bytes[cursor+4] << 0; #ifdef DEBUG fprintf(stderr, "[D] decode() | op1 (off) | bytes[cursor+1:cursor+4] = 0x%02x 0x%02x 0x%02x 0x%02x\n", bytes[cursor+1], bytes[cursor+2], bytes[cursor+3], bytes[cursor+4]); #endif cursor += 4; } else { inst->op1 = bytes[cursor+1] << 8 | bytes[cursor+2] << 0; #ifdef DEBUG fprintf(stderr, "[D] decode() | op1 (imm) | bytes[cursor+1:cursor+2] = 0x%02x 0x%02x\n", bytes[cursor+1], bytes[cursor+2]); #endif cursor += 2; } if (inst->opcode & INST_MASK_OP2) { inst->op2 = bytes[cursor+1] << 24 | bytes[cursor+2] << 16 | bytes[cursor+3] << 8| bytes[cursor+4] << 0; #ifdef DEBUG fprintf(stderr, "[D] decode() | op2 (off) | bytes[cursor+1:cursor+4] = 0x%02x 0x%02x 0x%02x 0x%02x\n", bytes[cursor+1], bytes[cursor+2], bytes[cursor+3], bytes[cursor+4]); #endif } else { inst->op2 = bytes[cursor+1] << 8 | bytes[cursor+2] << 0; #ifdef DEBUG fprintf(stderr, "[D] decode() | op2 (imm) | bytes[cursor+1:cursor+2] = 0x%02x 
0x%02x\n", bytes[cursor+1], bytes[cursor+2]); #endif } #ifdef DEBUG fprintf(stderr, "[D] decode() | successfully decoded instruction:\n" " inst->len = %d\n" " inst->opcode = 0x%02x\n" " inst->op1 = 0x%08x\n" " inst->op2 = 0x%08x\n", inst->len, inst->opcode, inst->op1, inst->op2); #endif bail: return ret; } int cgc_bytecode_vrfy(uint8_t *bytes, uint16_t len) { int ret = SUCCESS; #ifdef DEBUG fprintf(stderr, "[D] bytecode_vrfy() | init\n"); uint32_t inst_count = 0; #endif uint8_t *cursor = bytes; inst_t inst; uint8_t *dst = NULL; // Loop over the opcodes, verify offset within each one. while (len) { #ifdef DEBUG fprintf(stderr, "\n[D] bytecode_vrfy() | instruction #%d\n", inst_count); #endif if (SUCCESS != (ret = cgc_inst_decode(cursor, len, &inst))) { #ifdef DEBUG fprintf(stderr, "[D] bytecode_vrfy() | non-SUCCESS from inst_decode' bailing...\n"); #endif ret = ERRNO_INST_DECODE; goto bail; } // Verify offsets fall within allowed bounds. #ifdef PATCHED if (inst.opcode & INST_MASK_OP1 && (SCRATCH_SZ - sizeof(uint32_t) < inst.op1)) { #else if (inst.opcode & INST_MASK_OP1 && (SCRATCH_SZ + BYTECODE_SZ - sizeof(uint32_t) < inst.op1)) { #endif ret = ERRNO_VFRY_REJECT_OFF; goto bail; } #ifdef PATCHED if (inst.opcode & INST_MASK_OP2 && (SCRATCH_SZ - sizeof(uint32_t) < inst.op2)) { #else if (inst.opcode & INST_MASK_OP2 && (SCRATCH_SZ + BYTECODE_SZ - sizeof(uint32_t) < inst.op2)) { #endif ret = ERRNO_VFRY_REJECT_OFF; goto bail; } // These *shouldn't* under/overflow due to (inst_len > len) check in inst_decode(). cursor += inst.len; len -= inst.len; #ifdef DEBUG fprintf(stderr, "[D] bytecode_vrfy() | len = %d\n", len); inst_count++; #endif } bail: #ifdef DEBUG if (ERRNO_VFRY_REJECT_OFF == ret) { fprintf(stderr, "[D] bytecode_vrfy() | REJECT due to offset check\n"); } #endif return ret; } // We've verified the bytecode for safety, now we execute it. 
int cgc_bytecode_exec(uint8_t *bytes, uint16_t len, uint8_t *scratch, uint32_t *out) { int ret = SUCCESS; #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | init\n"); uint32_t inst_count = 0; #endif inst_t inst; uint8_t *cursor = bytes; uint32_t acc = 0; uint32_t op1 = 0; uint32_t op2 = 0; uint32_t *dst = NULL; while (len) { #ifdef DEBUG fprintf(stderr, "\n[D] bytecode_exec() | instruction #%d\n", inst_count); #endif if (SUCCESS != (ret = cgc_inst_decode(cursor, len, &inst))) { #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | non-SUCCESS from inst_decode; bailing...\n"); #endif ret = ERRNO_INST_DECODE; goto bail; } //// // Error Tree //// // INST_MASK_DST // +- INST_MASK_ACC // +- INST_MASK_OP1 // +- !INST_MASK_OP1: ERROR // +- !INST_MASK_ACC: ERROR //// // Get dst. //// if (inst.opcode & INST_MASK_DST) { dst = (uint32_t *)(scratch + inst.op1); } else { dst = &acc; } //// // Get op1. //// if (inst.opcode & INST_MASK_DST && inst.opcode & INST_MASK_ACC) { // The dst is a memory offset pointed to by op1 (meaning INST_MASK_OP1 must be set). // The arith operands are ACC and op2. if (!(inst.opcode & INST_MASK_OP1)) { #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | INVALID opcode: mem dst & immediate op1; bailing...\n"); #endif ret = ERRNO_INVALID_OPCODE; goto bail; } op1 = acc; } else if (inst.opcode & INST_MASK_DST && !(inst.opcode & INST_MASK_ACC)) { // The dst is a memory offset pointed to by op1. // The arith operands are op1 and op2. // This is ILLEGAL; op1 cannot specify both dst offset and operand. #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | INVALID opcode: mem dst & not ACC as op1; bailing...\n"); #endif ret = ERRNO_INVALID_OPCODE; goto bail; } else if (!(inst.opcode & INST_MASK_DST) && inst.opcode & INST_MASK_ACC) { // The dst is ACC. // The arith operands are ACC and op2. op1 = acc; } else if (!(inst.opcode & INST_MASK_DST) && !(inst.opcode & INST_MASK_ACC)) { // The dst is ACC. // The arith operands are op1 and op2. 
if (inst.opcode & INST_MASK_OP1) { op1 = scratch[inst.op1+0] << 0 | scratch[inst.op1+1] << 8 | scratch[inst.op1+2] << 16| scratch[inst.op1+3] << 24; #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | scratch + inst.op1 = 0x%08x; op1 = *(scratch + inst.op1) = 0x%08x\n", scratch + inst.op1, op1); #endif } else { // op1 is an immediate. op1 = inst.op1; } } //// // Get op2. //// if (inst.opcode & INST_MASK_OP2) { op2 = scratch[inst.op2+0] << 0 | scratch[inst.op2+1] << 8 | scratch[inst.op2+2] << 16| scratch[inst.op2+3] << 24; #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | scratch + inst.op2 = 0x%08x; op2 = *(scratch + inst.op2) = 0x%08x\n", scratch + inst.op2, op2); #endif } else { op2 = inst.op2; } //// // Do the operation. //// switch(inst.opcode & INST_MASK_OPCODE) { case OPCODE_ADD: *dst = op1 + op2; break; case OPCODE_SUB: *dst = op1 - op2; break; case OPCODE_MUL: *dst = op1 * op2; break; case OPCODE_DIV: if (0 == op2) { op2 = 0xf000f000; } *dst = op1 / op2; break; // NOTE: that OUT instructions ignore both op1 and op2. case OPCODE_OUT: *out = acc; break; default: #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | INVALID opcode; bailing...\n"); #endif ret = ERRNO_INVALID_OPCODE; goto bail; break; } #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | dst = 0x%08x; *dst = 0x%08x\n", dst, *dst); #endif // These *shouldn't* under/overflow due to (inst_len > len) check in inst_decode(). cursor += inst.len; len -= inst.len; #ifdef DEBUG fprintf(stderr, "[D] bytecode_exec() | #%04d: acc = 0x%08x\n", inst_count++, acc); fprintf(stderr, "[D] bytecode_exec() | len = %d\n", len); #endif } bail: return ret; } int main(int cgc_argc, char *cgc_argv[]) { int ret = SUCCESS; cgc_size_t rx_bytes = 0; cgc_size_t tx_bytes = 0; // Allocate scratch + bytecode space. // Per man, allocate()d memory is zero-filled. 
uint8_t *space = NULL; if (SUCCESS != (ret = cgc_allocate(SCRATCH_SZ + BYTECODE_SZ, FALSE, (void **)&space))) { #ifdef DEBUG fprintf(stderr, "[E] allocate()\n"); #endif return ret; } #ifdef DEBUG fprintf(stderr, "[D] allocate() | space = 0x%08x\n", space); #endif // Get length of bytecode (2B). uint16_t bytecode_len = 0; rx_bytes = 0; if (SUCCESS != (ret = cgc_receive_all(STDIN, (void *)&bytecode_len, sizeof(bytecode_len), &rx_bytes))) { #ifdef DEBUG fprintf(stderr, "[E] during receive_all() of bytecode_len\n"); #endif ret = ERRNO_RECV; goto bail; } // Verify bytecode_len. // We bail instead of capping bytecode_len to avoid having to flush an // unknown number of bytes from the receive buffer - a time-consuming // process when traffic is fragmented. if (BYTECODE_SZ - sizeof(uint32_t) < bytecode_len) { #ifdef DEBUG fprintf(stderr, "[D] BYTECODE_SZ < bytecode_len; bailing...\n"); #endif goto bail; } // Read in the bytecode itself. rx_bytes = 0; if (SUCCESS != (ret = cgc_receive_all(STDIN, (void *)(space+SCRATCH_SZ), bytecode_len, &rx_bytes))) { #ifdef DEBUG fprintf(stderr, "[E] during receive_all() of bytecode\n"); #endif ret = ERRNO_RECV; goto bail; } // Verify bytecode. if (SUCCESS != (ret = cgc_bytecode_vrfy(space+SCRATCH_SZ, bytecode_len))) { #ifdef DEBUG fprintf(stderr, "[D] bytecode_vrfy(): REJECT\n"); #endif goto bail; } #ifdef DEBUG fprintf(stderr, "[D] bytecode_vrfy(): PASS\n"); #endif // Execute bytecode. uint32_t out = OUT_DEFAULT; if (SUCCESS != (ret = cgc_bytecode_exec(space+SCRATCH_SZ, bytecode_len, space, &out))) { #ifdef DEBUG fprintf(stderr, "[D] bytecode execution failed; bailing...\n"); #endif goto bail; } // Send answer back to CRS. 
tx_bytes = 0; if (SUCCESS != (ret = cgc_transmit_all(STDOUT, &out, sizeof(out), &tx_bytes))) { #ifdef DEBUG fprintf(stderr, "[E] transmit_all(answer)\n"); #endif ret = ERRNO_TRANSMIT; goto bail; } bail: if (space) { cgc_deallocate(space, SCRATCH_SZ + BYTECODE_SZ); } return ret; } ================================================ FILE: corpus/google/README.md ================================================ The fuzzing test corpus in this directory is taken from google's fuzzer test suite, which can be found at https://github.com/google/fuzzer-test-suite. ================================================ FILE: corpus/google/vorbis/build.sh ================================================ #!/bin/bash # Copyright 2017 Google Inc. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"); readonly INSTALL_DIR="$PWD/INSTALL" get_git_revision() { GIT_REPO="$1" GIT_REVISION="$2" TO_DIR="$3" [ ! -e $TO_DIR ] && git clone $GIT_REPO $TO_DIR && (cd $TO_DIR && git reset --hard $GIT_REVISION) } build_ogg() { rm -rf BUILD/ogg mkdir -p BUILD/ogg $INSTALL_DIR cp -r SRC/ogg/* BUILD/ogg/ (cd BUILD/ogg && ./autogen.sh && ./configure \ --prefix="$INSTALL_DIR" \ --enable-static \ --disable-shared \ --disable-crc \ && make clean && make -j $JOBS && make install) } build_vorbis() { rm -rf BUILD/vorbis mkdir -p BUILD/vorbis $INSTALL_DIR cp -r SRC/vorbis/* BUILD/vorbis/ (cd BUILD/vorbis && ./autogen.sh && ./configure \ --prefix="$INSTALL_DIR" \ --enable-static \ --disable-shared \ && make clean && make -j $JOBS && make install) } get_git_revision https://github.com/xiph/ogg.git \ c8391c2b267a7faf9a09df66b1f7d324e9eb7766 SRC/ogg get_git_revision https://github.com/xiph/vorbis.git \ c1c2831fc7306d5fbd7bc800324efd12b28d327f SRC/vorbis build_ogg build_vorbis if [[ $CXX == "" ]]; then CXX="g++" fi $CXX $CXXFLAGS decode_fuzzer.cc \ -o decode_fuzzer.exe -L"$INSTALL_DIR/lib" -I"$INSTALL_DIR/include" \ -lvorbisfile -lvorbis -logg 
================================================ FILE: corpus/google/vorbis/decode_fuzzer.cc ================================================ #include #include #include #include #include #include struct vorbis_data { const uint8_t *current; const uint8_t *data; size_t size; }; size_t read_func(void *ptr, size_t size1, size_t size2, void *datasource) { vorbis_data* vd = (vorbis_data *)(datasource); size_t len = size1 * size2; if (vd->current + len > vd->data + vd->size) { len = vd->data + vd->size - vd->current; } memcpy(ptr, vd->current, len); vd->current += len; return len; } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { ov_callbacks memory_callbacks = {0}; memory_callbacks.read_func = read_func; vorbis_data data_st; data_st.size = Size; data_st.current = Data; data_st.data = Data; OggVorbis_File vf; int result = ov_open_callbacks(&data_st, &vf, NULL, 0, memory_callbacks); if (result < 0) { return 0; } int current_section = 0; int eof = 0; char buf[4096]; int read_result; while (!eof) { read_result = ov_read(&vf, buf, sizeof(buf), 0, 2, 1, ¤t_section); if (read_result != OV_HOLE && read_result <= 0) { eof = 1; } } ov_clear(&vf); return 0; } int main(int argc, char ** argv) { FILE * fp = 0; char * filename; uint8_t * data; size_t size; struct stat st; if(argc < 2) { printf("%s filename\n", argv[0]); return 1; } filename = argv[1]; if(stat(filename, &st)) { printf("Could not find file %s\n", filename); return 1; } data = (uint8_t *)malloc(st.st_size); if(!data) { printf("Malloc failed\n"); return 1; } fp = fopen(filename, "r"); if(!fp) { printf("Could not open file %s\n", filename); return 1; } size = fread(data, 1, st.st_size, fp); fclose(fp); LLVMFuzzerTestOneInput(data, size); free(data); return 0; } ================================================ FILE: corpus/google/vorbis/decode_fuzzer.exe.stackdump ================================================ Exception: STATUS_ACCESS_VIOLATION at rip=001004283AA rax=4200000042000000 
rbx=0000000000000000 rcx=000006FFFFCD0010 rdx=0000000000002824 rsi=0000000000000004 rdi=00000000FFFFB040 r8 =0000000000002824 r9 =0000000000002824 r10=0000000000000002 r11=0000000000000000 r12=0000000000000008 r13=0000000600044F60 r14=0000000600044A20 r15=0000000000000004 rbp=00000000FFFFB0C0 rsp=00000000FFFFB020 program=C:\killerbeez\src\killerbeez\corpus\google\vorbis\decode_fuzzer.exe, pid 4380, thread main cs=0033 ds=002B es=002B fs=0053 gs=002B ss=002B Stack trace: Frame Function Args 000FFFFB0C0 001004283AA (000FFFFB250, 000FFFFB370, 00000000530, 00100414BB0) 000FFFFB4D0 00100429A6D (000FFFFCA00, 000FFFFC730, 00000000000, 0060004105A) 000FFFFC7D8 00100402618 (001800BAEA3, 000FFFFC750, 000FFFFCAE0, 000FFFFCAE0) 000FFFFB608 00100402868 (00600043630, 00600000410, 000FFFFCC50, 00100000002) 000FFFFB740 00100407637 (001800DDD4B, 006000003A0, 000FFFFC9D0, 00000000002) 000FFFFB740 00100401322 (00000000F96, 00100000001, 001801C31D0, 0008E458768) 000FFFFCC00 00100401494 (000FFFFCC50, 30001000000FF00, 00180047B51, 00180046B90) 000FFFFCCB0 00180047BC2 (00000000000, 00000000000, 00000000000, 00000000000) 00000000000 00180045863 (00000000000, 00000000000, 00000000000, 00000000000) 000FFFFFFF0 00180045914 (00000000000, 00000000000, 00000000000, 00000000000) End of stack trace ================================================ FILE: corpus/google/vorbis/notes.txt ================================================ See https://github.com/google/fuzzer-test-suite/tree/master/vorbis-2017-12-11 for more information. Fuzzing benchmark for Vorbis. Contains CVE-2018-5146 (pwn2own 2018), a buffer overflow. Reproducer provided (crash-e86e0482b8d66f924e50e62f5d7cc36a0acb03a7). A second buffer overflow is also present. Reproducer provided (crash-8c5dea6410b0fb0b21ff968a9966a0bd7956405f). This bug no longer reproduces after the fix for CVE-2018-5146. Also contains a null-dereference. Reproducer provided (crash-23c2d78e497bf4aebe5859e3092657cb0af4c299). 
This bug also no longer reproduces after the fix for CVE-2018-5146. In the included decode_fuzzer.exe, the main function is at offset 0x1363 (0x401363 address). ================================================ FILE: corpus/hang/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (hang-linux) add_executable(hang-linux ${PROJECT_SOURCE_DIR}/hang.c) ================================================ FILE: corpus/hang/hang.c ================================================ int main() { while(1); } ================================================ FILE: corpus/libtest/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (libtest) add_executable(libtest ${PROJECT_SOURCE_DIR}/test.c) add_executable(libtest_pie ${PROJECT_SOURCE_DIR}/test.c) add_library(test1 SHARED ${PROJECT_SOURCE_DIR}/lib1.c) add_library(test2 SHARED ${PROJECT_SOURCE_DIR}/lib2.c) set_target_properties(libtest PROPERTIES LINK_FLAGS "-no-pie") target_link_libraries(libtest test1 test2) target_link_libraries(libtest_pie test1 test2) ================================================ FILE: corpus/libtest/lib1.c ================================================ #include int test_func1(char * buffer) { char * nil = NULL; if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if (buffer[3] == 'D') { *nil = 0; } else { puts("lib1 Wrong 3"); } } else { puts("lib1 Wrong 2"); } } else { puts("lib1 Wrong 1"); } } else { puts("lib1 Wrong 0"); } return 0; } ================================================ FILE: corpus/libtest/lib2.c ================================================ #include int test_func2(char * buffer) { char * nil = NULL; if (buffer[0] == 'E') { if (buffer[1] == 'F') { if (buffer[2] == 'G') { if (buffer[3] == 'H') { *nil = 0; } else { puts("lib2 Wrong 3"); } } else { puts("lib2 Wrong 2"); } } else { puts("lib2 Wrong 1"); } } else { puts("lib2 Wrong 0"); } return 0; } 
================================================ FILE: corpus/libtest/libs.h ================================================ #pragma once int test_func1(char * buffer); int test_func2(char * buffer); ================================================ FILE: corpus/libtest/test.c ================================================ #include "libs.h" #include #include int main() { char buffer[4]; memset(buffer, 0, 4); read(0, buffer, sizeof(buffer)); test_func1(buffer); test_func2(buffer); return 0; } ================================================ FILE: corpus/network/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (network-linux) add_executable(server-linux ${PROJECT_SOURCE_DIR}/server/server.cpp) add_executable(client-linux ${PROJECT_SOURCE_DIR}/client/client.cpp) ================================================ FILE: corpus/network/client/client.cpp ================================================ #ifdef _WIN32 #define _WINSOCK_DEPRECATED_NO_WARNINGS #include #include #else #include #include #include #include #include #include #include #include #include #define INVALID_SOCKET -1 #define SOCKET_ERROR -1 #endif #include // This program crashes if it receives ABCD on a socket from a server. // You can test it by doing: nc -l 4444, ./client.exe in another shell, // and typing the input you want to send in the nc window. // Note that VisualStudio will not overwrite the binaries already // included with Killerbeez. (corpus/network/{client,server}.exe) // It will instead put newly-compiled binaries in corpus/network/x64. 
void process_data(char * buffer) { char * nil = NULL; if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if (buffer[3] == 'D') { *nil = 'E'; } else { printf("Wrong 3\n"); } } else { printf("Wrong 2\n"); } } else { printf("Wrong 1\n"); } } else { printf("Wrong 0\n"); } } int main(int argc, char ** argv) { #ifdef _WIN32 WSADATA wsaData; SOCKET sock = INVALID_SOCKET; #else int sock = INVALID_SOCKET; #endif struct sockaddr_in addr; int result, port; char buffer[512]; const char * ip; if (argc < 3) { printf("Using 127.0.0.1:4444\n"); ip = "127.0.0.1"; port = 4444; } else { ip = argv[1]; port = atoi(argv[2]); } #ifdef _WIN32 if (WSAStartup(MAKEWORD(2, 2), &wsaData)) { printf("WSAStartup Failed\n"); return 1; } #endif sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock == INVALID_SOCKET) return -1; addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(ip); addr.sin_port = htons(port); if (connect(sock, (sockaddr *)&addr, sizeof(addr)) == SOCKET_ERROR) return -1; printf("Connected.\n"); result = recv(sock, buffer, sizeof(buffer) - 1, 0); if (result > 0) printf("Received: %s", buffer); process_data(buffer); #ifdef _WIN32 shutdown(sock, SD_BOTH); closesocket(sock); #else shutdown(sock, SHUT_RDWR); close(sock); #endif return 0; } ================================================ FILE: corpus/network/client/client.vcxproj ================================================ Debug Win32 Release Win32 Debug x64 Release x64 15.0 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698} Win32Proj client 10.0.17134.0 Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode true true false false NotUsing Level3 Disabled true WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true 
_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true true true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 MaxSpeed true true true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true MultiThreaded Console true true true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) ================================================ FILE: corpus/network/client/client.vcxproj.filters ================================================  {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms Source Files ================================================ FILE: corpus/network/close.txt ================================================ @BCD ================================================ FILE: corpus/network/multipart.txt ================================================ ["::MEM::40424344","::MEM::BBBBBBBB"] ================================================ FILE: corpus/network/network.sln ================================================  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.27428.2002 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "server", "server\server.vcxproj", 
"{8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "client", "client\client.vcxproj", "{4B6243FA-0079-4FFC-9100-5CEA5D0F3698}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 Release|x64 = Release|x64 Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Debug|x64.ActiveCfg = Debug|x64 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Debug|x64.Build.0 = Debug|x64 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Debug|x86.ActiveCfg = Debug|Win32 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Debug|x86.Build.0 = Debug|Win32 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Release|x64.ActiveCfg = Release|x64 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Release|x64.Build.0 = Release|x64 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Release|x86.ActiveCfg = Release|Win32 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7}.Release|x86.Build.0 = Release|Win32 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Debug|x64.ActiveCfg = Debug|x64 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Debug|x64.Build.0 = Debug|x64 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Debug|x86.ActiveCfg = Debug|Win32 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Debug|x86.Build.0 = Debug|Win32 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Release|x64.ActiveCfg = Release|x64 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Release|x64.Build.0 = Release|x64 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Release|x86.ActiveCfg = Release|Win32 {4B6243FA-0079-4FFC-9100-5CEA5D0F3698}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {6BAE5368-94FB-433E-8E4B-B681FD9D2E10} EndGlobalSection EndGlobal ================================================ FILE: corpus/network/server/server.cpp ================================================ // linux: compile with 
gcc -o server-linux server.cpp // windows: compile w/ visual studio #ifdef _WIN32 #define _WINSOCK_DEPRECATED_NO_WARNINGS #include #include #else #include #include #include #include #include #include #include #include #include #define INVALID_SOCKET -1 #define SOCKET_ERROR -1 #endif #include #define PORT 4444 // this program will listen on 127.0.0.1:4444 and crash if it receives the // input "ABCD". // Note that VisualStudio will not overwrite the binaries already // included with Killerbeez. (corpus/network/{client,server}.exe) // It will instead put newly-compiled binaries in corpus/network/x64. void process_data(char * buffer) { char * nil = NULL; if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if (buffer[3] == 'D') { *nil = 'E'; } else { printf("Wrong 3\n"); } } else { printf("Wrong 2\n"); } } else { printf("Wrong 1\n"); } } else { printf("Wrong 0\n"); } } #ifdef _WIN32 int tcp_listen(SOCKET * sock) #else int tcp_listen(int * sock) #endif { struct sockaddr_in addr; *sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (*sock == INVALID_SOCKET) return -1; #ifndef _WIN32 // linux-only // https://stackoverflow.com/a/24194999 int enable = 1; if (setsockopt(*sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)) < 0) { printf("setsockopt failed.\n"); return -1; } #endif addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = htons(PORT); if (bind(*sock, (const sockaddr *)&addr, sizeof(addr)) == SOCKET_ERROR) { #ifdef _WIN32 printf("bind failed with error: %d\n", WSAGetLastError()); closesocket(*sock); #else printf("bind failed with error: %d\n", errno); close(*sock); #endif return 1; } if (listen(*sock, SOMAXCONN) == SOCKET_ERROR) { #ifdef _WIN32 printf("listen failed with error: %d\n", WSAGetLastError()); closesocket(*sock); #else printf("listen failed with error: %d\n", errno); close(*sock); #endif return 1; } return 0; } #ifdef _WIN32 int udp_listen(SOCKET * sock) #else int udp_listen(int * sock) #endif { 
struct sockaddr_in addr; *sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); if (*sock == INVALID_SOCKET) return -1; #ifndef _WIN32 // linux-only // https://stackoverflow.com/a/24194999 int enable = 1; if (setsockopt(*sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)) < 0) { printf("setsockopt failed.\n"); return -1; } #endif addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = htons(PORT); if (bind(*sock, (const sockaddr *)&addr, sizeof(addr)) == SOCKET_ERROR) { #ifdef _WIN32 printf("bind failed with error: %d\n", WSAGetLastError()); closesocket(*sock); #else printf("bind failed with error: %d\n", errno); close(*sock); #endif return 1; } return 0; } int main(int argc, char ** argv) { #ifdef _WIN32 WSADATA wsaData; #endif int i, done, forever = 0, udp = 0, num_skipped_inputs = 0; #ifdef _WIN32 SOCKET server = INVALID_SOCKET, client = INVALID_SOCKET; #else int server = INVALID_SOCKET, client = INVALID_SOCKET; #endif char buffer[4096]; struct sockaddr_in addr; int addrlen = sizeof(addr); if (argc > 1 && !strcmp("-loop", argv[1])) forever = 1; if (argc > 2) num_skipped_inputs = atoi(argv[2]); if (argc > 2) udp = strcmp("-udp", argv[3]) == 0; #ifdef _WIN32 if (WSAStartup(MAKEWORD(2, 2), &wsaData)) { printf("WSAStartup Failed\n"); return 1; } #endif if ((!udp && tcp_listen(&server)) || (udp && udp_listen(&server))) return 1; done = 0; while (!done || forever) { done = 1; if (udp) { for (i = 0; i < num_skipped_inputs; i++) #ifdef _WIN32 recvfrom(server, buffer, sizeof(buffer), 0, (sockaddr *)&addr, &addrlen); #else recvfrom(server, buffer, sizeof(buffer), 0, (sockaddr *)&addr, (socklen_t *)&addrlen); #endif #ifdef _WIN32 if (recvfrom(server, buffer, sizeof(buffer), 0, (sockaddr *)&addr, &addrlen) != SOCKET_ERROR) #else if (recvfrom(server, buffer, sizeof(buffer), 0, (sockaddr *)&addr, (socklen_t *)&addrlen) != SOCKET_ERROR) #endif process_data(buffer); } else { client = accept(server, NULL, NULL); if (client == INVALID_SOCKET) { 
#ifdef _WIN32 printf("accept failed with error: %d\n", WSAGetLastError()); closesocket(server); #else printf("accept failed with error: %d\n", errno); close(server); #endif return 1; } for (i = 0; i < num_skipped_inputs; i++) recv(client, buffer, sizeof(buffer), 0); if (recv(client, buffer, sizeof(buffer), 0) > 0) process_data(buffer); #ifdef _WIN32 shutdown(client, SD_BOTH); closesocket(client); #else shutdown(client, SHUT_RDWR); close(client); #endif } } #ifdef _WIN32 closesocket(server); #else close(server); #endif return 0; } ================================================ FILE: corpus/network/server/server.vcxproj ================================================ Debug Win32 Release Win32 Debug x64 Release x64 15.0 {8D91B6E4-5A17-4C0A-A5B7-DF5ED80AC9E7} Win32Proj server 10.0.17134.0 Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode true true false false NotUsing Level3 Disabled true WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true _DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true true true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) NotUsing Level3 MaxSpeed true true true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true Console true true true 
kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;ws2_32.lib;%(AdditionalDependencies) ================================================ FILE: corpus/network/server/server.vcxproj.filters ================================================  {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms Source Files ================================================ FILE: corpus/persist/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (persist) set(PERSIST_SRC ${PROJECT_SOURCE_DIR}/test.c) add_executable(nopersist ${PERSIST_SRC}) add_executable(persist ${PERSIST_SRC}) add_executable(persist_hang ${PERSIST_SRC}) add_executable(deferred ${PERSIST_SRC}) add_executable(deferred_nohook ${PERSIST_SRC}) target_compile_definitions(persist PUBLIC PERSIST) target_compile_definitions(persist_hang PUBLIC PERSIST PUBLIC HANG) target_compile_definitions(deferred PUBLIC SLOW_STARTUP) target_compile_definitions(deferred_nohook PUBLIC SLOW_STARTUP PUBLIC DEFERRED_NOHOOK) target_link_libraries(persist forkserver) target_link_libraries(persist_hang forkserver) target_link_libraries(deferred forkserver) target_link_libraries(deferred_nohook forkserver) include_directories(${PROJECT_SOURCE_DIR}/../../instrumentation/) ================================================ FILE: corpus/persist/test.c ================================================ #include #include #include #if defined(PERSIST) || defined(DEFERRED_NOHOOK) #include #endif int test_func() { char buffer[4]; char * nil = NULL; FILE * fp = stdin; memset(buffer, 0, 4); read(0, buffer, sizeof(buffer)); if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if 
(buffer[3] == 'D') { *nil = 'E'; } else { puts("Wrong 3"); } } else { puts("Wrong 2"); } } else { puts("Wrong 1"); } } else { puts("Wrong 0"); } return 0; } int main() { #ifdef SLOW_STARTUP sleep(5); #endif #ifdef DEFERRED_NOHOOK KILLERBEEZ_INIT(); #endif #ifdef PERSIST while(KILLERBEEZ_LOOP()) { #endif #ifdef HANG while(1); #endif test_func(); #ifdef PERSIST } #endif return 0; } ================================================ FILE: corpus/test/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (test-linux) add_executable(test-linux ${PROJECT_SOURCE_DIR}/test.c) ================================================ FILE: corpus/test/inputs/close.txt ================================================ @BCD ================================================ FILE: corpus/test/inputs/crash.txt ================================================ ABCD ================================================ FILE: corpus/test/inputs/input.txt ================================================ AAAA ================================================ FILE: corpus/test/inputs/multipart.txt ================================================ ["::MEM::41414141414141414141414141414141","::MEM::42424242424242424242424242424242"] ================================================ FILE: corpus/test/inputs/telnet_multipart.txt ================================================ ["::MEM::fffb01fffb01fffb01","::MEM::fffb03fffd18fffd1f","::MEM::68656c6c6f20776f726c640a0d"] ================================================ FILE: corpus/test/notes.txt ================================================ test.c is just a very simple testcase to make sure our instrumentation is working as expected. It looks at the first 4 characters of input, and takes a different one for each character matching "ABCD". If the full string matches ABCD, then the test program will crash (by writing to NULL). 
In the compiled test.exe executable, the main function is at offset 0x1000 (0x401000 address). ================================================ FILE: corpus/test/test.c ================================================ #include #include int main(int argc, char ** argv) { char buffer[4]; char * nil = NULL; FILE * fp = stdin; if (argc > 1) { fp = fopen(argv[1], "rb+"); if (!fp) { puts("Couldn't open file\n"); return 1; } } memset(buffer, 0, 4); fread(buffer, 1, 4, fp); fclose(fp); if (buffer[0] == 'A') { if (buffer[1] == 'B') { if (buffer[2] == 'C') { if (buffer[3] == 'D') { *nil = 'E'; } else { puts("Wrong 3"); } } else { puts("Wrong 2"); } } else { puts("Wrong 1"); } } else { puts("Wrong 0"); } //puts("Sleeping now"); //Sleep(1000 * 100); //while (1) {} return 0; } ================================================ FILE: docs/AFL.md ================================================ # AFL based Instrumentation The AFL instrumentation module provides coverage information for the target program. The AFL instrumentation module utilizes the GCC, LLVM, or QEMU based instrumentation in order to obtain an AFL-style bitmap of the program coverage. The AFL instrumentation is only available on Linux; for WinAFL based instrumentation see the [DynamoRIO documentation](docs/DynamoRIO.md). This document describes the AFL instrumentation module, how to use it, and the changes that were made from the original [AFL implementation](http://lcamtuf.coredump.cx/afl/). # Background [AFL](http://lcamtuf.coredump.cx/afl/) is a state-of-the-art fuzzer that can employ several different types of program instrumentation in order to increase the efficiency of the fuzzer by utilizing coverage information. As one of the most popular fuzzers available a large number of developers have contributed enhancements and additional features. As such, Killerbeez hopes to build off of these contributions by reusing the instrumentation methods available in AFL. 
AFL supports instrumentation of source code through the [GCC](https://gcc.gnu.org/) and [LLVM](https://llvm.org/) based instrumentation, as well as the binary instrumentation through the use of a modified version of [QEMU](https://www.qemu.org/). # Killerbeez Implementation Differences ### Fork Server Differences In order to standardize the fork server protocol between the AFL instrumentation and the [IPT instrumentation](docs/IPT.md), the AFL instrumentation has been slightly modified. The fork server protocol in Killerbeez is based on 1-byte commands being sent to the fork server and 4-byte responses returning. Five commands are supported: 1. `EXIT` - kill any child processes and exit 2. `FORK` - fork a new child, but wait to run the new target executable 3. `RUN` - Tell the newly forked child to run target executable 4. `FORK_RUN` - fork a new child and run the target executable immediately 5. `GET_STATUS` - Return the status (from `waitpid`) of the last child The GCC, LLVM, and QEMU instrumentation only implements `EXIT`, `FORK_RUN`, and `GET_STATUS`, whereas the `LD_PRELOAD` library based fork server used in the IPT instrumentation implements all 5 commands. ### QEMU Instrumentation Differences The QEMU instrumentation included in Killerbeez has been patched with a number of third party patches which fix bugs or add enhancements to it. These patches are available in [vanhauser-thc's github repo](https://github.com/vanhauser-thc/afl-patches/). The following patches have been included: * `afl-qemu-speed.diff` - Updates QEMU to allow caching, ~3 times speed improvement. See [abiondo's AFL repo](https://github.com/abiondo/afl) or this [post](https://abiondo.me/2018/09/21/improving-afl-qemu-mode/) describing the work for more details. * `afl-qemu-ppc64.diff` - Updates the AFL QEMU instrumentation to work with PowerPC * `afl_qemu_optimize_map.diff` - Optimizes the AFL log function in the QEMU instrumentation. 
* `afl_qemu_optimize_entrypoint.diff` - Fixes entrypoint detection in QEMU instrumentation on ARM. # GCC Instrumentation As Killerbeez's GCC instrumentation is based off of AFL's GCC instrumentation, the process for instrumenting a target is very similar. As such, [the original README](https://github.com/mirrorer/afl/blob/master/docs/README#L84)'s instructions for instrumenting a target may provide helpful information. First, the `afl-gcc` compiler tool needs to be compiled. This tool can be built by running the following commands: ``` $ cd afl_progs/ $ make ``` The resulting `afl-gcc` and `afl-g++` tools can then be used in place of the regular gcc/g++ compilers to instrument any source code compiled. The correct way to compile the target program may vary depending on the specifics of the build process, but a nearly-universal approach would be: ``` $ CC=/path/to/killerbeez/afl_progs/afl-gcc ./configure $ make clean all ``` For C++ programs, you would also want to set `CXX=/path/to/killerbeez/afl_progs/afl-g++`. Once the target program has been instrumented, it can be fuzzed using the `fuzzer` program. ``` $ ./fuzzer stdin afl bit_flip -d '{"path":"/path/to/test/program"}' -n 10 -sf /path/to/seed/file ``` # LLVM Instrumentation As Killerbeez's LLVM instrumentation is based off of AFL's LLVM instrumentation, the process for instrumenting a target is very similar. As such, [the LLVM README](https://github.com/mirrorer/afl/blob/master/llvm_mode/README.llvm)'s instructions for instrumenting a target may provide helpful information. First, the `afl-clang-fast` compiler tool needs to be compiled. This tool can be built by running the following commands: ``` $ cd afl_progs/llvm_mode $ make ``` If `make` cannot find a version of `llvm-config` in `PATH`, you may need to specify the `LLVM_CONFIG` environment variable. The exact filename of `llvm-config` will vary based on your specific distribution.
For Ubuntu 16.04.5 LTS, the make command should be run as follows: ``` $ make LLVM_CONFIG=llvm-config-3.8 ``` The resulting `afl-clang-fast` and `afl-clang-fast++` tools can then be used in place of the regular clang/clang++ compilers to instrument any source code compiled. Thus, you can instrument the target in a way similar to the GCC instrumentation, e.g.: ``` $ CC=/path/to/killerbeez/afl_progs/afl-clang-fast ./configure $ make clean all ``` For C++ programs, you would also want to set `CXX=/path/to/killerbeez/afl_progs/afl-clang-fast++`. Once the target program has been instrumented, it can be fuzzed using the `fuzzer` program. ``` $ ./fuzzer stdin afl bit_flip -d '{"path":"/path/to/test/program"}' -n 10 -sf /path/to/seed/file ``` ### Persistence Mode The LLVM based instrumentation supports persistence mode to further increase the speed of fuzzing. Persistence mode involves executing multiple inputs without restarting the target process. While this approach can be much quicker, it can cause instabilities in the target process if not set up properly. In order for persistence mode to work, the target must reset its state after each test case. In LLVM based instrumentation, persistence mode is accomplished by modifying the source code of the target to call the `__AFL_LOOP()` macro. This macro is used to mark the start and stop of the target process testing a single input. An ideal program for persistence mode is one that has very little global state, or the state can easily be reset. The structure of a persistence mode program is shown below, where the `__AFL_LOOP` macro is used to call the fork server. A more complete example program and Makefile that can be used with LLVM persistence mode is available in the corpus/afl_test/ directory of this repository. ``` while(__AFL_LOOP()) { // Read input data. // Call library code to be fuzzed. // Reset state.
} ``` Once a program has been instrumented, persistence mode can be enabled by setting the AFL instrumentation's `persistence_max_cnt` option. The `persistence_max_cnt` option defines how many inputs to test in a single process before restarting the target program. This value can be determined experimentally, but a good starting value is 1000. An example `fuzzer` command that utilizes persistence mode is shown below: ``` $ ./fuzzer stdin afl afl -d '{"path":"/path/to/test/program"}' -n 5000 -sf /path/to/seed/file -i '{"persistence_max_cnt":1000}' ``` ### Deferred Startup Mode The AFL instrumentation fork server tries to optimize performance of the target process by executing the target binary until it reaches the `main` function, and then forking all new processes from the copy stopped at `main`. This ensures all of the startup code that is executed prior to the `main` function is only ever run once. However, if a target process has a large startup cost, fuzzing will still be slow. In these cases, it is beneficial to use the fork server's deferred startup mode, to wait until after the process has finished starting up to start the fork server. To enable the deferred startup mode, find a suitable location in the code where the delayed forking can take place. Ideally, this location would be after any startup work is performed but before the actual processing of an input begins. More information on the process for determining this location is available in the original AFL's [LLVM instrumentation README](https://github.com/mirrorer/afl/blob/master/llvm_mode/README.llvm#L82). Once this location is selected, add the following code to indicate that the fork server should be started at this location: ``` __AFL_INIT(); ``` Once this code is added, the target program can be compiled with `afl-clang-fast` and it can then be used with the `fuzzer`. To enable the deferred startup mode, the `deferred_startup` option should be passed to the AFL instrumentation module's options.
An example `fuzzer` command that utilizes deferred startup mode is shown below: ``` $ ./fuzzer stdin afl afl -d '{"path":"/path/to/test/program"}' -n 5000 -sf /path/to/seed/file -i '{"deferred_startup":1}' ``` # QEMU Instrumentation If source code is not available or the target cannot be successfully instrumented via the GCC or LLVM instrumentation described above, AFL's QEMU instrumentation may be the right approach. AFL's QEMU instrumentation supports the ability to perform on-the-fly instrumentation of black-box binaries through its user space emulation mode. Additionally, this instrumentation can be used to fuzz binaries built for a different architecture than the host processor (i.e. fuzzing an ARM binary on an x86 computer). Before the QEMU instrumentation can be used, the `afl-qemu-trace` binary must be built. The commands below will download the QEMU source code, update the code to include the instrumentation, and build the `afl-qemu-trace` binary. For additional instructions and caveats, see [README.qemu_mode](afl_progs/qemu_mode/README.qemu). ``` $ cd afl_progs/qemu_mode $ ./build_qemu_support.sh ``` Once `afl-qemu-trace` is compiled, the target program can be fuzzed with the `fuzzer`. To enable the QEMU instrumentation, the `qemu_mode` option should be passed to the AFL instrumentation module's options. 
An example `fuzzer` command that utilizes qemu mode is shown below: ``` $ ./fuzzer stdin afl bit_flip -d '{"path":"/path/to/test/program"}' -n 10 -sf /path/to/seed/file -i '{"qemu_mode":1}' ``` If the AFL instrumentation cannot automatically detect the location of the `afl-qemu-trace` binary, you will need to specify the path to `afl-qemu-trace` with the `qemu_path` option: ``` $ ./fuzzer stdin afl bit_flip -d '{"path":"/path/to/test/program"}' -n 10 -sf /path/to/seed/file -i '{"qemu_mode":1,"qemu_path":"/path/to/afl-qemu-trace"}' ``` ================================================ FILE: docs/BUILD.md ================================================ # Build Instructions This document describes the process of compiling Killerbeez on Linux and Windows. ## Windows ### Prerequisites To build Killerbeez on Windows you will need Microsoft Visual Studio 2017, Cygwin, Radamsa, and DynamoRIO. Unless otherwise noted, all of the snippets below use cmd.exe. ### Installation 1. Install [Visual Studio 2017 Community](https://www.visualstudio.com/downloads/). Version 15.5.7 has been tested to work with Killerbeez. Anything later should also work. Earlier versions which support cmake will likely work but have not been tested and may require slight changes to the build settings. + The following workloads/components will be needed to build Killerbeez. They can be added with the Visual Studio Installer. 1. Desktop development with C++ 2. Linux development with C++ 3. Visual C++ tools for CMake 4. Git for Windows 2. Install [Cygwin](https://cygwin.com/install.html) (only required for the radamsa mutator). + Use `C:\cygwin64` as the installation directory. + Make sure the packages `gcc-core`, `make`, `git`, and `wget` are being installed. + Add the Cygwin `bin/` (e.g. `C:\cygwin64\bin`) to your PATH environment variable. 3. 
Download the Killerbeez source code ``` set WORKDIR=C:/ :: We'll use forward slashes (Windows doesn't care) to avoid escaping backslashes cd %WORKDIR% git clone https://github.com/grimm-co/killerbeez.git ``` 4. Build [Radamsa](https://gitlab.com/akihe/radamsa) (optional). + Clone the Radamsa repository into %WORKDIR% from a Cygwin terminal and build: ``` cd /cygdrive/c/killerbeez git clone https://gitlab.com/akihe/radamsa.git cd radamsa make ``` 5. Install [DynamoRIO](http://dynamorio.org/). Use the [latest build available](https://console.cloud.google.com/storage/browser/chromium-dynamorio/builds). A direct link to the latest build as of 3/14/18 can be found [here](https://storage.googleapis.com/chromium-dynamorio/builds/DynamoRIO-Windows-6.2.17295-0xa77808f.zip). + Download the zip file and extract it so that the main directory (the one containing bin32/ and bin64/ directories) is `%WORKDIR%/killerbeez/dynamorio` + *Note:* The reason we have to use the latest build is that [commit c575ad](https://github.com/DynamoRIO/dynamorio/commit/c575ad16f8943eb6946e8c875eb248d948390537) is needed to support binaries built with VS 2017 on Windows 10. This commit is not included in the 7.0.0-RC1 release. 6. Build Killerbeez + Open the repository `killerbeez` within Visual Studio (File -> Open -> CMake..) and build it using (CMake -> Build All). This should build the fuzzer and its dependencies from the other repos. If successful, you'll see an aggregate `build/` directory in the root of your working directory. In it, the compiled executables and libraries from all three projects will be found in folders named after the architecture (e.g. x64) and build type (e.g. Debug). + The fuzzer.exe executable can be found at `%WORKDIR%/killerbeez/build/x64/Debug/killerbeez/fuzzer.exe` ## Linux and Mac ### Prerequisites To build Killerbeez on Linux/Mac you will need a compiler (gcc or clang), make, and cmake.
To build the AFL instrumentation with gcc, clang, and qemu, there are a few extra packages needed. The dependency lists below will make sure you can compile everything to get all the cool features. macOS (brew) ``` brew install autoconf automake libtool gcc cmake pkg-config ``` Debian 9 (stretch) / Ubuntu 18.04 (bionic) / Ubuntu 16.04 (xenial): ``` sudo apt install llvm clang libtool-bin build-essential cmake automake bison flex libglib2.0-dev libc6-dev-i386 libpixman-1-dev ``` Ubuntu 14.04 (trusty) ``` sudo apt install llvm clang libtool build-essential cmake automake bison flex libglib2.0-dev libc6-dev-i386 git ``` Fedora (tested on 29 and 30): ``` sudo dnf install llvm clang llvm-devel libtool libstdc++-static cmake bison flex glib2-devel glibc-devel.i686 zlib-devel ``` Notes: Ubuntu 12.04 (precise) doesn't have a recent enough version of CMake (it has 2.8.7, but 2.8.8 needed) in the repositories. It should work if you compile CMake 2.8.8 or later yourself, but it is not a tested distribution. Debian 8 (jessie) fails to build/install due to what looks like a bug in CMake, though we did not take the time to figure out the specific error. Debian 10 (buster) and Ubuntu 18.04 both have versions of clang which do not currently work with the version of the llvm instrumentation from AFL. This will be fixed when we replace the standard AFL programs with the ones from AFL++. On macOS (at least on 10.13.4 (High Sierra)), Apple has reportedly removed the ability to load dylibs using relative paths.[1] There are reports that SIP needs to be disabled[2] to fix this, however setting DYLD_LIBRARY_PATH to point to the location of the .dylib files (usually $REPOROOT/build/killerbeez) was sufficient in our tests. For now, just set this environment variable if there are errors about RPATH or loading .dylibs. In the long run, we'll be investigating what other projects have done[3] to work around this issue. 
[1] https://github.com/tensorflow/tensorflow/issues/6729#issuecomment-272583349 [2] https://github.com/BVLC/caffe/issues/3227 [3] https://github.com/alexgkendall/caffe-segnet/pull/68/commits/f282c0f784e95460d55e18d68933f2ef66bd3b47 ### Installation Clone the killerbeez repo ``` # the --recursive is needed to check out submodules git clone --recursive https://github.com/grimm-co/killerbeez.git cd killerbeez # Make a build directory and compile the code. mkdir build; cd build; cmake ..; make radamsa all # radamsa isn't in "all" by default because of Windows ``` If everything compiled, the fuzzer and other Killerbeez files will be in `build/killerbeez`, and the mutators will be under `build/mutators`. ================================================ FILE: docs/CI.md ================================================ # Setting Up GitLab CI Runners for Killerbeez We use GitLab to develop Killerbeez internally; these are the instructions for creating CI runners that test the build on a number of platforms. ## Prerequisites * A GitLab server hosting Killerbeez * A CI server with the following installed: * Vagrant * VirtualBox ## Instructions * Clone this repo onto your server * `cd vagrant/ci_runner` * `cp runner_vars.example runner_vars` * Edit `runner_vars` with the parameters of your Gitlab server: * `CI_SERVER_URL` - HTTPS URL to the GitLab server * `REGISTRATION TOKEN` - CI token generated by GitLab (get it from the Settings > CI/CD > Runners page for the Killerbeez project) * `CI_SERVER_TLS_CA_FILE` - If your GitLab server's certificate is signed by a private CA, place the CA certificate in the `vagrant/ci_runner` directory and set this variable. The certificate will be copied to `/killerbeez` in the VM, so if your certificate is named `ca.crt`, set `CI_SERVER_TLS_CA_FILE=/killerbeez/ca.crt`. * Any other settings that gitlab-runner accepts as environment variables. See `gitlab-runner register -h` for more options. 
* `vagrant up` This will bring up the runners for a bunch of platforms. If you would like to bring up the runners one at a time, first run ``` vagrant status ``` to see the list of runners available, then run ``` vagrant up ``` to bring up the specified VM. ## Technical Details There are three kinds of runners created by the Vagrantfile: 1. For OSes that are listed as supported by the [official Linux package repositories](https://docs.gitlab.com/runner/install/linux-repository.html) we start up a VM from a Vagrant Box of that OS and install the gitlab-runner package. The runner uses the `shell` executor to run builds. 2. For all other linux platforms, we prebuild a Docker image of the OS with dependency packages installed. We run gitlab-runner in a docker container and use the `docker` executor to run builds using the prebuilt OS image. 3. Windows. For these we download the gitlab-runner binary directly and install it as a service. In all cases, the `.gitlab-ci.yml` specifies the OSes to build on using tags. The Docker runners are configured with the prebuilt docker image as the default so that it does not have to be specified in the job configuration. ================================================ FILE: docs/DynamoRIO.md ================================================ # DynamoRIO Persistence Mode The DynamoRIO instrumentation module provides coverage information for the target program. This module is largely copied from [WinAFL](https://github.com/ivanfratric/winafl), and as such has many of the same advantages and disadvantages. The DynamoRIO instrumentation module utilizes persistence mode to greatly increase the speed of fuzzing. This document describes how to use persistence mode with the DynamoRIO instrumentation module. # Background Persistence mode involves executing multiple inputs without restarting the process. While this approach is much quicker, it can cause instabilities in the target process. 
In the DynamoRIO instrumentation module, persistence mode is accomplished by selecting a target function to wrap, and re-executing this function for each input. The re-execution of this function is done by recording the program counter, the stack address, and the function parameters, and then simply setting the registers and arguments back to the original values when the function finishes. As you can imagine, this can cause problems with larger programs that rely on the program's global state. The target function must be carefully selected to ensure that it rereads the input to ensure that the new input is processed. Further, the target function must also finish in order to ensure that the fuzzer can quickly reset and run a new input. # Options In order to enable persistence mode, several options must be passed to the DynamoRIO instrumentation. Arguments: * `-fuzz_iterations` - The number of fuzz inputs to send to a target before restarting the target. Default is 1 iteration (i.e. persistence mode is disabled). * `-coverage_modules` - The modules (i.e. libraries or the main executable) that DynamoRIO should track for coverage information. * `-per_module_coverage` - Whether each of the tracked modules should be recorded independently (when the option is set to 1) or whether the same coverage map should be used for all modules (when set to 0). The default option is to record each module in the same map. * `-client_params` - A string of options that should be passed to the DynamoRIO plugin. See the description of these arguments below. In addition to the arguments passed to the instrumentation module, there are a number of options that can be passed to the DynamoRIO plugin that is executed in the target process' address space. These arguments are passed inside the `-client_params` argument. * `-target_module` - This option specifies which module the target function to wrap is in. * `-target_offset` - This option specifies the address of the target function to wrap. 
* `-target_method` - This option specifies the name of the target function to wrap. In order for this option to be able to lookup the name, the function needs to be exported or the symbols for the target module need to be available. * `-nargs` - The number of arguments that the target function takes. This is used to save and restore the arguments between fuzz iterations. * `-call_convention` - The target function's calling convention. The default calling convention is cdecl on 32-bit x86 platforms and Microsoft x64 for Visual Studio 64-bit applications. Possible values: fastcall, ms64 for Microsoft x64 Visual Studio, stdcall for cdecl or stdcall, and thiscall. * `-no_thread_coverage` - With this option enabled, all threads of the target program will track coverage. By default, only the thread that hits the target function will track coverage. * `-covtype` - the type of coverage being recorded. Supported options are bb for basic block coverage or edge for edge coverage. Edge coverage is the default. * `-write_log` - A debug option that writes a log file to the current directory with debug information on the DynamoRIO plugin's status. In order to run the fuzzer in DynamoRIO's persistence mode, the `-fuzz_iterations`, `-coverage_modules`, `-target_module`, and either the `-target_method` or `-target_offset` options all need to be setup with values specific to the target being fuzzed. # Example ``` fuzzer.exe file dynamorio afl -n 100 -sf C:\\killerbeez\corpus\test\inputs\close.txt -d "{\"path\":\"C:\\\\killerbeez\\corpus\\test\\test.exe\",\"timeout\":2,\"arguments\":\"@@\"}" -i "{\"timeout\": 3000,\"coverage_modules\":[\"test.exe\"], \"client_params\":\"-target_module test.exe -target_offset 0x1000\",\"fuzz_iterations\":50}" -l "{\"level\":0}" ``` This example fuzzes the include test.exe program for 100 iterations, feeding input mutated by the AFL mutator. 
The instrumentation arguments are specified so that the DynamoRIO instrumentation module will track coverage of the test.exe binary. The instrumentation will wrap the test's function at offset 0x1000, restarting this function for each input. It will run 50 iterations of this function, and then restart the program cleanly for the next iteration. This specific example will find a crash in the test program at iteration 7. ================================================ FILE: docs/IPT.md ================================================ # Intel Processor Trace Instrumentation The Intel Processor Trace (IPT) instrumentation module provides coverage information for the target program. The IPT instrumentation module utilizes Intel Processor Trace to quickly obtain a set of hashes that represent the execution trace of a program. For the moment, IPT instrumentation is only available on Linux. This document describes the Intel PT instrumentation, how to use it, and the design decisions that lead to its development. # Background Beginning with the 5th generation Intel Processors, Intel introduced Intel Processor Trace to provide an efficient way to trace the execution of a processor. In order to reduce the overhead and increase the speed of tracing, IPT records as little information is possible, while still allowing for an exact trace of execution to be obtained. IPT works by recording packets which detail the execution trace in memory. IPT can be configured to limit packet generation based on the address of the instruction pointer. This allows tracing of specific executables without generating unnecessary trace information for any libraries used. Each IPT packet contains a different type of information relating to the execution trace, however the only two packets that Killerbeez uses are: * TNT packets - which contain the results of conditional branches (i.e. jnz, jz, ja, etc). 
These packets contain a set of bits which correspond to whether the last several branches were Taken (T) or Not Taken (NT). Depending on the processor, IPT may send TNT packets in either the short (maximum 6 TNT bits) or long (maximum 47 TNT bits) form. * TIP packets - which contain the results of indirect calls/jumps (i.e. call rax) and if enabled, the saved instruction pointer of ret instructions. With a recording of these two packets for an execution trace, software can utilize a disassembler to obtain the exact control flow of an execution. More information on IPT is available in Chapter 35 of the Intel Architecture Software Developer's manual. # Intel PT and Fuzzing While great in theory, IPT is not perfect for fuzzing. It is a step forward and can be useful, but there are a few issues that must be accommodated. In an ideal implementation, IPT would provide a set of basic block transitions, such as those obtained in the AFL fuzzer or Killerbeez's DynamoRIO instrumentation. However, IPT only provides the addresses of indirect jumps/calls and the results of conditional branches. Thus, any attempt to obtain basic block transitions must decode the packets, disassemble the traced executable and libraries, and walk through the execution trace. While IPT's tracing is very fast and low overhead compared to other instrumentations, the parsing of these traces can be very slow. One possible optimization for parsing IPT execution traces, is to obtain a complete Control Flow Graph (CFG) prior to decoding and utilizing the CFG to replace the disassembling of instructions. However, statically obtaining a complete CFG of real world programs is a hard problem, and becomes especially troublesome when a target calls a library API with a callback function. Another concern is the asynchronous nature and caching of TIP and TNT packets. TNT bits can be built up and cached for as long as the processor decides, or may be sent immediately. 
Further, the number of TNT bits in a TNT packet is non-deterministic and can vary between runs when tracing the same program. As such, the order of TIP packets is irrelevant to the order of the TNT packets. This prevents parsing approaches which rely on TIP packets to skip to relevant portions of the execution trace. # Killerbeez Implementation To compensate for the previously mentioned issues, Killerbeez's implementation of IPT-based fuzzing does not attempt to disassemble packets. Rather, Killerbeez maintains a set of hashes which encode the TIP and TNT data in the execution trace. As mentioned above, the order of TIP and TNT packets are not always the same, so each of the two packets are hashed independently. The instruction pointer address is extracted from the TIP packets and hashed into the trace's TIP hash, while the TNT bits are extracted from the TNT packets and hashed into the trace's TNT hash. After the hashes have been generated, a hash table is then used to lookup these hashes and determine if an execution trace has been seen before. This approach has the advantage of not requiring disassembling in order to walk the execution trace. Additionally, our IPT packet parser is able to ignore irrelevant packets and only focus on TNT and TIP packets. These characteristics make the Killerbeez IPT packet parser very fast. However, this approach does have the disadvantage of requiring IPT instruction pointer address filtering, to ensure unnecessary libraries are not also traced. This also helps reduce the non-determinism in the execution trace, as some libraries do not always trace exactly the same in each execution. # Execution Traces vs Basic Block Transitions As compared to basic block transitions, this implementation may overestimate which execution traces are consider interesting. For instance, imagine two executions: run A and run B. 
If run B's execution trace is a subset of run A's execution trace, our implementation will report it as being interesting, despite a basic block transition based instrumentation not reporting it as interesting. Run B may or may not be interesting depending on the specific code being executed. For instance, run B doesn't exercise any additional code, so it may not be interesting, however if a bug in the program can only be exercised by not executing a specific piece of code, or executing it fewer times, run B may be interesting. In order to compensate for the differences between execution traces and basic block transitions, the manager will analyze each interesting input generated by the fuzzing clients before adding it to the working input set. For instance, depending on the manager design, it may trace the target program's execution with the interesting input file using another slower instrumentation that is able to obtain basic block transitions. With the list of basic block transitions for this input file, the manager can query the database of previously found basic block transitions and make a decision on whether the client reported interesting input file is actually interesting enough to be added to the working input set. # Comparison of Implementations ## Honggfuzz Honggfuzz also utilizes Intel PT support to trace userland targets when fuzzing. Rather than hashing the TIP/TNT packets, honggfuzz utilizes the instruction pointers in TIP packets as an index into a bitmap which records previously seen instruction pointers. While this approach is quick and can avoid disassembling of the target executable and libraries, it disregards the conditional branch decisions recorded in the TNT packets. While our implementation will overestimate interesting files, honggfuzz's implementation underestimates interesting files and will not report an input file that only changes a conditional branch in the execution of the target program. 
Honggfuzz's IPT implementation is available in the [honggfuzz repository on github](https://github.com/google/honggfuzz/blob/master/linux/pt.c). ## kAFL kAFL utilizes Intel PT support to trace execution while fuzzing Operating System kernels. Rather than hashing TIP/TNT packets, kAFL utilizes a custom packet decoder that caches disassembly. Similar to Killerbeez, kAFL also ignores non-relevant IPT packets. As described above, the Killerbeez implementation does not a use a disassembler and thus will be faster than kAFL, but is unable to obtain the basic block transitions that kAFL can. kAFL's IPT implementation is available in the [kAFL repository on github](https://github.com/RUB-SysSec/kAFL/blob/master/QEMU-PT/pt/). # Example In order to utilize Killerbeez's IPT instrumentation, your processor and Linux kernel must support IPT. To check for support, look for the directory `/sys/devices/intel_pt/`. Additionally, Killerbeez's IPT instrumentation requires address filtering; the number of address filters supported on your system is available in the `/sys/devices/intel_pt/caps/num_address_ranges` file. The IPT instrumentation can be used as any other instrumentation module would, i.e. by specifying the name as the instrumentation type. The TNT and TIP hashes are output as DEBUG messages, and can be viewed by increasing the logging level (with the option `-l "{\"level\":0}"`). An example command illustrating the IPT module's usage is shown below. This example runs 10 iterations of the test-linux binary, mutates the input with the bit_flip mutator, and feeds the input over stdin to the target program. This command will cause a crash in the test-linux binary on the seventh iteration. The IPT instrumentation tracks the TNT and TIP packets that are generated from the main test-linux executable. 
``` ./fuzzer stdin ipt bit_flip -d "{\"path\":\"$HOME/killerbeez/build/killerbeez/corpus/test-linux\"}" -n 10 -sf $HOME/killerbeez/killerbeez/corpus/test/inputs/close.txt ``` If instead of tracking code coverage for the main executable, you wish to track the coverage of a library, then you can use the `coverage_libraries` option. This option specifies an array of libraries for the IPT instrumentation module to track coverage information for. The below command illustrates how to use this option with the included example program. This command tracks the code coverage of libtest1.so and libtest2.so. ``` ./fuzzer stdin ipt bit_flip -d "{\"path\":\"$HOME/killerbeez/build/killerbeez/corpus/libtest\"}" -n 10 \ -i "{\"coverage_libraries\":[\"$HOME/killerbeez/build/killerbeez/corpus/libtest1.so\",\"$HOME/killerbeez/build/killerbeez/corpus/libtest2.so\"]}" \ -sf $HOME/killerbeez/killerbeez/corpus/test/inputs/close.txt ``` # Persistence Mode The IPT instrumentation module provides the ability to use persistence mode to increase the speed of fuzzing. Persistence mode involves executing multiple inputs without restarting the target process. While this approach can be much quicker, it can cause instabilities in the target process if not setup properly. The IPT instrumentation module's persistence mode is based off the persistence mode available in the [LLVM instrumentation included in AFL](https://github.com/mirrorer/afl/tree/master/llvm_mode). As such, it has similar advantages and disadvantages. In the IPT instrumentation module, persistence mode is accomplished by modifying the source code of the target program to repeatedly call the Killerbeez fork server library's `killerbeez_loop` function. This function is used to mark the start and stop of the target process testing a single input. An ideal program for persistence mode is one that has very little global state, or the state can easily be reset. 
The structure of a persistence mode program is shown below, where the `KILLERBEEZ_LOOP` macro is used to call the fork server.
``` ./fuzzer stdin ipt afl -i "{\"persistence_max_cnt\":1000}" -d "{\"path\":\"$HOME/killerbeez/build/killerbeez/corpus/persist\"}" -n 5000 -sf $HOME/killerbeez/killerbeez/corpus/test/inputs/close.txt ``` For comparison, a non-persistence mode run with a similar binary can be started with this command: ``` ./fuzzer stdin ipt afl -d "{\"path\":\"$HOME/killerbeez/build/killerbeez/corpus/nopersist\"}" -n 5000 -sf $HOME/killerbeez/killerbeez/corpus/test/inputs/close.txt ``` # Deferred Startup Mode Killerbeez's fork server tries to optimize performance of the target process by executing the target binary until it reaches the `main` function, and then forking all new processes from the copy stopped at `main`. This ensures all of the startup code that is executed prior to the `main` function is only ever run once. However, if a target process has a large startup cost, fuzzing will still be slow. In these cases, it is beneficial to use the fork server's deferred startup mode, to wait until after the process has finished starting up to start the fork server. Killerbeez's deferred startup mode is based off the deferred instrumentation mode available in the [LLVM instrumentation included in AFL](https://github.com/mirrorer/afl/tree/master/llvm_mode). Killerbeez offers two different techniques for enabling the deferred startup mode. Both techniques are configured by modifying the configuration in the forkserver_config.h header file in the instrumentation/ directory of this repository and recompiling Killerbeez. ## Function Hooking By default, the Killerbeez fork server uses library injection and function hooking in order to execute code in a target process. Thus, the Killerbeez deferred startup mode can be enabled by switching which function is hooked. This mode has the advantage that it can still hook functions in target programs when source code is unavailable. 
In forkserver_config.h, there are 4 preprocessor macros that control the fork server's function hooking behavior: * `DISABLE_HOOKING` - This macro disables function hooking. This macro should be set to 0 to enable function hooking. * `USE_LIBC_START_MAIN` - This macro controls whether the default function (`__libc_start_main`) is hooked or not. To customize the hooked function, this must be set to 0. * `CUSTOM_FUNCTION_NAME` - This macro should contain the name of the function to hook. The name should NOT be placed in quotes. * `RUN_BEFORE_CUSTOM_FUNCTION` - This macro determines whether the fork server should startup before or after the hooked function is called. Set it to 0 to start the fork server after the hooked function returns, or 1 to start the fork server before the hooked function is called. The deferred executable in the corpus/persist/ directory is an example of a target where deferred startup mode is advantageous. This target calls sleep at the beginning of the program, which will substantially slowdown fuzzing. Killerbeez can be instructed to wait to start the fork server until after the sleep call by modifying forkserver_config.h to set the macros as shown below: ``` #define DISABLE_HOOKING 0 #define USE_LIBC_START_MAIN 0 #define CUSTOM_FUNCTION_NAME sleep #define RUN_BEFORE_CUSTOM_FUNCTION 0 ``` ## Source Code Instrumentation If source code is available, the target program can be modified to explicitly start the fork server by calling the `KILLERBEEZ_INIT()` macro at the desired point in the target program. In order to compile the instrumented source code, it must include the forkserver.h header file (so that the `KILLERBEEZ_INIT` macro is defined) and the linker arguments must be modified to link against the fork server library. Once the target program's source code is modified, the `DISABLE_HOOKING` macro in the forkserver_config.h file should be set to 1. This ensures the forkserver does not also try to hook a function to startup. 
The deferred_nohook executable in the corpus/persist/ directory shows an example of using source code instrumentation to enable deferred startup mode. ================================================ FILE: docs/Server.md ================================================ # Client/Server Operation using BOINC ## Installation ### Server (Tested on Ubuntu 16.04) 1. Install required software ``` sudo apt update && sudo apt install git m4 pkg-config autoconf libtool \ libssl-dev libmysqlclient-dev libcurl4-openssl-dev python apache2 \ mysql-server python-mysqldb haveged php libapache2-mod-php php-mysql \ php-xml-parser curl python-requests python-virtualenv unzip ``` * You will be asked to set a password for mysql root user 2. Get code ``` sudo mkdir /usr/local/killerbeez sudo chown $USER /usr/local/killerbeez sudo chmod a+rx /usr/local/killerbeez umask 0022 cd /usr/local/killerbeez git clone --recursive https://github.com/grimm-co/killerbeez.git ``` 3. Build code ``` cd killerbeez/server/boinc ./_autosetup ./configure --disable-client --disable-manager make ``` 4. User permissions ``` sudo useradd -m -s /bin/bash boincadm sudo usermod -a -G boincadm www-data sudo -u boincadm sh -c 'echo umask 0007 >> /home/boincadm/.bashrc' sudo sh -c 'echo umask 0007 >> /etc/apache2/envvars' sudo chgrp boincadm /usr/local/killerbeez sudo chmod g+w /usr/local/killerbeez ``` * Note: the new user doesn't have sudo access, so continue using your normal account for the remaining instructions except when indicated. 5. MySQL setup (make sure to select your own password) ``` mysql -u root -p mysql> CREATE USER 'killerbeez'@'localhost' IDENTIFIED BY ''; mysql> GRANT ALL ON killerbeez.* to 'killerbeez'@'localhost'; ``` ``` sudo sh -c 'echo sql_mode="ONLY_FULL_GROUP_BY,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION" >> /etc/mysql/mysql.conf.d/mysqld.cnf' sudo systemctl restart mysql ``` 6. 
Apache setup ``` sudo a2enmod cgi sudo systemctl restart apache2 ``` 7. Project setup (run as the boincadm user) 1. First, open a shell as the boincadm user for the rest of this step ``` sudo -i -u boincadm ``` 2. Determine a URL that points to this server. If accessed in a browser, this URL should display the default Apache "It works!" page. If this will be a public instance, take a look at [these guidelines](https://boinc.berkeley.edu/trac/wiki/MasterUrl#ChoosingaprojectURL). * `export BOINC_URL=` 3. Create the BOINC project for Killerbeez (using the passord from step 5 above) ``` cd /usr/local/killerbeez/killerbeez/server/boinc tools/make_project --url_base $BOINC_URL \ --db_user killerbeez --db_pass '' killerbeez ``` * Enter Y at the prompt. 4. Set up cron, load platforms into the database, and set a password to access the admin UI (admin username does not need to be boincadm) ``` cd ~/projects/killerbeez crontab killerbeez.cronjob bin/xadd htpasswd -c html/ops/.htpasswd ``` 5. Edit `html/project/project.inc` to set PROJECT and COPYRIGHT_HOLDER correctly 6. Install killerbeez-specific files: ``` cp /usr/local/killerbeez/killerbeez/server/*.py bin cp -r /usr/local/killerbeez/killerbeez/server/skel . ``` 7. We need the killerbeez binaries to run on the target system. Download the latest [release](https://github.com/grimm-co/killerbeez/releases) for the target platform and place it in the `skel/` directory 8. The Killerbeez fuzzer binary does not use the BOINC API, however BOINC has a wrapper which wraps an executable and deals with all the BOINC-specific stuff. This allows any application to be leveraged by BOINC. We need this wrapper program for all platforms which we intend to support. 
Extract the wrapper binary (or see the [build instructions](#wrapper) to build your own copy): For 64-bit Windows: ``` unzip -j -d skel/windows_x86_64 skel/windows_x86_64/killerbeez-x64.zip \ '*wrapper_26014_windows_x86_64.exe' ``` For 64-bit Linux: ``` unzip -j -d skel/x86_64-pc-linux-gnu \ skel/x86_64-pc-linux-gnu/killerbeez-Linux.zip */wrapper ``` 9. If you want to support MacOS, you will need to [build the wrapper yourself](#wrapper), as the versions on the [BOINC wiki](https://boinc.berkeley.edu/trac/wiki/WrapperApp) are too old. 10. Enable the project ``` bin/start ``` 11. Done running as boincadm ``` exit ``` 8. Install Apache config for project ``` sudo cp /home/boincadm/projects/killerbeez/killerbeez.httpd.conf /etc/apache2/sites-available/killerbeez.conf sudo sed -i -e '/Order /d' -e 's/Deny from all/Require all denied/' \ -e 's/Allow from all/Require all granted/' \ /etc/apache2/sites-available/killerbeez.conf sudo a2ensite killerbeez sudo systemctl reload apache2 ``` 10. Start the Killerbeez API server ``` sudo -i -u boincadm cd /usr/local/killerbeez virtualenv -ppython3 killerbeez-venv source killerbeez-venv/bin/activate cd /usr/local/killerbeez/killerbeez/python/manager pip install -r requirements.txt python server.py -create # Remove -create if restarting server ``` This will start up the server on port 5000. In the instructions below, we will call the URL pointing to this port `$API_URL`. ### Client Next we need to set up at least one client. Follow these instructions first, then the operating system specific instructions below. If there aren't any instructions for your operating system, you're on your own (when if you figure it out, we accept pull requests for improved documentation :-)). 1. Create an account via BOINC webpage (`$BOINC_URL/killerbeez/create_account_form.php`) #### Linux (Ubuntu) 1. sudo apt install boinc-client 2. Get an account key using: `boinccmd --lookup_account $BOINC_URL/killerbeez/ ` 3. 
`boinccmd --project_attach $BOINC_URL/killerbeez/ ` #### Windows Note: only Windows 10 x64 is currently supported 1. Log into the BOINC webpage (`$BOINC_URL/killerbeez`) 2. GUI instructions to add project: 1. Go to Project > Join on website 2. If client not installed, install it 3. Select "Add project" in GUI 4. Enter project URL from webpage 5. Enter email address and password of the account you registered in step 1 ## Administration All administration of the BOINC server which is done from the command line should be done as the "boincadm" user. You can use `sudo -i -u boincadm` to drop to an interactive shell with this user. ### Add a target Killerbeez jobs have a "target", which represents a target program running on a certain platform. BOINC needs to be told about these targets, and the platforms they can run on. The [add_target.py](../server/add_target.py) tool will create a BOINC "app" for each target/platform combination. These can be customized to install a new application, but the default configuration will allow fuzzing anything that is installed on client machines already, such as Windows Media Player. 1. Create the target A list of platforms (operating/cpu architecture) can be found on the [BOINC Wiki](https://boinc.berkeley.edu/trac/wiki/BoincPlatforms). ``` cd ~/projects/killerbeez/ bin/add_target.py [ ...] ``` * Example: `bin/add_target.py wmp windows_x86_64` 2. If you need any additional files in this app, put them in the app dir (`apps/_/1/`). At a minimum, you will probably want to put the executable there (unless the assumption is that it is already on the clients' computers). 3. Add an entry for each added file to the `apps/_/1//version.xml` file, as in the following example: ``` myfile.exe myfile.exe ``` The physical name is the actual filename you added, while the logical name is the name it will be given on the client system. `` ensures that the real file is placed at that name rather than a BOINC-specific "link" file. 
See the [BOINC docs](https://boinc.berkeley.edu/trac/wiki/AppVersionNew) for more information on configuring your app. 4. Finalize the app creation ``` bin/update_versions ``` 5. If you need to make changes to your app, you can do so by creating a new app version. Keep in mind that BOINC files are immutable, so if you change a file you must also change its filename. The following is a good workflow: 1. In the `apps/_//` directory, change whatever files you need to update. 2. Add a version number to the names of updated files (e.g. myfile.zip.1 or myfile.1.exe) 3. Edit `apps/_///version.xml` with the new physical names of all files. 4. Rename the whole `apps/_/` directory to `apps/_/`. 5. Run `bin/update_versions`. Supporting a new platform is kind of a big deal. If you want to take a stab at it, take a look at the files in the [skel](../server/skel) directory and the comments in add_target.py to get started. You will probably want to have the [BOINC wiki](https://boinc.berkeley.edu/trac/wiki) loaded up, especially if you're not already familiar with the implementation details of BOINC. ### Submit job Customize the [boinc_submit.py](../server/boinc_submit.py) script for your desired job by changing the constants at the top, and the job parameters in the requests. The constants are: * `PROJECT` - The URL for the Killerbeez API (`$API_URL`) * `SEED` - A string to be used as the contents of the seed file Run the script, and it will print out the ID of the submitted job. ### View job results The results of a job can be accessed via the killerbeez API. 
Let `$JOB_ID` be the ID of the job of interest: ``` curl $API_URL/api/job/$JOB_ID/results ``` For example: ``` $ curl http://localhost:5000/api/job/37/results [{"result_type": "crash", "repro_file": "3fb/38D1F16F84B07DBF6B29861BD7464ABA", "job_id": 37, "result_id": 1}, {"result_type": "crash", "repro_file": "235/97A596BE0B77EF1B6899503300B205FE", "job_id": 37, "result_id": 2}] ``` The output is a JSON list of all the job's results. Each result is a JSON object, in which the result_type describes why this result is interesting (crash? hang? new_path?), and the repro_file is the path of a file with the input that caused the crash, hang, or new path. To retrieve the crashing input: ``` curl $BOINC_URL/killerbeez/download/$repro_file ``` For example: ``` $ curl http://localhost:80/killerbeez/download/3fb/38D1F16F84B07DBF6B29861BD7464ABA -215005474723783 ``` You may be interested to know what seed file these results were mutated from. The path of the seed file is available in the job information: ``` curl $API_URL/api/job/$JOB_ID ``` For example: ``` $ curl http://localhost:5000/api/job/37 {"mutator": null, "end_time": null, "status": "completed", "input_ids": [], "instrumentation_type": null, "seed_file": "ab/jf_2f79d50c5c10004b755b4622555e99f5", "job_type": null, "mutator_state": null, "driver": null, "assign_time": null, "job_id": 37} ``` When jobs are submitted automatically, the seed file may be the result of some other job, in which case it's possible to find the job that produced it by searching results by repro_file: ``` curl $API_URL/api/results?repro_file=$repro_file ``` For example: ``` $ curl http://localhost:5000/api/results?repro_file=235/97A596BE0B77EF1B6899503300B205FE [{"result_type": "crash", "repro_file": "235/97A596BE0B77EF1B6899503300B205FE", "job_id": 37, "result_id": 2}] ``` This gives you the job ID of the job that produced the file, allowing you to trace its ancestry. 
### Set up account with administrator access This step is only needed if you are going to create an account that will submit jobs directly to BOINC (see next section). You do not need administrator access to submit jobs via the Killerbeez API server. 1. Administrator must register an account via BOINC webpage (`$BOINC_URL/killerbeez/create_account_form.php`) 2. Log into site and go to Project > Account to find User ID 3. On server: ``` sudo -i -u boincadm cd ~/projects/killerbeez bin/manage_privileges grant all ``` ### Set up account with direct BOINC job submission privileges You only need to do this if you want to submit jobs directly to BOINC (most likely for testing). Submitting jobs via the Killerbeez API server does not require this. To perform these steps you must have administrator access (see previous section). However, as an administrator you can grant job submission privileges to any account, administrator or otherwise. 1. Go to `$BOINC_URL/killerbeez/manage_project.php` 2. Click name of the user to grant privileges to 3. Select "All apps" 4. Click "Ok". ## Optional build instructions ### Wrapper The BOINC [wrapper](https://boinc.berkeley.edu/trac/wiki/WrapperApp) is an adapter that lets programs be run unmodified under the BOINC client. Our usage of the wrapper requires features and bugfixes that are more recent than the last released wrapper binary. Our binary release includes a compiled copy of the wrapper, as described above, but you can build your own with the following steps: 1. On a Windows machine, install [git](https://git-scm.com/downloads) and [Visual Studio Community 2013](https://visualstudio.microsoft.com/vs/older-downloads/) 2. Start Git Bash and clone the [BOINC repository](https://github.com/BOINC/boinc) 3. Open `win_build\boinc_vs2013.sln` from the cloned repo in Visual Studio 2013 4. Click the `wrapper` project in the Solution Explorer 5. 
Select `x64` from the Platforms drop-down in the toolbar, and `Release` from the Configurations drop-down next to it. 6. From the `BUILD` menu, select `Build wrapper` 7. The compiled binary should be in `win_build\Build\x64\Release\wrapper_26014_windows_x86_64.exe` 8. Drop this binary into the `/home/boincadm/projects/killerbeez/skel/windows_x86_64` directory on your killerbeez server ================================================ FILE: docs/api/Makefile ================================================ .PHONY=all clean TYPESETTER=pdflatex all: api.pdf clean: rm -f *.out *.aux *.toc *.out *.log *.pdf api.pdf: api.tex defines.tex api_mutator.tex api_driver.tex \ api_instrumentation.tex coverpage.tex ${TYPESETTER} api.tex # first time for the ToC ${TYPESETTER} api.tex # second time for the real deal ================================================ FILE: docs/api/README.txt ================================================ # API Documentation This directory contains all the documentation for the KILLERBEEZ API. This means it includes the documentation for the mutator API, even though the mutators are kept in a separate repository. Mutator specific documentation, however, is kept in the mutator's repository. # Building The included Makefile can be used to build the api.pdf documentation by running the command `make` from this directory. The documentation is written in [LaTeX](https://www.latex-project.org/) and uses `pdflatex` to compile. On Ubuntu a working LaTeX environment can be set up with the following command: ``` sudo apt install okular texlive texlive-full texinfo texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended latex2rtf pandoc ``` On macOS, a working LaTeX environment can be set up using the [MacTeX package](https://tug.org/mactex/mactex-download.html) or via homebrew. On Windows, a working LaTeX environment can be set up via the [MiKTeX project](https://miktex.org/). 
================================================ FILE: docs/api/api.tex ================================================ \documentclass{article} \input{defines} \input{packages} \input{coverpage} \title{\killerbeez{} API} \author{GRIMM} \date{\apiDate{}} \begin{document} % Nice cover page \thispagestyle{empty} \maketitle \newpage % Table of Contents \tableofcontents \newpage \section{Overview} This document will cover the version \apiVersion{} API for each module, along with a quick high-level summary of what it does. \par The APIs are all specified in C, as this provides a consistent language and is explicit about data types which means there's no need for a separate Python specification. The C code is frequently wrapped with Python (via ctypes), but modules are typically written in C code as they run considerably faster when it is all native code. \section{Manager} The manager is what coordinates a fuzz job. It decides which seeds to use, which mutators to run and sends this information to the client, which kicks off one or more Main Fuzzers. The client will then also handle getting the results back to the manager. \section{Fuzzer} This will run many iterations of a single seed and a single mutator against a target program. For efficiency, this will be run on the same computer (which means same O/S) as the target binary. This component will be an executable that the manager executes on each of the target systems. The arguments for this function are defined in the usage function of the fuzzer. \section{Mutator} \label{mutator} \input{api_mutator} \section{Driver} \label{driver} \input{api_driver} \section{Instrumentation} \label{instrumentation} \input{api_instrumentation} \section{Structures} \label{structures} \input{api_structures} \section{Helper Utilities} \label{helpers} \input{helpers} %\section{Database} % TODO: document me! %\section{Tracer} % TODO: document me! %\section{Input Generator} % TODO: create me and then document me! 
\end{document} ================================================ FILE: docs/api/api_driver.tex ================================================ The driver will be the component that runs the program being fuzzed. The driver should start the program, feed in the input, and determine when the program is done processing the input. This component may need to be customized per target application. \par Anything which is driver specific will only be used within the driver functions. All other components will treat these items as opaque strings/blobs. \api{void * create(char * options, instrumentation\_t * instrumentation, void * instrumentation\_state, mutator\_t * mutator, void * mutator\_state) }{ This function will allocate and initialize the driver structures. If the driver is going to be testing a long-running process, this function should start that process. Anything that needs to be done before a fuzzing run can start should be done here. }{ \item options - a JSON string that contains the driver specific string of options. \item instrumentation - a pointer to an instrumentation instance that the driver will use to instrument the requested program. The caller should initialize this instrumentation instance before the create call to the driver, and then free it after cleaning up the driver. This parameter is optional and can be set to NULL if the caller does not wish to use an instrumentation with the driver. \item instrumentation\_state - a pointer to the instrumentation state for the passed in instrumentation. This parameter is optional and can be set to NULL if the caller does not wish to use an instrumentation with the driver. \item mutator - a pointer to a mutator instance that the driver can use to obtain the next input (for use in the \texttt{test\_next\_input} function). This parameter is optional and can be set to NULL if the caller does not wish to use a mutator with the driver. 
Without this parameter, the \texttt{test\_next\_input} and \texttt{get\_last\_input} functions will be unavailable. \item mutator\_state - a pointer to the mutator state for the passed in mutator. This parameter is optional and can be set to NULL if the caller does not wish to use a mutator with the driver. \item return value - a driver specific structure or NULL on failure. The returned value should not be used for anything other than passing to the various Driver API functions. } \api{void cleanup(void * driver\_state) }{ This function will kill any processes created by the driver and clean up anything else that was created to help fuzzing. It will also free the driver state. }{ \item driver\_state - a driver specific structure previously created by the create function. This structure will be freed and should not be referenced afterwards. } \api{int test\_input(void * driver\_state, char * buffer, size\_t length) }{ This function will cause the program being fuzzed to be tested against the given input. This function should block execution until the program being fuzzed has finished processing the given input. }{ \item driver\_state - a driver specific structure previously created by the create function. \item buffer - the input that should be tested \item length - the length of the buffer argument \item return value - 0 on success, -1 on failure } \api{int test\_next\_input(void * driver\_state); }{ This function uses the mutator given during the driver creation to retrieve the next mutated input and test it against the target program. This function blocks execution until the program being fuzzed has finished processing the mutated input. It will report whether the fuzzed process crashed or hung, or neither. This function is only available if a mutator was given to the driver in the \texttt{create} function. }{ \item driver\_state - a driver specific structure previously created by the create function. 
\item return value - 0 on success, -1 on failure, or -2 if the mutator has run out of inputs to mutate. } \api{void * get\_last\_input(void * driver\_state, int * length); }{ This function retrieves the most recent mutated input that was tested with the \texttt{test\_next\_input} function. This function is only available if a mutator was given to the driver in the \texttt{create} function. }{ \item driver\_state - a driver specific structure previously created by the create function. \item length - a pointer to an integer that will be set to the length of the returned buffer. \item return value - on success this function will return a buffer containing the last input that was tested, or NULL on failure. This pointer should not be freed by the caller, and is only valid until the next call to \texttt{test\_next\_input}. } \api{int help(char ** help\_str) }{ This function sets a help message for the driver. This is useful if the driver takes a JSON options string in the create() function. }{ \item help\_str - A double pointer that will be updated to point to the new help string. \item return value - 0 on success and -1 on failure } ================================================ FILE: docs/api/api_instrumentation.tex ================================================ The instrumentation modules are what track the state of a process and determine if a path through the process is new. This will include things such as QEMU (for Linux), LLVM (for source), PIN, Dynamo-RIO, Dyninst, and Intel PT. They are optionally given some state information. The state information is module-specific and is used to tell the instrumentation module which paths have been previously hit. Additionally, each instrumentation module will have a variety of configuration options that can be specified that will be specific to that instrumentation module. These options will be specified as a JSON char array. 
\par Anything which is instrumentation specific will only be used within the instrumentation functions. All other components will treat these items as opaque strings/blobs. The typical order of calling instrumentation functions is create(), enable(), is\_process\_done(), get\_fuzz\_result(). \par As an implementation note, the instrumentation modules cannot all use the same name for functions like ``create'' or there will be a naming collision. This is currently handled in instrumentation\_factory.c by setting an instrumentation structure which maps the names listed here to the actual functions which back them for the selected instrumentation. \api{void * create(char * options, char * state) }{ This function will create and return an instrumentation struct that defines the instrumentation's state. The state argument will be used to load the previously executed paths through the fuzzed program. }{ \item options - a JSON string that contains the instrumentation specific options \item state - used to load a previously dumped state (produced by the get\_state() function), that defines the current paths seen by the instrumentation. Alternatively, NULL can be provided to start an instrumentation without a previously dumped state \item return value - an instrumentation specific structure or NULL on failure. The returned value should not be used for anything other than passing to the various Instrumentation API functions } \api{void cleanup(void * instrumentation\_state) }{ This function will release any resources that the instrumentation has open and free the instrumentation state. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create function. This structure will be freed and should not be referenced afterwards } \api{char * get\_state(void * instrumentation\_state) }{ This function will return the state information holding the previous execution path info. 
The returned value can later be passed to the instrumentation create() function to load the state back into an instrumentation struct. It is the caller's responsibility to free the memory allocated and returned here using the free\_state() function. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create function \item return value - a buffer that holds information about the previous execution paths as a JSON char array. } \api{void free\_state(char * state) }{ This function will free a previously dumped state (via the get\_state() function) of the instrumentation. }{ \item state - a previously dumped state buffer obtained by the get\_state() function } \api{int set\_state(void * instrumentation\_state, char * state) }{ This function will set the previous execution paths of the instrumentation. This can be used to restart an instrumentation once it has been created. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item state - a previously dumped state buffer obtained by the get\_state() function \item return value - 0 on success or non-zero on failure } \api{void * merge(void * instrumentation\_state, void * other\_instrumentation\_state) }{ This function will merge two sets of instrumentation coverage data. The resulting instrumentation state will include the tracked coverage from both instrumentation states. Both instrumentation states must have the same instrumentation options (what to track coverage of, which modules, etc.) specified, and generally need to be produced by the same instrumentation module in order for the merge to work correctly. It's possible that two different instrumentation modules may produce state information in the same format, however this is up to them and not something guaranteed by this specification. Neither argument will be modified nor freed. 
It is the caller's responsibility to free the memory allocated and returned here using the free\_state() function. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item other\_instrumentation\_state - a second instrumentation specific structure previously created by the create() function that should be merged with the first \item return value - an instrumentation specific structure that combines the coverage information from both of the instrumentation states or NULL on failure } \api{int enable(void * instrumentation\_state, void * process, char * cmd\_line, char * input, size\_t input\_length) }{ This function will enable the instrumentation module for a specific process and runs that process. If the process needs to be restarted, it will be. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item process - a pointer to a handle for the process on which the instrumentation was enabled. On Windows, this is a HANDLE*, on UNIX-based systems it is a pid\_t* \item cmd\_line - the command line of the fuzzed process on which to enable instrumentation \item input - pointer to the buffer containing the input data that should be sent to the fuzzed process \item input\_length - the length of the input parameter \item return value - 0 on success, non-zero on failure } \api{int is\_new\_path(void * instrumentation\_state) }{ This function will determine whether the process being instrumented has taken a new path. It should be called after the process has finished processing the tested input. If \texttt{is\_new\_path} is called prior to \texttt{enable}, it will return failure, as the fuzzing of processes has not been started yet. 
}{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item return value - 1 if the previously setup process (via the enable() function) took a new path, 0 if it did not, or -1 on failure } \api{int get\_fuzz\_result(void * instrumentation\_state) }{ This function will return the result of the fuzz job. It should be called after the process has finished processing the tested input, i.e. after a successful \texttt{is\_process\_done}. If \texttt{get\_fuzz\_result} is called prior to \texttt{enable}, it will return failure, as the fuzzing of processes has not been started yet. Because this is only called once the fuzzing is done, it will generally clean up resources such as reaping the target process. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item return value - either \texttt{FUZZ\_NONE}, \texttt{FUZZ\_HANG}, \texttt{FUZZ\_CRASH}, or \texttt{FUZZ\_ERROR} on error. } \api{int get\_module\_info(void * instrumentation\_state, int index, int * is\_new, char ** module\_name, char **info, int size) }{ This function is optional and not required for the fuzzer to work. It can be used to obtain coverage information for each executable/library separately. This function returns information about each of the separate modules (shared libraries such as .dll, .so, .dylib). If \texttt{get\_module\_info} is called and requests the \texttt{is\_new} or \texttt{info} parameters prior to \texttt{enable} being called, it will return failure, as the fuzzing of processes has not been started yet. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function \item index - an index into the module list for the module about which information should be retrieved. The return value will indicate if a module exists for this index. 
Indices start at 0 and increase from there \item is\_new - This parameter returns whether or not the last run of the instrumentation returned a new path for the module with the specified index. In order for the information returned in this parameter to be accurate, the is\_new\_path method should be called first. This parameter is optional and can be set to NULL, if you do not want this information \item module\_name - This parameter returns the filename of the module at the specified index. This parameter is optional and can be set to NULL, if you do not want this information. This parameter should not be freed by the caller \item info - This parameter returns the per-instrumentation path info for the module with the specified index. For example, for the DynamoRIO module, the returned info is an AFL style bitmap of the edges. This parameter is optional and can be set to NULL, if you do not want this information. This parameter should not be freed by the caller \item size - This parameter returns the size of the per-instrumentation path info in the returned info parameter. This parameter is optional and can be set to NULL, if you do not want this information \item return value - non-zero if module with the specified index cannot be found, or 0 if it is found } \api{instrumentation\_edges\_t * get\_edges(void * instrumentation\_state, int index) }{ This function is optional and not required for the fuzzer to work. It is used by the tracer. This function returns an array of basic block edges that occurred in the most recent run of the instrumentation. If \texttt{get\_edges} is called prior to \texttt{enable}, it will return failure, as the fuzzing of processes has not been started yet. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function. \item index - If per-module instrumentation information is enabled, this parameter is an index into the module list for the module about which edges should be retrieved. 
The return value will indicate if a module exists for this index. Indices start at 0 and increase from there. If per-module instrumentation information is NOT enabled, then this parameter is ignored and the general edges array will be returned. \item return value - NULL if an array of basic block edges was not tracked for the most recent instrumentation run or per-module instrumentation is enabled and the requested index was not found. Otherwise, an instrumentation\_edges\_t pointer that contains an array of basic block edges that were hit in the most recent instrumentation run. The returned pointer should be freed by the caller. } \api{int is\_process\_done(void * instrumentation\_state) }{ This function is used by the driver to determine if an instrumented process has finished being fuzzed. If the process is done being fuzzed, then \texttt{get\_fuzz\_result} is ready to be called. If \texttt{is\_process\_done} is called prior to \texttt{enable}, it will return failure, as the fuzzing of processes has not been started yet. }{ \item instrumentation\_state - an instrumentation specific structure previously created by the create() function. \item return value - 0 if the process has finished being fuzzed, 1 if not, -1 on error. } \api{int help(char ** help\_str) }{ This function sets a help message for the instrumentation. This is useful if the instrumentation takes a JSON options string in the create() function. }{ \item help\_str - A double pointer that will be updated to point to the new help string. \item return value - 0 on success and -1 on failure } ================================================ FILE: docs/api/api_mutator.tex ================================================ The mutator modules are what actually mutate the seed buffers. These would include things like a bit flipper, byte munger and so forth. They are given an input buffer and optionally some state information. 
The state information is module-specific and allows the mutator to pick up where it left off. For example, the bit flipper mutator module, which simply flips one bit in the input buffer, would just need to record what bit to flip as its state. On the other hand, more complicated mutators may need to keep track of more information. Additionally, each mutator will have a variety of mutator specific configuration options that can be specified. Both the mutator state and options will be specified as JSON char arrays. \par Anything which is mutator specific will only be used within the mutator functions. All other components will treat these items as opaque strings/blobs. \api{void init(mutator\_t * m) }{ This function fills in m with all of the function pointers for this mutator. % TODO Put this note in a sidebar Note: This function only appears when compiled as a module. When ALL\_MUTATORS\_IN\_ONE is defined, this function will not exist, as there would be a name collision with all the other init() functions from other modules and there will not be any need for obtaining this struct, as all the functions will just be called directly. It's just the code which uses modules which will want to use this struct. ALL\_MUTATORS\_IN\_ONE being defined will cause all the other functions to have the name of the mutator and an underscore prepended. This means that the create() function will be called bit\_flip\_create() in the bit flipper mutator. The name of the mutator is defined by MUTATOR\_NAME. }{ \item m - a pointer to a mutator\_t structure that will be filled in with the function pointers that define this mutator. \item return value - none } \api{void * create(char * options, char * state, char * input, size\_t input\_length) }{ This function will allocate and initialize the mutator structure. The allocated structure will exist until the cleanup() function is called. }{ \item options - a JSON string that contains the mutator specific string of options. 
\item state - used to load a previously dumped state (produced by the get\_state() function), that defines the current iteration of the mutator. This will be a mutator specific JSON string. Alternatively, NULL can be provided to start a mutator without a previously dumped state. \item input - base input string which will be modified to produce mutated inputs later when the mutate() function is called \item input\_length - the size of the input buffer \item return value - a mutator specific structure or NULL on failure. The returned value should not be used for anything other than passing to the various Mutator API functions. } \api{void cleanup(void * mutator\_state) }{ This function will release any resources that the mutator has open and free the mutator state structure. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. This structure will be freed and should not be referenced afterwards. } \api{ int mutate(void * mutator\_state, char * buffer, size\_t buffer\_length) }{ This function will mutate the input given in the create() function and return it in the buffer argument. The size of the buffer will be mutator specific. For example, some mutators may require this buffer to be larger than the original input (passed to the create() function) as it's going to extend the original input in some way. Other mutators will want it to be the same size. Guidance on this will be specified by the mutator specific documentation. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. 
\item buffer - a buffer to which the mutated input will be written \item buffer\_length - the size of the passed in buffer argument \item return value - the length of the mutated data on success, 0 when the mutator is out of mutations, or -1 on error } \api{ int mutate\_extended(void * mutator\_state, char * buffer, size\_t buffer\_length, uint64\_t flags); }{ This function is identical to the \texttt{mutate} function, with the exception that it accepts a flags parameter that specifies how the mutations should be done. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item buffer - a buffer to which the mutated input will be written \item buffer\_length - the size of the passed in buffer argument \item flags - this parameter is a bitfield that specifies how the mutations should be done. Available flags are: \begin{itemize} \item \texttt{MUTATE\_THREAD\_SAFE} - The mutator should ensure that the mutations are done in a thread safe way. If the mutator will be accessed via multiple concurrent threads, this flag should be set. \item \texttt{MUTATE\_MULTIPLE\_INPUTS} - If the mutator will be handling individual input parts, this flag should be used. For some of the fuzzer applications, it may be necessary to split the input up into separate pieces that are mutated independently. In these cases, the mutator can be given multiple inputs and asked for mutations of the input parts individually. One example user of this API is the network driver, where each input is a separate network packet sent to the target process. When this flag is set, the index of the input part to mutate should be included in the lowest 16-bits of the flags parameter. For instance, to mutate the fifth input buffer, set flags to \\ \texttt{(MUTATE\_MULTIPLE\_INPUTS | 5)}. 
\end{itemize} \item return value - the length of the mutated data on success, 0 when the mutator is out of mutations, or -1 on error } \api{char * get\_state(void * mutator\_state) }{ This function will return the state of the mutator. The returned value can be used to restart the mutator at a later time, by passing it to the create() or set\_state() function. It is the caller's responsibility to free the memory allocated here using the free\_state() function. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item return value - a buffer that defines the current state of the mutator. This will be a mutator specific JSON string. } \api{void free\_state(char * state)}{ This function will free a previously dumped state (via the get\_state() function) of the mutator. }{ \item state - a previously dumped state buffer obtained by the get\_state() function. } \api{int set\_state(void * mutator\_state, char * state) }{ This function will set the current state of the mutator. This can be used to restart a mutator from a previous run. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item state - a previously dumped state buffer obtained by the get\_state() function. This will be a mutator specific JSON string. \item return value - 0 on success or non-zero on failure } \api{ int get\_current\_iteration(void * mutator\_state) }{ This function will return the current iteration count of the mutator, i.e. how many mutations have been generated with it. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item return value - the number of previously generated mutations } \api{int get\_total\_iteration\_count(void * mutator\_state) }{ This function will return the total possible number of mutations with this mutator. 
For some mutators, this value won't be possible to predict or the mutator will be capable of an infinite number of mutations. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item return value - the number of possible mutations with this mutator. If this number can't be predicted or is infinite, -1 will be returned. } \api{void get\_input\_info(void * mutator\_state, int * num\_inputs, size\_t **input\_sizes) }{ This function will retrieve the number of inputs and the size of each input that is managed by a mutator. For most of the simple mutators, they will only be given a single input. However, some of the more complicated mutators, such as the manager mutator, will manage several input buffers and mutate them independently with the \texttt{mutate\_extended} function. This function will return the number of inputs that a mutator is mutating and the sizes of each of those inputs. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item num\_inputs - a pointer to an integer which will be used to return the number of inputs that a mutator has. \item input\_sizes - a pointer to a size\_t array that will be used to return the size of each of the inputs. } \api{int set\_input(void * mutator\_state, char * new\_input, size\_t input\_length) }{ This function will set the input (saved in the mutator's state) to something new. This can be used to reinitialize a mutator with new data, without reallocating the entire state struct. }{ \item mutator\_state - a mutator specific structure previously created by the create() function. \item new\_input - The new input used to produce new mutated inputs later when the mutate() function is called \item input\_length - the size in bytes of the input buffer. \item return value - 0 on success and -1 on failure } \api{int help(char ** help\_str) }{ This function sets a help message for the mutator. 
This is useful if the mutator takes a JSON options string in the create() function. }{ \item help\_str - A double pointer that will be updated to point to the new help string. \item return value - 0 on success and -1 on failure } ================================================ FILE: docs/api/api_structures.tex ================================================ This section describes the structures used throughout the API. For each of the top level components, there is a structure which defines the available functions in that component. This allows for a common interface among all of the available implementations of a component. \vbox{\lstinputlisting[ label={lst:mutatort}, caption={\texttt{mutator\_t} struct definition}, language=C, captionpos=b ]{files/mutator_t.c} } The \texttt{mutator\_t} structure, shown in Listing \ref{lst:mutatort}, defines all of the common interfaces for each mutator. The definition of each of the function pointers in the \texttt{mutator\_t} structure is described in Section \ref{mutator}. \vbox{\lstinputlisting[ label={lst:drivert}, caption={\texttt{driver\_t} struct definition}, language=C, captionpos=b ]{files/driver_t.c} } The \texttt{driver\_t} structure, shown in Listing \ref{lst:drivert}, defines all of the common interfaces for each driver. The definition of each of the function pointers in the \texttt{driver\_t} structure is described in Section \ref{driver}. The last field in the struct, \texttt{state}, holds a pointer to the implementation specific driver state structure. This field should only be used when calling each of the driver's functions. \vbox{\lstinputlisting[ label={lst:instrumentationt}, caption={\texttt{instrumentation\_t} struct definition}, language=C, captionpos=b ]{files/instrumentation_t.c} } The \texttt{instrumentation\_t} structure, shown in Listing \ref{lst:instrumentationt}, defines all of the common interfaces for each instrumentation.
The definition of each of the function pointers in the \texttt{instrumentation\_t} structure is described in Section \ref{instrumentation}. \vbox{\lstinputlisting[ label={lst:instrumentationedget}, caption={\texttt{instrumentation\_edge\_t} struct definition}, language=C, captionpos=b ]{files/instrumentation_edge_t.c} } The \texttt{instrumentation\_edge\_t} structure, shown in Listing \ref{lst:instrumentationedget}, is used to return a list of basic block addresses which make up each edge in the fuzzed program's path. This list is returned from the \texttt{get\_edges} method as described in Section \ref{instrumentation}. ================================================ FILE: docs/api/coverpage.tex ================================================ \makeatletter \def\@maketitle{ \begin{center} {\Huge \bfseries \sffamily \@title }\\[3ex] {\Large Version \apiVersion{}}\\[3ex] {\Large \@date}\\[8ex] \includegraphics{killerbeez-logo.png} \end{center} } \makeatother ================================================ FILE: docs/api/defines.tex ================================================ % 1 = definition, 2 = description, 3 = args/return value \def\api#1#2#3{ \bigskip % Can't make \texttt bold... % Maybe use \lstinputlisting ?
\texttt{#1} \par #2 \par \begin{itemize} #3 \end{itemize} } \def\killerbeez{Killerbeez} \def\apiVersion{0.1} \def\apiDate{2018.07.30} ================================================ FILE: docs/api/files/driver_t.c ================================================ struct driver { void (*cleanup)(void * driver_state); int (*test_input)(void * driver_state, char * buffer, size_t length); int(*test_next_input)(void * driver_state); void *(*get_last_input)(void * driver_state, int * length); void * state; }; typedef struct driver driver_t; ================================================ FILE: docs/api/files/instrumentation_edge_t.c ================================================ struct instrumentation_edge { #ifdef _M_X64 uint64_t from; uint64_t to; #else uint32_t from; uint32_t to; #endif }; typedef struct instrumentation_edge instrumentation_edge_t; ================================================ FILE: docs/api/files/instrumentation_t.c ================================================ struct instrumentation { void *(*create)(char * options, char * state); void(*cleanup)(void * instrumentation_state); void *(*merge)(void * instrumentation_state, void * other_instrumentation_state); char * (*get_state)(void * instrumentation_state); void(*free_state)(char * state); int(*set_state)(void * instrumentation_state, char * state); int(*enable)(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length); int(*is_new_path)(void * instrumentation_state, int * process_status); //Optional int (*get_module_info)(void * instrumentation_state, int index, int * is_new, char ** module_name, char ** info, int * size); instrumentation_edges_t * (*get_edges)(void * instrumentation_state, int index); }; typedef struct instrumentation instrumentation_t; ================================================ FILE: docs/api/files/mutator_t.c ================================================ typedef struct mutator { void * (*create)(char * options, char * 
state, char * input, size_t input_length); void(*cleanup)(void * mutator_state); int(*mutate)(void * mutator_state, char * buffer, size_t buffer_length); int(*mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); char * (*get_state)(void * mutator_state); void(*free_state)(char * state); int(*set_state)(void * mutator_state, char * state); int(*get_current_iteration)(void * mutator_state); int(*get_total_iteration_count)(void * mutator_state); void(*get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); int(*set_input)(void * mutator_state, char * new_input, size_t input_length); int(*help)(char **help_str); } mutator_t; ================================================ FILE: docs/api/helpers.tex ================================================ In addition to the fuzzer, a few other utilities have been created that help with the fuzzing process. This section describes these helper utilities and their role in the \killerbeez{} architecture. \subsection{Merger} The merger combines multiple sets of instrumentation data into one instrumentation state. The resulting instrumentation state will include the tracked coverage from all of the input instrumentation states. This allows multiple instances of the fuzzer to share instrumentation data, and ignore paths that the other fuzzers found. \subsection{Picker} The picker helps the user decide which libraries should be instrumented while fuzzing. This is accomplished by running the target program and recording coverage information on each of the loaded libraries. It then analyzes the coverage information for each library to determine for which libraries the coverage information varies based on the input file. These libraries are most likely the ones that process the input file, and thus the most likely targets for fuzzing.
\subsection{Tracer} The tracer runs a target program in an instrumented state and records the entire set of basic block transitions that the process makes. This process will typically be slower than the instrumentations used during fuzzing. The full list of basic block transitions that a target process makes when parsing a given input is needed for advanced corpus management techniques. ================================================ FILE: docs/api/packages.tex ================================================ \usepackage{textcomp} \usepackage{listings} \lstset{basicstyle=\ttfamily\small,showstringspaces=false,upquote=true} \usepackage{graphicx} ================================================ FILE: docs/paper/Makefile ================================================ .PHONY=all clean TYPESETTER=pdflatex all: killerbeez.pdf clean: rm -f *.out *.aux *.toc *.out *.log *.pdf killerbeez.pdf: killerbeez.tex packages.tex introduction.tex references.tex \ implementation.tex overview.tex future_work.tex \ background.tex related_work.tex ${TYPESETTER} killerbeez.tex ${TYPESETTER} killerbeez.tex # do it twice for the bibliography ================================================ FILE: docs/paper/abstract.tex ================================================ \textbf{\textit{Abstract}}: The trend of people increasingly relying on software has continued for several decades and shows no sign of abating. Businesses rely on Windows and the applications which run on it, servers are typically some type of UNIX system, and Apple computers are gaining popularity as of late. The desire for these systems to be stable and resilient to attacks drives the need to find software errors which may compromise them. Many improvements have been made in the field of software testing, with one of the popular ones being fuzz testing, or fuzzing for short. 
Unfortunately, the implementation details make it difficult to compare or combine different methods, while others are only available for specific operating systems, or limited to cases where source code is available. Killerbeez intends to pull these technologies together and allow them to interoperate. It is scalable, supports multiple operating systems, is extensible, and will have support for testing both kernel and user space applications. Killerbeez's goal is to measure the effectiveness of various fuzzing techniques in a variety of situations so the optimal solution can be applied. ================================================ FILE: docs/paper/acknowledgments.tex ================================================ This research was started through an internal research program funded at GRIMM. The authors would like to thank Brian Schanbacher, Ian Klatzco, and Tommy Chin for their support and contributions. Additionally, the authors would like to thank the many security researchers who have open sourced their prior-research, which eases the integration into Killerbeez. ================================================ FILE: docs/paper/background.tex ================================================ There are a huge number of fuzzing tools\cite{afl,aflosx,winafl,peach22,syzkaller,ossfuzz,driller,radamsa,ni,zzuf,synfuzz,brundlefuzz,honggfuzz,kafl,vuzzer,boofuzz} which are publicly available, many of which are very useful on real-world binaries. However, each tool typically only handles one very specific use case or contains other real-world limitations, and most are not designed to scale out. For instance, there are a number of fuzzers targeting kernel system calls\cite{syzkaller,trinity,kafl,osxfuzz}, others for fuzzing \IOCTLs{}\cite{ioctlfuzzer,ioctlbf}, and others for fuzzing userland Linux targets that only effectively work with command line programs written in C-like languages\cite{afl}. 
Some only function with source code, only work on 32-bit Linux\cite{vuzzer}, or require users to manually specify the data format which the target is expecting.\cite{peach,boofuzz} Finally, there is a category of tools which work amazingly well on tiny example programs but do not work on production software due to bugs or lack of support for features such as multithreading.\cite{grimmdriller,angrissues} Lack of compatibility with production software is typically not viewed as an issue in academic work, as the problem to address can be scoped based on the tools that are available; one may assume that the problem can be solved in other situations, but leave the proof for future work. The researchers are typically correct in their assumption, however practitioners need tools that work in practice, not theoretical solutions. In industry, the situation is dictated by the target software, and there is often not a choice as to the implementation's programming language, whether source code is available, what operating system it runs on, or which CPU architectures it supports. This leaves the security professional to choose between the limited set of tools that can handle the specific requirements of their target, many of which will turn out to be mutually exclusive. This lack of available tools also creates a new problem, as a common response to this dilemma is to put together a custom tool which meets their needs, and to do so in the shortest amount of time possible. Furthermore, this results in the same code being re-written for different platforms, or sometimes for the same platform, simply because the security professional was unaware of existing implementations. The adapted tool will likely contain some of the same bugs and limitations that were in the initial version of the existing tool, which may or may not get fixed before it is abandoned. 
In short, while the state of security research is advancing rapidly, the tools to bring their benefits to life are sorely lacking. Though there are some fuzzing projects which come close, such as OSS-Fuzz,\cite{ossfuzz} there are not any fuzzing tools which are freely available, work on closed source applications, are easily extendable, can be run in a distributed manner, and run against Windows, Linux, and macOS applications. ================================================ FILE: docs/paper/conclusion.tex ================================================ In this paper, we present Killerbeez, a fuzzing framework that integrates many disparate research projects so that they can be used together. This framework allows for independent fuzzing improvements to be reused in a distributed, cross-platform way that they otherwise would not have been able to. Furthermore, Killerbeez allows for each new improvement to be easily evaluated against the existing components, which will expedite the evaluation of new research. The authors have released Killerbeez as open-source software\footnote{https://github.com/grimm-co/killerbeez} in the hopes that it will allow for the quicker adoption and evaluation of new security research, resulting in a more efficient vulnerability detecting system. 
================================================ FILE: docs/paper/data/Makefile ================================================ all: image image: gnuplot < picker.gnuplot clean: rm -f picker.svg ================================================ FILE: docs/paper/data/picker.gnuplot ================================================ set terminal pngcairo background "#ffffff" enhanced fontscale 1.0 dashlength 2 set output '../picker.png' set title "Detected Non-Deterministic Basic Block Transitions in\nWindows Media Player Libraries" set key right center set xlabel '{/Helvetica-Oblique Number of Executions}' font 'arial,10' set ylabel '{/Helvetica-Oblique Non-deterministic Basic Block Transitions Detected}' font 'arial,10' set linetype 1 dt 1 set style line 1 lt 1 lw 2 set linetype 2 dt 2 set style line 2 lt 2 lw 2 set linetype 3 dt 3 set style line 3 lt 3 lw 3 set linetype 4 dt 4 set style line 4 lt 4 lw 2 plot "data/AudioSes.dll" title 'AudioSes.dll' with lines ls 2, \ "data/MFPlat.dll" title 'MFPlat.dll' with lines ls 1, \ "data/MSIMG32.dll" title 'MSIMG32.dll' with lines ls 3, \ "data/WindowsCodecs.dll" title 'WindowsCodecs.dll' with lines ls 4 ================================================ FILE: docs/paper/future_work.tex ================================================ The short-term focus will be to incorporate existing technologies from other projects and get them integrated with Killerbeez and running on all the supported platforms. There is ample research, tools, and techniques available now which have yet to be applied in different domains. Once the state of the art has been incorporated, more automation will be the next priority. Expanding the portability of the instrumentation modules so they work on more operating systems is a high priority. The \IPT{} module currently only works on Linux, but should be able to work on Windows now that Windows \IPT{} driver support has improved.
The reverse is true for DynamoRIO, which currently works on Windows but should be able to be ported to run on macOS\footnote{DynamoRIO's support for macOS is a work in progress.} and Linux without too much difficulty. Pulling in more instrumentation technologies such as Intel's Pin, and Dyninst~\cite{dyninst} is also on the list of future work. Wrapping up the future enhancements to instrumentation is making more instrumentations aware of the non-deterministic portions of code which were identified by the Picker. Currently, only the DynamoRIO instrumentation can use this information, but it should be easy to extend this to all of the \AFL{}-bitmap compatible instrumentation modules. This includes the \AFL{} module and the to-be-written Pin and Dyninst modules. The \IPT{} module will not be able to use this data because it does not actually decode the \TNT{} and \TIP{} bitstreams to determine what code is being executed. Adding real-time parsing would slow down the target software significantly, however, this would still be considered if the benefit of handling non-deterministic code appears to be worth the additional overhead which would be incurred. Finally, adding in the Linux-only instrumentation technologies from Honggfuzz~\cite{honggfuzz} is planned, to get the performance boost on software which runs on Linux, especially that which is closed source or not written in either C or C++. Expanding the selection of drivers to include the ability to monitor dialog box pop-ups on Windows is an area of interest. There are already drivers for the more common input methods, such as files, \stdin{}, and network data, however these should also be expanded to cover kernel functions via syscalls, drivers via \IOCTLs{}, \IPC{} messages, etc. This will make Killerbeez a fuzzer which can handle not only applications on multiple operating systems, but also the kernel of multiple operating systems as well. 
Currently, this is only theoretically possible with Killerbeez, which is not much help to researchers in the field. Making it supported without any development or modifications will be a big step in helping industry researchers analyze operating system kernels. The mutation algorithms from several other projects have already been integrated with Killerbeez, however it is desirable to have the ability to write mutators in Python. This will allow pulling in mutators from projects like BrundleFuzz~\cite{brundlefuzz} as well as quickly putting together custom target-specific mutators without needing to learn the Killerbeez mutator \API{}, nor even needing a compiler. In addition to the aforementioned modules on the client side, there are a number of algorithms to pull in from academic publications. This includes the seed energy rating and power schedules from AFLFast~\cite{aflfast} and AFLGo.\cite{aflgo} FairFuzz presents another seed selection algorithm to bias seed files toward code segments which are not often executed.\cite{fairfuzz} The algorithm from PerfFuzz~\cite{perffuzz} can be incorporated into Killerbeez to find algorithmic complexity vulnerabilities. The research on using estimators and extrapolators to determine if a fuzzing campaign should be stopped, or continue running, can be integrated from Pythia.\cite{pythia} Instrumentation modules can be updated to implement the collision resistant algorithm from CollAFL.\cite{collafl} Angora is also on the list of techniques to integrate into Killerbeez, however it is at the end of the queue of improvements due to it needing to be completely re-implemented on account of the authors never releasing the code.\cite{angora} The need to obtain a wide variety of seed values is a weakness of Killerbeez as well as many other fuzzers. The quality of the starting corpus makes a big difference in the efficiency of fuzzing. 
This issue will be addressed by leveraging existing tools which find new inputs more efficiently, or are able to find inputs which reach code that is unlikely to be hit via mutation. There are a number of security analysis and fuzzing tools which do not fit into mutators, drivers, nor instrumentation modules. This includes input generation tools, such as Driller~\cite{driller} and Synfuzz~\cite{synfuzz}. Integrating Driller can be accomplished by simply scheduling \BOINC{} jobs which run Driller instead of the fuzzer component. The input to the process is still the executable and the code which has been covered thus far, and the output is still inputs which cause new code to be executed, so this will work perfectly with the current system. This allows the \BOINC{} code to be leveraged to handle things like scheduling tasks, dealing with worker nodes which time out, and the campaign manager can deal with deciding how much time to spend drilling versus fuzzing. Preparing a Driller environment which can be easily deployed will require a bit of work, however this should only need to be done one time, not once for each fuzzing target. Pulling in inputs generated with Synfuzz should be even easier, as the only \REST{} \API{} which should be required is the ability to add files to the corpus. Some automation of setting up Synfuzz may also be possible, but it requires an oracle to determine which inputs are valid or not, which will likely mean it will need to be set up manually, short of new research that allows autonomously detecting such oracles and hooking the appropriate functions. Finally, expanded support for fuzzers other than the Killerbeez fuzzer is planned. With the client/server architecture which was chosen, there is no reason Killerbeez has to be limited to a single fuzzer. It could easily run Honggfuzz, \AFL{}, WinAFL, or others. 
Doing so will require additional configuration in \BOINC{} to ensure the previous state is sent to the \BOINC{} clients, as well as a new \BOINC{} assimilator to gather the results. % Not sure if we want to take on the things below or not, but if so they will % be in the far future. % Unsolved problems: % How to choose targets % Where to get seeds? % Search the web, very ad-hoc and manual process % Avoiding the easy-to-find crashes to get to the more interesting ones % Could be done with smarter input generators/mutators & automated static/dynamic analysis % Targets which include checksums, compression or encryption % Typical solution, modify the source/executable to remove those checks % Room for improvement: % Detecting non-crashing errors (especially without source code for the target) ================================================ FILE: docs/paper/implementation.tex ================================================ Killerbeez is an interoperable fuzzing architecture that is environment- and platform-independent. It consists of driver, instrumentation, and mutator modules on the client side, a tracer which obtains accurate code coverage and a picker which determines which code should be instrumented. Scalability is achieved by using a \BOINC{} server to allow multiple client nodes to fuzz in parallel. Clients simply obtain work from and return results to the server. While there are some novel improvements in Killerbeez, the primary benefit is getting many existing tools to work together and run on new platforms. Each tool pulled into Killerbeez was the best in class on its own, but when combined with others, the value is more than the sum of its parts. \subsection{Driver} \label{Driver} Driver modules use a mutator module to mutate an input, an instrumentation module to trace the target's execution, and are responsible for getting the input data into the target. 
A simple example is a file-based driver, which will create a file containing the mutated input and use the instrumentation module to launch the application in a way that it will read this file. This is typically accomplished by passing a filename on the command line. For targets which do not have any way to specify the input on the command line, a custom driver would be required to use keyboard shortcuts, mouse input, or some other method of getting the input into the program. The following drivers have been implemented: \begin{itemize}[noitemsep] \item \textbf{File} - for programs that read input from a file \item \textbf{Stdin} - for programs that read input from standard input \item \textbf{Network Server} - enables fuzzing server programs \item \textbf{Network Client} - enables fuzzing client programs \item \textbf{Windows Media Player} - for Windows Media Player \end{itemize} The File and Stdin drivers provide feature parity with \AFL{}, in terms of input methods. There is nothing particularly novel about these drivers, but they are an important feature. Sending malicious files via email is a popular attack vector, so there is an interest in proactively finding bugs that can be triggered by loading files. The Network Server and Network Client drivers provide feature parity with \AFL{} when it is combined with Preeny,\cite{preeny} which modifies the behavior of network-based target programs to accept input via stdin. Two drivers are needed, one to establish a connection to a server, and the other to accept a connection from a client. These drivers caused the creation of the multipart mutator, which is covered in more detail in Section~\ref{Mutator}. A Windows Media Player (WMP) driver was created to demonstrate how to deal with a \GUI{} application which does not exit without user interaction. Applications with a \GUI{} are difficult to fuzz and many fuzzers\cite{afl,honggfuzz,peach,vuzzer,boofuzz} evade this problem by not supporting targets with a \GUI{}. 
The typical recommendation is to fuzz the library which does the heavy lifting, or modify the application to not load a \GUI{}. This does not work well when dealing with closed source applications. A test harness can be written which calls the undocumented functions in the closed source library, but there is no guarantee that bugs found will also be present and reachable in the real application. This is due to constraints which may be placed on function arguments in the main application, or that functions are called in a different order. While writing a custom harness to test a library is a good recommendation, it should not be the only option. Killerbeez addresses the problem of fuzzing \GUI{} applications with modular drivers instead of simply avoiding the problem. The next issue a driver has to deal with is determining when the target is done processing the input. Typically, a fuzzer mutates the input, feeds it to a target, and then monitors the target for interesting behavior such as a crash or a hang, and if it does not exit after the timeout period, it considers the target to be hung. This works for command line programs which exit immediately after processing the input, but falls apart when dealing with \GUI{} applications. Setting a timeout which is too low results in stopping before the program is finished processing the input, while setting it too high means wasting time. On top of this, every non-crashing test case is considered a hang. WinAFL attempts to address these problems by forcing the user to reverse engineer the target software to identify the function which reads and processes input data. This is time consuming, and if there is one function which reads the data into memory and another function which parses it, this strategy does not work well. This can sometimes be worked around by going up a level in the call stack until the function is located which calls both the reading and parsing functions. 
However, that function might also be the one which loads the \GUI{}, which means it may never return. This breaks WinAFL's assumption that there is a function which reads input data, parses it, and returns, which means it will fail in the same way fuzzers intended for command line programs will fail: with every test case being a hang. The next alternative is to patch the target executable to exit after parsing. All of these approaches require reverse engineering, and will have varied results depending on the details of the target software. The WMP driver, when used with the DynamoRIO\cite{dynamo} instrumentation, uses the same strategy as WinAFL, where a specific function name or offset needs to be specified and the test is ended when that function returns, however this is not the only stopping condition. The driver also checks for sound playing and assumes that if it was able to decode the file and start playing sound, that the application has finished parsing the input file. The underlying assumptions are that errors will be in the code which does the parsing, rather than the code which does the playing, and that all the parsing is done up front. This does mean that bugs which require a significant number of frames will not be found, as the test will conclude early and kill the application. This is a conscious trade-off which was made to speed up the number of executions per second by terminating much earlier than waiting for the entire clip to play or the timeout to occur. While this technique is based on fuzzing Windows Media Player, it should work on any media playing application. \subsection{Mutator} \label{Mutator} The mutators from Honggfuzz, Radamsa, AFL, and Ni\cite{ni} are leveraged by wrapping the code from these projects to conform to the Killerbeez mutator \API{}. By defining an \API{} for the mutators, researchers can modify other fuzzers to conform to the Killerbeez mutator \API{} and easily swap in the new mutators. 
The following mutators have been implemented: \begin{itemize}[noitemsep] \item \textbf{arithmetic} - 32-bit arithmetics, both endians. From \AFL{} \item \textbf{bit flip} - Flips various number of bits (1-32). From \AFL{} \item \textbf{dictionary} - Inserts or replaces values from a dictionary. From \AFL{} \item \textbf{havoc} - Runs multiple mutations on a single input. From \AFL{} \item \textbf{interesting value} - Inserts values which are more likely to trigger integer overflows or off-by-one errors. From \AFL{} \item \textbf{splice} - Splices two input files together. From \AFL{} \item \textbf{afl} - All of the \AFL{} mutators, run in the same manner as is done in \AFL{} \item \textbf{honggfuzz} - Mutation algorithm from Honggfuzz\cite{honggfuzz} \item \textbf{multipart} - Input must be made up of multiple parts, different mutators are applied to different parts of the input. Useful for network protocols where there is a desire to not disrupt the handshake/login \item \textbf{ni} - Mutation algorithm from Ni \item \textbf{nop} - Mutator which does not mutate anything, useful for testing and when combined with the multipart mutator \item \textbf{radamsa} - Mutator which wraps the Radamsa\cite{radamsa} executable \item \textbf{zzuf} - Mutation algorithm from zzuf\cite{zzuf} \end{itemize} As indicated in the list above, several of the mutators were taken from \AFL{} and adapted to the Killerbeez mutator \API{}. These have proven to be effective algorithms and were a solid starting point. Honggfuzz has a different set of mutators, some of which are similar to those from \AFL{}, such as Honggfuzz's magic value mutator and \AFL{}'s interesting value mutator, however there are slight variations which work better against some targets than others. Honggfuzz is the only fuzzer to have found a critical vulnerability in OpenSSL to date,\cite{honggfuzz} so clearly it does something different than the other fuzzers. 
Again, the approach was one of pragmatism: taking the existing techniques and bringing them to new environments, such as Windows with code coverage capabilities. The multipart mutator's development was driven by the network drivers and the desire to have the handshake or authentication section of the input not be mutated, as this would prevent much of the target's code from being reached. The input is divided into parts and a mutator is run on each part. For any parts which should not be modified, the ``nop'' mutator, which is described below, is selected. This allows different mutators to be used on different segments of the input and the ones which perform the best can be selected more often by the campaign manager. The multipart mutator could also be used with a file-based driver which is multipart aware, allowing different segments of input files to be defined. This would enable things such as ensuring a file's magic bytes are never modified by using the ``nop'' mutator on the first segment. Aki Helin, the author of Radamsa, also wrote a mutation algorithm called Ni. This code was adopted with minimal changes to provide more diversity in mutation methods, and based on Aki's reputation for having novel ideas on how to mutate inputs. During development, it quickly became clear that having a mutator which does not do any mutation would be handy when debugging issues. This is how the nop mutator was born. It was later used when the multipart mutator was developed. Radamsa is a general purpose fuzzer, written in Lisp, which came from the Oulu University Secure Programming Group (OUSPG) Protos Genome Project.\cite{genome} It works well against a variety of network protocols and file formats, and has found dozens of vulnerabilities.\cite{radamsaresults} The radamsa mutator module in Killerbeez is a simple wrapper which feeds data to the Radamsa executable. 
The strategy of using an external process was chosen to allow the process to be long lived, so Radamsa's internal state can be updated over the course of many inputs. The alternative approach, which other fuzzing projects have taken when adopting Radamsa, is to pull in the \texttt{main()} function from the C code (which is generated by the Radamsa Lisp code) and execute \texttt{main()} once per input.\cite{radamsatob} This is much faster in terms of execution time, because the function is executed within the context of the fuzzer process. This means data does not need to be piped from one process to another and then back again; however, it loses a key value of Radamsa, which is that it keeps state and tends to get better as it sees more data.\cite{radamsagrrproblems} While the implementation in Killerbeez is slower, and this reflects poorly on the metric of executions of the target application per second, it arguably produces higher quality mutations in the long run. There was an effort to get Radamsa compiled on Windows as a library, which was painstakingly implemented, only to find that it was about twice as slow as using an external process. The cause of this was not immediately apparent, and the effort was abandoned in favor of developing other features. Finally there is zzuf, which is yet another application fuzzer, that primarily targets media players, image viewers, and web browsers. As with other mutators which were pulled in from other projects, it has found bugs in production code ranging from audio and video codecs to objdump~\cite{binutils} and nm~\cite{binutils}. Each of the mutators brings diversity to Killerbeez. Different authors are going to frequently have different approaches, and even when the algorithms are similar, there are frequently implementation details which will vary in ways which are sometimes important. Different mutation algorithms will perform differently on various targets.
The benefit of being able to switch from one to another easily enables Killerbeez to measure which ones are finding more inputs which trigger new code execution on different targets and at different points in the fuzzing process. A mutator which performs poorly with the initial corpus of inputs may be the best later when a different section of code is unearthed. \subsection{Instrumentation} \label{Instrumentation} Killerbeez uses an instrumentation abstraction to implement the feedback-based portion of the fuzzer. Instrumentation monitors code coverage of the target binary. Feedback-based fuzzing helps expand code coverage by reducing the input set to only those that reach new code. This reduces the likelihood that multiple inputs will be tested that result in the same code coverage. The following instrumentation modules have been implemented: \begin{itemize}[noitemsep] \item \textbf{Debug} - A na\"ive Windows-only instrumentation that determines the result of a round of fuzzing via the Windows Debug \API{}.\cite{windebugapi} \item \textbf{Return Code} - A Linux-only equivalent to the debug instrumentation that uses the \texttt{waitpid()} system call to determine the result of a fuzz round. \item \textbf{DynamoRIO} - An instrumentation that uses the DynamoRIO project\cite{dynamo} to determine new code paths discovered in a binary. \item \textbf{Intel PT} - An instrumentation that uses Hardware-level ``Process Tracing'' \item \textbf{AFL} - An instrumentation injected by a modified version of AFL's compilers (afl-gcc or afl-clang-fast), or via running the executable under a modified version of QEMU\cite{qemu} \end{itemize} The instrumentation modules monitor, at a minimum, whether a process crashed, exited cleanly, or timed out. More advanced instrumentation modules, such as DynamoRIO, monitor basic block coverage and can inform the fuzzer of new code paths taken in a binary.
Instrumentation developers decide what options their module has and whether they will implement optional features. For example, a module can do only lightweight tracking, as is done in \AFL{}, or it can optionally support tracking every basic block executed and each transition. If it can do the slower, more accurate tracing, it is considered to be not only an instrumentation module, but also a tracer. How tracers are used is covered more in section \ref{Tracer}. The Debug instrumentation is currently a Windows-only instrumentation which attaches to the target process using the debugging interface and monitors the process for a crash or clean exit. It does not track code coverage, which is commonly referred to as ``black box'' fuzzing. The driving force behind this module was that there is no reliable way to determine if a process crashed or exited normally on Windows without debugging it. Unlike Unix, the return code does not contain this information, so there is no way to tell the difference between a program that decided to exit with a non-zero status code to indicate an error, and a crash. The Return Code instrumentation module is similar to the Debug instrumentation in that it does not track code coverage. On \POSIX{} operating systems, the return code of a process is a 32-bit integer. Only the eight least significant bits are provided to the shell, but the full value is available from the \texttt{waitpid()} function and macros such as WIFEXITED and WIFSIGNALED can be used to discern between a clean exit with a non-zero exit code and an actual crash. The DynamoRIO instrumentation module is a modified version of the instrumentation in WinAFL. It requires the user to specify a function which is responsible for loading and processing the input data. At the end of the target function, DynamoRIO will kill the process. Alternatively, this instrumentation supports persistence mode, which allows for multiple inputs to be tested without restarting a process.
This mode reduces the overhead of restarting the process, and thus increases the number of tests that can be conducted per second. Persistence mode in the DynamoRIO instrumentation is accomplished by resetting the stack and jumping to the beginning of the function again, which may work in test applications, but does not tend to work in real-world software. The typical result is a crash due to global state which is never reset. This includes things like open file handles, allocated memory, application specific state information, etc. The target function must be identified outside of Killerbeez and is typically done manually. By default, an \AFL{}-style bitmap is generated to track code coverage. The module takes options which allow this to be changed to obtain a full trace (see section \ref{Tracer} for details on this feature). Other options include a list of libraries which should be covered by instrumentation. This allows things like tracking code coverage in acrord32.dll when fuzzing Adobe Reader~\cite{adobereader}. Tracking code coverage in modules is an important feature, because the majority of the input parsing code is encapsulated in a library and recompiling is not an option. The Intel PT module uses \IPT{} to gather trace information for CPUs which support \IPT{}. This requires a kernel component to manage \IPT{}, but the tracing itself is done in hardware with a modest performance overhead\cite{iptoverhead,harnessingipt}. The current implementation of this instrumentation module~\cite{killerbeezipt} only works on Linux, via the ``perf'' subsystem. Expanding this to support the \IPT{} driver in Windows is planned in the future. Regardless of operating system, the output of \IPT{} relevant for tracing execution in fuzzing are the \TNT{} and \TIP{} packets. 
The former tracks ``the direction of direct control branches,'' while the latter records ``the target \IP{} of indirect branches, exceptions, interrupts and other branches or events.''\cite{intelptmanual} The \TNT{} packets form a bit stream, while the \TIP{} packets contain a series of instruction pointer addresses, which may be compressed if the \IP{}'s upper bits match the previous \IP{} value. However, these two packet types are not synchronized. For example, if there are four conditional branches, an indirect jump, and then four more conditional branches, \IPT{} will generate a \TIP{} packet and one byte of \TNT{} packet data with no information about the order in which the \TIP{} and \TNT{} events occurred. To make sense of this data, the executable must be analyzed to determine the order in which to pull information from the \TNT{} and \TIP{} queues. As this disassembly adds to the performance overhead, the Intel PT instrumentation module does not do it. Instead it takes a hash of the entire \TNT{} bit stream and the entire set of \IP{} addresses in the \TIP{} packets. This does not identify what code was executed, but it does determine if a different code path was taken, as a different code path would result in different packet data, and thus a different hash. Because the packet order is not synchronized between the \TNT{} and \TIP{} streams, hashes are taken of each stream separately and the pair of hashes are used to identify a particular code path. The \IPT{} instrumentation also supports persistence mode. Persistence mode in the \IPT{} instrumentation is accomplished by modifying the target to repeatedly accept a new input from the fuzzer, call the code to be fuzzed, and reset the target state. While persistence mode in the \IPT{} instrumentation requires source code and manual modifications to the target software, it is much more likely to work properly as compared to the DynamoRIO instrumentation persistence mode. 
The only other public fuzzers known to implement Intel PT based tracing are Honggfuzz\cite{honggfuzz}, Richard Johnson's modified version\cite{winaflintelpt} of WinAFL, and kAFL\cite{kafl}. Honggfuzz does full packet decoding using Intel's processor trace decoder library\cite{libipt} which incurs a much higher overhead than the Killerbeez implementation. Richard's fork of WinAFL did full packet decoding at one point, but does not seem to use the trace data at all with the latest commit.\cite{winaflcommit} Instead, there is a comment which says ``FIXME winipt'' and the calls to \texttt{PtTraceProcessStart()} and \texttt{PtTraceProcessStop()} have been commented out, which implies this is still a work in progress. kAFL utilizes a custom packet decoder built specifically to allow efficient parsing of the \IPT{} packets and disassembly of the target executable. As such, kAFL's \IPT{} parser is faster than the Intel processor trace decoder library,\cite{harnessingipt} but is slower than the approach taken in Killerbeez which refrains from analyzing the target executable. Finally, we have the \AFL{} instrumentation module, which is based around the instrumentation injected at compile time by afl-gcc or the \AFL{} llvm module. Much of the code was taken directly from \AFL{} and adapted to conform to the Killerbeez instrumentation \API{}. This module has been tested with Linux and macOS, but should work on any \POSIX{} operating system. The injected fork server is a slightly modified version of the implementation in the \AFL{} project. The forkserver is also used by the Intel PT module, which splits the ``fork'' and ``run'' actions, as \IPT{} needs to be initialized between these two steps. The original AFL implementation combined these two actions, as they did not have any use case that required them to be separate. The \AFL{} instrumentation module also implements the persistence mode feature included in \AFL{}.
The \AFL{} instrumentation persistence mode is implemented similarly to the \IPT{} instrumentation persistence mode and has similar advantages and disadvantages. \AFL{}'s QEMU user mode tracing is also included in Killerbeez, however this mode only works on Linux as QEMU user mode is only available there. QEMU chain caching, which is disabled in \AFL{}, has been enabled in the Killerbeez implementation via the patch made by Andrea Biondo.\cite{qemuspeedup} This patch to QEMU ensures chains are properly tracked and results in a 3-4x improvement in performance. This puts the Killerbeez implementation of the source-based \AFL{}-style instrumentation equivalent with the original implementation and the QEMU feature significantly faster, showing the advantages of combining the innovations of different authors. \subsection{Tracer} \label{Tracer} A tracer is an instrumentation module which captures detailed trace information about exactly which basic blocks were executed, along with the transitions between them and implements some optional functions in the Killerbeez instrumentation \API{} which return this information. This coverage information is commonly referred to as nodes and edges. The DynamoRIO instrumentation module is an example of both a normal instrumentation module and a tracer. By default, it does lightweight tracing to obtain the \AFL{}-style bitmap coverage information and returns an error if it is asked for nodes and edges. The ``edges'' option can be enabled to switch the module to capture full trace information. Enabling the more accurate tracing mode has a larger overhead, so it is not used for every iteration of fuzzing. As a counterexample, the Intel PT instrumentation module is currently not a tracer. Without full \IPT{} packet decoding, it is not possible for this module to obtain such detailed information. Any time trace information is found, the assimilator stores it in the manager's database in a standard format. 
This is possible because the Killerbeez instrumentation \APIs{} which get nodes and edges require the data to be in a specific format. Any other trace data is allowed to be in any format, as it is passed around as an opaque blob and not consumed by anything other than the instrumentation module which created it. Trace data is accessible via the manager's \REST{} \API{}. The trace data can later be used to reduce the set of seeds to only include the minimum number of files, or the minimum file size, which hits the maximum amount of code. The concept of minimizing test corpora while maintaining the maximum code coverage dates back to at least October of 2008, when Peach Fuzzer version 2.2 was released, which included the minset tool.\cite{peach22} There are a number of different algorithms which could be chosen, which is why this is handled by an optional add-in which can be swapped out at will, as shown in Figure \ref{fig:Killerbeez-integrations}. This granular code coverage data can also be used for weighting seeds based on various algorithms, such as attempting to get to code which is less frequently covered, or targeting a particular piece of code such as a parser which was manually identified or a new piece of code which was identified using automated patch analysis. This would be implemented by the seed selector module from the campaign manager. The most basic seed selection algorithm would weigh all of the seeds equally and go through them in a round-robin fashion. Determining how often to use the tracer is the responsibility of the job type selector in the campaign manager. This module has the ability, but not the obligation, to take node and edge coverage into account. The decision of when to run the tracer is made based on whether the trace data is necessary for the configured Killerbeez components and how much of a performance impact the tracer will have (as compared to scheduling fuzzing jobs during the same time period). 
The simplest algorithm would never enable the tracer, which would inhibit all other components from using trace data. \subsection{Picker} \label{Picker} Instrumenting all libraries for a real application using a dynamic instrumentation technology is prohibitively slow. Even when using more efficient instrumentation methods, there is a desire to minimize the amount of overhead in instrumentation so more effort can be spent finding bugs instead of performing bookkeeping operations. The Picker automatically determines which libraries should be instrumented so the fuzzer can limit instrumentation to what is interesting, and omit all the other libraries. For deterministic code, this comes with a level of certainty that what is instrumented is, in fact, all of the code which handles the input the fuzzer is sending it. This step is taken when configuring a target, before any fuzzing begins. The Picker determines the library to instrument by running through all seed values and instrumenting each library separately. If the code is never executed, or the coverage is the same for every input, it implies the library is not important in parsing the input. It is possible that the library handles some aspect of the protocol which was simply never executed by any of the seed inputs, however, with a diverse set of starting inputs, there can be some confidence that nothing is omitted from the list of modules to instrument. Code which is non-deterministic causes problems with the algorithm above. Tracking down each source of non-determinism and attempting to eliminate it would be a very tedious task. An example of one source of non-determinism was a call to a graphics library failing to allocate a surface object. It is difficult to know how to remove this type of non-determinism. Every system call which fails on a regular basis would have to be analyzed, and a decision made on what to do about it.
Making it always fail may cut off code paths later which trigger a bug which would be reachable in the program in practice. Repeatedly making the call until it succeeded may also eliminate code paths later, and it makes execution slower at best and causes an infinite loop at worst. Instead of trying to force the non-deterministic program to behave in a deterministic fashion, the Picker accepts that the code in question is going to behave erratically and ignores the execution data related to those sections of code. This is done by running the same input through the target \textit{N} times and finding all of the bytes in the coverage info which vary. By default, \textit{N} is 10, however it should be large enough that a significant number of executions do not identify any more bytes where the execution varied. The correct value for \textit{N} will vary from one target to another. The data for Windows Media Player, shown in Figure \ref{fig:picker}, indicates new non-deterministic code was being found at 325 executions. However, after 10 executions, more than half of the non-deterministic transitions were identified for each of the four libraries. Choosing the number of executions is a trade-off between spending time up front to get more efficient fuzzing, and getting started more quickly but being less efficient. In addition to determining which libraries to instrument, the identified non-deterministic transitions can be used by some instrumentation modules in determining if an input caused new code paths to be found. This is currently only implemented in the DynamoRIO instrumentation module, but will likely be implemented in several of the instrumentation modules listed in the \nameref{Future Work} section.
\begin{figure}[htb] \centering \includegraphics[width=3.5in]{picker.png} \caption{Total Non-deterministic Basic Block Transitions Detected per Execution In Windows Media Player Libraries} \label{fig:picker} \end{figure} The algorithm the Picker uses is not effective with all instrumentation modules. The Picker uses the instrumentation module which will be used in practice, and uses the same options for that module. It then operates on the opaque blob which represents the code coverage information. It does this without any knowledge of the internal format. Anything which uses the \AFL{}-style bitmap will work fine. This would include the DynamoRIO and \AFL{} instrumentation modules. The \IPT{} instrumentation output is the hashes of the \TNT{} and \TIP{} packets, so any non-determinism will change every byte of the instrumentation data. This will cause the Picker to mask out all bytes of the coverage data, which is not useful. If the Intel PT instrumentation was also a tracer, it would be capable of using an internal format which is compatible with the Picker. ================================================ FILE: docs/paper/introduction.tex ================================================ Over the past few years, coverage-guided fuzzing has become a popular way to find software and hardware vulnerabilities due to advances in publicly available tools such as \AFL{}\cite{afl} and its derivatives.\cite{vanhauser} Many fuzzers have ``trophy cases'' consisting of a list of bugs known to be found with that tool to demonstrate their effectiveness against real-world applications.\cite{rustfuzztrophy}\cite{honggfuzz}\cite{afl} However, most tools are primarily focused on finding bugs in open source, command line Linux software which reads input from files or standard input. While there has been some exploration to get \AFL{} working on other operating systems\cite{aflosx,winafl} as well as supporting network input,\cite{netafl,preeny} these features are often omitted. 
Most of the published improvements over the original version of \AFL{} are implemented as forks of \AFL{}.\cite{aflfast,aflgo,fairfuzz,perffuzz,pythia,collafl} Thus the enhancements are mutually exclusive, short of embarking on a development effort to review the modifications and merge the forks back together, manually resolving any conflicts and incompatibilities. The issue of incompatibility is not limited to projects which are modified versions of \AFL{}. Mixing and matching features from various tools requires a significant amount of effort. It involves setting up a build environment, which in itself can be a challenge, as well as merging together different code bases. Oftentimes the code bases will be written in different programming languages, which means they need to be integrated in some way. Using a tool against a type of software it has never been used against before, such as using a Linux kernel fuzzer against another operating system, can find a large number of bugs\cite{anton}. However, in practice, security professionals rarely have the amount of time to invest to get the tools working together, or working in other contexts. We present Killerbeez, a fuzzing framework which brings together many of the various security analysis tools so they can be used together. Killerbeez supports multiple operating systems, can handle target applications with or without source code, and software with a \GUI{}. Furthermore, the input mutation algorithms, instrumentation, seed selection algorithms, and methods for feeding input data to the target are all easily interchangeable via modular components. This modularity enables two properties: (1) easily mixing and matching tactics from different researchers and (2) implementing new algorithms easily. Finally, Killerbeez is scalable, using \BOINC{}\cite{boinc} to distribute work to multiple nodes from a central server.
Our contributions include: \begin{enumerate}[noitemsep] \item An \API{} combining the different components of a fuzzer in a pluggable (modular) way to allow for extensibility \item A collection of existing fuzzers modified to use the \API{} \item A method for automatically determining which libraries are likely to cause a crash, so those can be targeted while fuzzing \item A technique for quickly utilizing \IPT{} trace information to identify unique code traces while fuzzing \item Ability to automatically filter out trace data related to non-deterministic code \end{enumerate} Section \ref{Background} covers background information, section \ref{Killerbeez Overview} provides an overview, the implementation is covered in section \ref{Implementation}, and sections \ref{Related Work}, \ref{Future Work} and \ref{Conclusion} cover related work, future work, and conclusion, respectively. ================================================ FILE: docs/paper/killerbeez.tex ================================================ \documentclass[twocolumn]{article} \input{packages} \title{Killerbeez: Fuzzing Framework to Bring Together the State of the Art} \author{Adam Nichols, Ian Bridges, Benjamin Lipton, Jeff Stewart, Tomas Tillery \\ GRIMM\\ \{adam,ben,jeffball,tomas\}@grimm-co.com icb@rice.edu\footnote{Research was done while at GRIMM}\\ } \date{Published: 18 OCT 2019} % Ensure all acronyms we use are expanded on first use \def\AFL{\renewcommand\AFL{AFL}American Fuzzy Lop (AFL)} \def\COM{\renewcommand\COM{COM}Component Object Model (COM)} \def\BOINC{\renewcommand\BOINC{BOINC}Berkeley Open Infrastructure for Network Computing (BOINC)} \def\BTS{\renewcommand\BTS{BTS}Branch Trace Store (BTS)} \def\GUI{\renewcommand\GUI{GUI}Graphical User Interface (GUI)} \def\IOCTLs{\renewcommand\IOCTLs{IOCTLs}Input/Output Controls (IOCTLs)} \def\IPC{\renewcommand\IPC{IPC}Interprocess Communication (IPC)} \def\IPT{\renewcommand\IPT{IPT}Intel Processor Trace (IPT)} 
\def\stdin{\renewcommand\stdin{stdin}standard input (stdin)} \def\TNT{\renewcommand\TNT{TNT}Taken Not Taken (TNT)} \def\TIP{\renewcommand\TIP{TIP}Target IP (TIP)} \def\IP{\renewcommand\IP{IP}Instruction Pointer (IP)} \def\API{% Whether we use API or APIs first, we want to redefine both \renewcommand\API{API}% \renewcommand\APIs{APIs}% Application Programming Interface (API)% } \def\APIs{% It would look silly to spell out API and APIs both \renewcommand\API{API}% \renewcommand\APIs{APIs}% Application Programming Interfaces (APIs)% } \def\REST{\renewcommand\REST{REST}Representational State Transfer (REST)} \def\POSIX{\renewcommand\POSIX{POSIX}Portable Operating System Interface (POSIX)} \begin{document} % Heading for the first page has the title, author(s) and date \maketitle \input{abstract} \section{Introduction} \label{Introduction} \input{introduction} \section{Background} \label{Background} \input{background} % The overview describes what components exist (driver, mutator, manager, etc.) % and why. Examples are provided as needed, but just enough to cover the % concepts. \section{Killerbeez Overview} \label{Killerbeez Overview} \input{overview} % The implementation goes over each component in detail to showcase what we % have available so far. \section{Implementation} \label{Implementation} \input{implementation} %\section{Evaluation} \label{Evaluation} \section{Related Work} \label{Related Work} \input{related_work} % This is where we cover planned expansions, both in terms of additional things % to pull in, such as grammars which generate test files, Driller style % integration of symbolic execution, and additional modules for the existing % components (drivers, instrumentation, etc.). 
\section{Future Work} \label{Future Work} \input{future_work} \section{Conclusion} \label{Conclusion} \input{conclusion} \section{Acknowledgments} \label{Acknowledgments} \input{acknowledgments} \section{References} \label{References} \renewcommand{\section}[2]{} % Removes the title thebibliography wants to add \input{references} \end{document} ================================================ FILE: docs/paper/overview.tex ================================================ The core components of Killerbeez can be split into two logical categories of orchestration and handling interactions with the target\footnote{Software under test is referred to as the ``target.''} program. The former refers to decisions such as what inputs to use as seed data,\footnote{Initial inputs which will be modified are referred to as ``seeds,'' and the set of initial inputs used is referred to as the ``seed corpus.''} which mutation algorithms to use, how to minimize the input corpus and other decisions which are best left to a central controller. The latter category contains actions such as launching the target; feeding the target input data; tracking code coverage; determining when the target is done processing the input; and reporting whether the target crashed, froze due to something like an infinite loop, or executed new portions of code. \subsection{Orchestration} Killerbeez coordinates the entire distributed fuzzing campaign. The orchestration tasks are handled by the Killerbeez ``manager,'' which runs on a central server. After some initial configuration to specify targets and strategies, the manager decides what jobs to schedule next. It tracks targets available for fuzzing, selects which tools to use and how to configure them, manages the corpus of inputs by removing less interesting ones, and dispatches jobs to worker nodes to be executed. 
It also provides a \REST{} \API{} that allows a researcher to trigger actions and extract results manually, or to integrate an external system that does so autonomously. The components of the manager are depicted in Figure \ref{fig:Killerbeez-server}. \begin{figure*}[!ht] \centering \includegraphics[width=\textwidth]{KILLERBEEZ_Server_Architecture.png} \caption{Killerbeez Server Architecture} \label{fig:Killerbeez-server} \end{figure*} \subsubsection{Work Distribution} The most basic role of the manager is to provide an interface for queuing tasks to be executed on worker nodes and processing the results. A \BOINC{} server is used to transmit the work to nodes and receive results. The manager provides a layer on top of \BOINC{} that understands Killerbeez-specific parameters such as the mutator and instrumentation to use, making it simple to submit jobs to \BOINC{} that run the fuzzer with an appropriate command line. Jobs submitted via the manager also automatically set up Killerbeez and the target software, so the worker nodes are not required to have any special software installed besides the off-the-shelf \BOINC{} client. When jobs complete, the manager uses the \BOINC{} ``assimilator'' interface to collect the results and update the manager's database. The information inserted includes not only the direct output of the fuzzer (new inputs that cause new paths in the binary to be hit) but metadata about the job as well. This metadata could include the average execution time of the binary (to help choose parameters that execute faster), the final instrumentation state from fuzzing (to help the next job find fewer duplicate paths), and various other statistics. Because the manager is not responsible for running the target application, it does not need to run on the same platform as the target. Thus, it can run on Linux while serving work to be executed by Windows or macOS machines. 
\subsubsection{Integration} The manager provides a \REST{} \API{}, which allows clients to access and configure seed data, fuzzing targets, and low-level metadata produced during fuzzing. This will enable future enhancements to be made by taking advantage of external tools, such as using a test case generator to produce new seed data. Another planned integration is to use Driller\cite{driller} to generate program inputs which reach code that has not yet been reached by mutation. The \REST{} \API{} is also used for some of the manager's built-in functionality. The campaign manager, the component that plans new jobs to execute, gathers data using the \REST{} \API{}, analyzes the data to determine the next job to create, and then submits the resulting job via the \REST{} \API{}. The corpus minimization uses the \REST{} \API{} to obtain execution traces and modify the working set of seed values. Accessing data via the \REST{} \API{} allows these components to be less coupled with the internals of the manager, enabling them to run as standalone processes. Figure \ref{fig:Killerbeez-integrations} shows how the \REST{} \API{} enables integration with various tools. \begin{figure*}[!ht] \centering \includegraphics[width=\textwidth]{KILLERBEEZ_Integrations.png} \caption{Killerbeez Integration with External Tools} \label{fig:Killerbeez-integrations} \end{figure*} \subsubsection{Tracing and Corpus Minimization} \label{Corpus Minimization} Killerbeez also introduces the idea of obtaining detailed code coverage information about execution for each input which has a unique code path. This is typically not done by other fuzzers, as obtaining a full execution trace is significantly more overhead than the lightweight instrumentation that \AFL{} or Honggfuzz\cite{honggfuzz} use.\cite{collafl} During normal fuzzing, Killerbeez will generally use lightweight methods of tracking execution. 
However, having a full trace is useful when minimizing the seed corpus and determining which seeds should be weighted more heavily. Reducing the number of files in the corpus helps fuzzers to more efficiently test targets by eliminating inputs which result in the same target functionality being tested.\cite{fileformatfuzzing,softwaredumber} This concept has been encapsulated in the tracer module. Each time an input is found which hits a new code path, a tracer job can be added via the manager. The new tracer job will be executed by a \BOINC{} client, just like any other fuzzing job. The results will include full trace data, which will be stored in the manager's database. The data can be retrieved via the \REST{} \API{}, enabling a ``corpus minimizer'' tool to explore the paths covered by the current input corpus and remove inputs that are redundant. More information about the tracer and corpus minimizer can be found in section \ref{Tracer}. \subsubsection{Work Generation} The manager is also responsible for deciding what work should be performed next. The component that does this is called the ``campaign manager,'' and it consists of several pluggable modules that work together to generate jobs. For jobs that run the Killerbeez fuzzer, the seed selector module specifies an algorithm for choosing the most interesting input to use as a starting point for fuzzing, while the job parameter selector module determines parameters like the mutator to use for the job or the instrumentation options. It is also possible to integrate tools besides the fuzzer into the job system, such as Driller or the tracer. The job type selector module is responsible for choosing which of these tools is currently needed most. The modules can use the \REST{} \API{} to query any of the metadata recorded in the database to make their decisions. \subsubsection{Scalability} The manager can choose the amount of work done per job by specifying things like the number of fuzz iterations. 
By scheduling larger jobs, the manager can scale up to a very large number of clients, allowing a substantial amount of work to get done with a minimal amount of coordination and network overhead. However, if the manager becomes a bottleneck due to the number of workers or the number of API requests, several components could be scaled out. The \BOINC{} server could be moved to its own machine, or even scaled out to a cluster.\cite{boincmultihost} The manager database could also be moved to its own machine. The \REST{} \API{} server is stateless, so it could be scaled fairly easily to run on multiple machines, and components that interact via the \REST{} \API{}, such as input generators and the campaign manager, can also be moved to run on their own hardware. If performance is still a problem even with all components scaled up as much as possible, multiple manager servers could be set up to run parallel fuzzing campaigns, using the \REST{} \API{} to share results between them. In this mode, each manager would act as an input generator for the others. \subsection{Preparation} \label{Preparation Overview} There is a preparation step in fuzzing workflows which is often overlooked or dismissed, that includes setting up the target software and deciding specific fuzzing parameters. This occurs before any real fuzzing begins and includes things such as compiling the target, possibly with a specific compiler or compiler flags, determining what options should be enabled in the target software, deciding which portions of the code should be tracked for code coverage, and specifying how to deal with non-deterministic code. The compilation step is driven by the type of instrumentation chosen, which is typically just a matter of selecting the option with the lowest performance penalty. Instrumentation does not need to be added at compile time, but it is added here when possible, as it reduces the overhead at runtime. 
What options to enable in the target software is target specific and subjective. It depends on the higher level goals. If the goal is to find a vulnerability with wide applicability, choose default options. If it is to test out a specific feature, disable everything except that option. If it is just to find any bug in any configuration, enable everything. While these are important questions, the most interesting decision is what to track in terms of code coverage, and how to deal with non-deterministic code. The solution \AFL{} uses is to require the user to manually determine which code in the target to instrument. \AFL{} requires the user to compile the target library or executable with a special instrumenting compiler. Alternatively, \AFL{} can use a modified version of QEMU while fuzzing to instrument all of the libraries used by a target at run-time. As expected, instrumenting all of the libraries has a higher overhead than only instrumenting a few specific modules. Killerbeez improves on this by including a tool called the ``Picker'' which automatically determines which libraries should be instrumented. The algorithm for doing so is described in section \ref{Picker}. One important detail is that the Picker cannot operate effectively when the target does not have deterministic execution. If feeding the same file into the application multiple times results in different code being executed, this is a problem, not only for the Picker, but also for code coverage in general. Non-deterministic code causes new code paths to be taken, making it appear as if the input file was the reason new code was executed. This results in erroneously keeping inputs which are not actually valuable. \AFL{} does not deal with this problem directly, but it does alert the user to the fact that the target is non-deterministic. The user can then do things like hijack calls to functions such as \texttt{srand()}, which intentionally introduce randomness and non-determinism.
This is often done for applications which employ cryptography for initialization vectors and nonces. For command-line Linux applications, hijacking functions that introduce non-determinism works fairly well. For GUI applications on Windows, it does not work as well. There are system calls in Windows which occasionally fail for no apparent reason. This should not be a problem for the target software, as it should be checking the return code to detect and handle this appropriately. However, when these failures happen, they have the side effect of making the fuzzer misinterpret the resulting new code coverage and conclude that an uninteresting input was interesting. The details of how non-determinism is handled in Killerbeez are also described in section \ref{Picker}. \subsection{Execution} Execution is handled by the client fuzzer program, which is aptly named ``fuzzer.'' This can be run manually from the command line; however, it is typically run by the manager, via a \BOINC{} client. In either case, the fuzzer is responsible for running the target, feeding it input data, tracking code coverage, detecting crashes, and dealing with user interaction such as dialog boxes. \begin{figure*}[!ht] \centering \includegraphics[width=\textwidth]{killerbeez-high-level-block-diagrams.png} \caption{Killerbeez Fuzzer Overview} \label{fig:Killerbeez-fuzzer-overview} \end{figure*} The fuzzer consists of glue code that combines together various modules, which is where all the interesting things occur. The purposes of the Driver, Mutator and Instrumentation modules used in the fuzzer are covered in sections \ref{Driver Overview}, \ref{Mutator Overview}, and \ref{Instrumentation Overview}, respectively. The relationships between these components are depicted in Figure \ref{fig:Killerbeez-fuzzer-overview}. The modules which currently exist and how they work are covered in the \nameref{Implementation} section. The same code base is used on Windows, Linux, and macOS to enable as much code re-use as possible.
Most of the mutators are shared among all platforms. Only the Radamsa mutator, which runs as a separate process, has platform specific code. Some of the instrumentation and driver\footnote{ ``driver'' refers to driver modules, not operating system drivers.} modules, such as the \IPT{} instrumentation and WMP driver, contain platform specific and sometimes target specific code. \subsubsection{Driver} \label{Driver Overview} Killerbeez offers drivers, which are target-specific wrappers that abstract away the concept of loading data into a target and enable finer definition of the failure modes of a particular piece of software. While typical fuzzers look for crashes and hangs, specifically-written drivers can have more context about a given fuzz target. Better understanding of the fuzz target's behavior means that Killerbeez can make better-informed decisions about the status of a target after a particular input, and it can terminate or classify the result of a particular input more quickly than waiting for a timeout. First, the driver module is responsible for feeding inputs to a target. This is a departure from most fuzzers, which only work for one type of input. In the case of AFL, the input is a file (or \stdin{}, which is also a file under UNIX). Syzkaller\cite{syzkaller}, on the other hand, uses only system calls. Each tool then has to implement their own mutation algorithms, code coverage, results collection and so forth. Drivers enable Killerbeez to reuse all of these components and select how to interact with the target by simply selecting the appropriate driver. Abstracting this away allows for more exotic use cases, such as fuzzing \IOCTLs{}, network servers, network clients, \IPC{} such as Mach Messages, XPC, Distributed Objects, \COM{}, and others, all with minimal effort. The second thing drivers are responsible for is dealing with any target specific issues, such as handling GUI interactions. 
For example, if a PDF file with a malformed header is given to a PDF reader application, it typically will pop up a dialog box indicating that the file is corrupt. Fuzzers such as Honggfuzz or WinAFL\cite{winafl} would wait until a timeout expires.\footnote{WinAFL can exit at the end of a function, but dialog boxes tend to prevent that function from returning in practice.} This causes all executions which hit this code path to appear as hung processes. In Killerbeez, a driver could be written for the specific PDF reader which monitors the application for dialog boxes, detects when dialog boxes appear, analyzes the text of the dialog box and determines that the status is a clean exit rather than a hang. This would allow the fuzzer to move on to the next input more quickly, as it would know the input is done being processed and would not need to wait for the full timeout period. It also helps discern between a hang, which may indicate a denial of service vulnerability such as an infinite loop, and an error which is handled in the expected manner. For another example, see the Windows Media Player driver in section \ref{Driver}. Many of the drivers work on many fuzzing targets in a particular category. Targets which accept input from the network are handled by the Network Server driver module, programs which open a file are generally handled by the file driver and so forth. Other drivers can be written to handle things which are specific to particular pieces of software. Some targets will handle opening files differently if opened via double clicking an icon as compared to using the open option from the file menu. Other examples include error message analysis to determine if the system should move on to the next input, or if it should click ``OK'' and continue (e.g. in the event of a warning message). \subsubsection{Mutator} \label{Mutator Overview} Killerbeez also implements ``mutators,'' which are abstractions on modifying program input.
They decide where to modify bytes in the input data, and how to modify them. Killerbeez uses a selection of user-selectable mutators. Parameters are passed to the mutator module via the driver, which control the operation of the mutator. For example, the bit flip mutator flips a parametrized number of bits throughout the entire input, one at a time. Modular mutators also enable trivial combination of different approaches. Using the multipart mutator, different mutators can be applied to different parts of the input. This is required for efficiently fuzzing network protocols, as it is often desirable to not mutate the initial packets as they may include a handshake or authentication. Any mutation to this section would prevent the majority of code from being executed, as the target software would execute an error path instead. It can also be used to ensure that the first few bytes in a file are not modified so the file will still be recognized as being the correct file type. \subsubsection{Instrumentation} \label{Instrumentation Overview} Instrumentation modules are responsible for tracking program execution and determining if an input has caused the target program to execute new code. How it does this, and what level of granularity is used, are questions left to the module author. There is an \IPT{} instrumentation module which is very high resolution. If a loop somewhere in the target is executed 178 times instead of 177 times, it will detect this as a new code path, as the state explored is different than what was seen before. The \AFL{} instrumentation module, on the other hand, would not consider this to be an input which causes the execution of new code. 
The \AFL{} instrumentation module uses a bucketing system that groups executions of the same code and considers anything which executes a portion of code 128-255 times to be equivalent.\cite{aflbucketing} Sometimes instrumentation modules need to interface with kernel drivers, which are implemented differently on different operating systems. For instance, the \IPT{} instrumentation module uses the perf subsystem on Linux, which is not available on macOS or Windows. Other instrumentation technologies, such as Intel's Pin~\cite{pin}, have a very similar interface across different operating systems, which means more of the code in the instrumentation module can be re-used, simply using \#ifdef directives if there are portions which are specific to a particular operating system. ================================================ FILE: docs/paper/packages.tex ================================================ \usepackage{enumitem} \usepackage[margin=0.75in]{geometry} \usepackage{graphicx} % Allow breaking urls at hyphens (https://tex.stackexchange.com/a/3034) \PassOptionsToPackage{hyphens}{url} \usepackage[hidelinks]{hyperref} \usepackage{multicol} ================================================ FILE: docs/paper/references.tex ================================================ \begin{thebibliography}{99} % two-digit numbers, max \bibitem{afl} Michal Zalewski. % Author \textit{American Fuzzy Lop}. % Title \url{http://lcamtuf.coredump.cx/afl/}. % URL \bibitem{vanhauser} Marc ``van Hauser'' Heuse. \textit{Collection of Patches to AFL}. \url{https://github.com/vanhauser-thc/afl-patches/}. \bibitem{aflosx} Ben Nagy. \textit{AFL on OSX}. \url{https://github.com/bnagy/osx-afl-llvm}. \bibitem{winafl} Ivan Fratric. \textit{WinAFL - Fork of AFL for Windows}. \url{https://github.com/ivanfratric/winafl} \bibitem{netafl} Maksim Shudrak. \textit{winAFL patch to enable network-based apps fuzzing}. \url{https://github.com/mxmssh/netafl} \bibitem{preeny} Yan Shoshitaishvili. 
\textit{Preeny: preload libraries for pwning stuff}. \url{https://github.com/zardus/preeny} \bibitem{boinc} University of California. \textit{Berkeley Open Infrastructure for Network Computing}. \url{https://boinc.berkeley.edu/}. \bibitem{peach22} Michael Eddington. \textit{Peach Fuzzer version 2.2}. \url{https://sourceforge.net/projects/peachfuzz/files/Peach/2.2/}. \bibitem{aflfast} Marcel B\"ohme, Van-Thuan Pham, Abhik Roychoudhury. \textit{Coverage-based Greybox Fuzzing as Markov Chain}. \url{https://www.comp.nus.edu.sg/~mboehme/paper/CCS16.pdf}. \bibitem{aflgo} Marcel B\"ohme, Van-Thuan Pham, Manh-Dung Nguyen, Abhik Roychoudhury. \textit{Directed Greybox Fuzzing}. \url{https://mboehme.github.io/paper/CCS17.pdf}. \bibitem{fairfuzz} Caroline Lemieux, Koushik Sen. \textit{FairFuzz: Targeting Rare Branches to Rapidly Increase Greybox Fuzz Testing Coverage}. \url{https://arxiv.org/pdf/1709.07101.pdf}. \bibitem{perffuzz} Caroline Lemieux, Rohan Padhye, Koushik Sen, Dawn Song. \textit{PerfFuzz: automatically generating pathological inputs}. \url{https://dl.acm.org/citation.cfm?doid=3213846.3213874}. \bibitem{pythia} Marcel B\"ohme. \textit{STADS: Software Testing as Species Discovery}. \url{https://mboehme.github.io/paper/TOSEM18.pdf}. \bibitem{collafl} Shuitao Gan, Chao Zhang, Xiaojun Qin, Xuwen Tu, Kang Li, Zhongyu Pei, Zuoning Chen. \textit{CollAFL: Path Sensitive Fuzzing}. \url{http://chao.100871.net/papers/oakland18.pdf}. \bibitem{syzkaller} Google. \textit{syzkaller - kernel fuzzer}. \url{https://github.com/google/syzkaller}. \bibitem{trinity} kernelslacker. \textit{Trinity - Linux system call fuzzer}. \url{https://github.com/kernelslacker/trinity}. \bibitem{osxfuzz} MWR Labs. \textit{macOS Kernel Fuzzer}. \url{https://github.com/mwrlabs/OSXFuzz}. \bibitem{ioctlfuzzer} eSage Lab. \textit{IOCTL Fuzzer}. \url{https://github.com/Cr4sh/ioctlfuzzer}. \bibitem{ioctlbf} Jeremy Brun. \textit{Windows Kernel Drivers fuzzer}. \url{https://github.com/koutto/ioctlbf}. 
\bibitem{ossfuzz} Google. \textit{OSS-Fuzz - Continuous Fuzzing for Open Source Software}. \url{https://github.com/google/oss-fuzz}. \bibitem{driller} Nick Stephens, John Grosen, Christopher Salls, Audrey Dutcher, Ruoyu Wang, Jacopo Corbetta, Yan Shoshitaishvili, Christopher Kruegel, Giovanni Vigna. \textit{Driller: Augmenting Fuzzing Through Selective Symbolic Execution}. \url{http://www.cs.ucsb.edu/~chris/research/doc/ndss16_driller.pdf}. \bibitem{boincmultihost} BOINC. \textit{Increasing Server Capacity}. \url{https://boinc.berkeley.edu/trac/wiki/MultiHost}. \bibitem{pin} Intel Corporation. \textit{Pin - A Dynamic Binary Instrumentation Tool}. \url{https://software.intel.com/en-us/articles/pin-a-dynamic-binary-instrumentation-tool}. \bibitem{ni} Aki Helin. \textit{Ni mutator}. \url{https://github.com/aoh/ni}. \bibitem{radamsa} Aki Helin. \textit{Radamsa - a general-purpose fuzzer}. \url{https://gitlab.com/akihe/radamsa}. \bibitem{radamsaresults} Aki Helin. \textit{Radamsa - Some Known Results}. \url{https://gitlab.com/akihe/radamsa/blob/master/README.md#some-known-results}. \bibitem{radamsatob} Trail of Bits. \textit{Grr Radamsa Modifications}. \url{https://github.com/trailofbits/grr/tree/master/third_party/radamsa}. \bibitem{radamsagrrproblems} Aki Helin. \textit{Grr Radamsa Modifications Comments}. \url{https://gitlab.com/akihe/radamsa/issues/28#note_77242061}. \bibitem{zzuf} Sam Hocevar. \textit{zzuf - general purpose fuzzer}. \url{https://github.com/samhocevar/zzuf}. \bibitem{synfuzz} Joe Rozner. \textit{Synfuzz - re-targetable grammar based test case generation}. \url{https://github.com/jrozner/synfuzz}. \bibitem{dyninst} The University of Wisconsin, University of Maryland. \textit{DyninstAPI: Tools for binary instrumentation, analysis, and modification}. \url{https://dyninst.org/}. \bibitem{brundlefuzz} Carlos Garcia Prado. \textit{BrundleFuzz - a distributed fuzzer for Windows and Linux using dynamic binary instrumentation}. 
\url{https://github.com/carlosgprado/BrundleFuzz}. \bibitem{angora} Peng Chen, Hao Chen. \textit{Angora: Efficient Fuzzing by Principled Search}. \url{https://angorafuzzer.github.io/}. \bibitem{intelptmanual} Intel Corporation. \textit{Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3C: System Programming Guide, Part 3, p248}. \url{https://software.intel.com/en-us/download/intel-64-and-ia-32-architectures-sdm-volume-3c-system-programming-guide-part-3}. \bibitem{libipt} Intel Corporation. \textit{libipt - an Intel(R) Processor Trace decoder library }. \url{https://github.com/01org/processor-trace}. \bibitem{winaflintelpt} Ivan Fratric, Richard Johnson. \textit{Fork of WinAFL}. \url{https://github.com/intelpt/winafl-intelpt/}. \bibitem{winaflcommit} Richard Johnson. \textit{Commit which appears to have removed IPT support}. \url{https://github.com/intelpt/winafl-intelpt/commit/d1e9e560bbaf4e56f6d6bd48672bf691097e86fa}. \bibitem{killerbeezipt} GRIMM. \textit{Killerbeez IPT Documentation}. \url{https://github.com/grimm-co/killerbeez/blob/master/docs/IPT.md}. \bibitem{qemuspeedup} Andrea Biondo. \textit{Improving AFL's QEMU mode performance}. \url{https://abiondo.me/2018/09/21/improving-afl-qemu-mode/}. \bibitem{qemu} \textit{QEMU}. \url{https://www.qemu.org/}. \bibitem{peach} Peach Tech. \textit{Peach Fuzzer}. \url{https://www.peach.tech/}. \bibitem{honggfuzz} Google. \textit{Honggfuzz - A security oriented, feedback-driven, evolutionary, easy-to-use fuzzer with interesting analysis options}. \url{http://honggfuzz.com/}. \bibitem{honggfuzzgrimm} GRIMM. \textit{Modified version of Honggfuzz which enables it to use Killerbeez mutator modules}. \url{https://github.com/grimm-co/honggfuzz}. \bibitem{anton} Anton Lindqvist. \textit{Fuzzing the OpenBSD kernel}. \url{https://www.openbsd.org/papers/fuzz-slides.pdf}. \bibitem{genome} Oulu University Secure Programming Group. \textit{PROTOS Protocol Genome Project}. 
\url{https://www.ee.oulu.fi/roles/ouspg/genome}. \bibitem{dynamo} \textit{DynamoRIO}. \url{http://www.dynamorio.org/}. \bibitem{kafl} Schumilo, Sergej and Aschermann, Cornelius and Gawlik, Robert and Schinzel, Sebastian and Holz, Thorsten. \textit{kAFL: Hardware-Assisted Feedback Fuzzing for OS Kernels}. \url{https://github.com/RUB-SysSec/kAFL}. \bibitem{vuzzer} Sanjay Rawat et al. \textit{VUzzer: Application-aware Evolutionary Fuzzing}. \url{https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/vuzzer-application-aware-evolutionary-fuzzing/}. \bibitem{boofuzz} Joshua Pereyda. \textit{boofuzz: Network Protocol Fuzzing for Humans}. \url{https://github.com/jtpereyda/boofuzz}. \bibitem{grimmdriller} GRIMM. \textit{Guided Fuzzing with Driller}. \url{https://blog.grimm-co.com/post/guided-fuzzing-with-driller/}. \bibitem{angrissues} Audrey Dutcher. \textit{Angr Real World Program Issue}. \url{https://github.com/shellphish/driller/issues/25#issuecomment-288253948}. \bibitem{fileformatfuzzing} Mateusz "j00ru" Jurczyk. \textit{Effective File Format Fuzzing}. \url{https://www.blackhat.com/docs/eu-16/materials/eu-16-Jurczyk-Effective-File-Format-Fuzzing-Thoughts-Techniques-And-Results.pdf}. \bibitem{softwaredumber} Tavis Ormandy. \textit{Making Software Dumber}. \url{http://taviso.decsystem.org/making_software_dumber.pdf}. \bibitem{aflbucketing} Michal Zalewski. \textit{Technical Whitepaper for afl-fuzz}. \url{http://lcamtuf.coredump.cx/afl/technical_details.txt}. \bibitem{iptoverhead} James Reinders. \textit{Processor Tracing}. \url{https://software.intel.com/en-us/blogs/2013/09/18/processor-tracing}. \bibitem{harnessingipt} Andrea Allievi and Richard Johnson. \textit{Harnessing Intel Processor Trace on Windows for Vulnerability Discovery}. \url{https://conference.hitb.org/hitbsecconf2017ams/materials/D1T1%20-%20Richard%20Johnson%20-%20Harnessing%20Intel%20Processor%20Trace%20on%20Windows%20for%20Vulnerability%20Discovery.pdf}. 
\bibitem{binutils} Free Software Foundation. \textit{GNU Binutils}. \url{https://www.gnu.org/software/binutils/}. \bibitem{windebugapi} Microsoft. \textit{Debugging Functions}. \url{https://docs.microsoft.com/en-us/windows/desktop/debug/debugging-functions}. \bibitem{adobereader} Adobe. \textit{Adobe Acrobat Reader DC}. \url{https://get.adobe.com/reader/}. \bibitem{rustfuzztrophy} Sergey ``Shnatsel'' Davidoff. \textit{Collection of bugs uncovered by fuzzing Rust code}. \url{https://github.com/rust-fuzz/trophy-case}. \bibitem{clusterfuzzrelease} Google. \textit{Open sourcing ClusterFuzz}. \url{https://opensource.googleblog.com/2019/02/open-sourcing-clusterfuzz.html}. %\bibitem{} % . % \textit{}. % \url{}. \end{thebibliography} ================================================ FILE: docs/paper/related_work.tex ================================================ There are projects which have addressed some of the aspects covered by Killerbeez such as platform independence, distributed fuzzing, leveraging existing tools, and so forth. Google's OSS-Fuzz\cite{ossfuzz} addresses scalability by running many fuzzers in parallel, as well as re-using existing code by leveraging tools like Honggfuzz to handle the actual fuzzing. The core fuzzing component of OSS-Fuzz, ClusterFuzz, was unpublished and therefore unavailable to anyone outside of Google for the first eight years. On February 7, 2019, ClusterFuzz was released under the Apache License (v2).\cite{clusterfuzzrelease} Differences which still remain between OSS-Fuzz and Killerbeez include OSS-Fuzz requiring source code for the target software, and requiring that tests be written to integrate it into the overall system. This adds efficiency, as the test cases can eliminate code like GUI libraries, which is not the real target of the fuzzing; however, it is unable to test closed source software.
Had ClusterFuzz been open source in 2017, the authors likely would have attempted to extend OSS-Fuzz to also be able to fuzz closed source software and run on computers the user controls instead of using Google's cloud service. \AFL{}\cite{afl} is an open source fuzzer that uses coverage data and genetic algorithms to automatically discover interesting test cases in a target. \AFL{} was designed to be practical; it has a low overhead, is easy to use, and works against real-world software. As such, \AFL{} has become one of the most popular fuzzers and many research projects investigate how to improve \AFL{}. Killerbeez borrows many features from \AFL{}, such as the compiler and QEMU instrumentations. While Killerbeez has also borrowed \AFL{}'s mutation strategy, it does not currently include \AFL{}'s mutations which mutate based on coverage data. Coverage data is used to avoid wasting time mutating portions of the input that the target does not process. These mutations will be incorporated into Killerbeez in the future. While \AFL{} is a great local fuzzer, it is not easily distributed and cannot easily manage the fuzzing of more complex targets. \AFL{} does not support applications on Windows, and the \AFL{} fork which does support Windows, WinAFL, lacks many of the features of \AFL{}. Peach Fuzzer\cite{peach} now supports distributed fuzzing, modular mutators, and modules for launching apps, is able to do both file and network-based fuzzing, and does work on closed source applications. However, the distributed aspect is only available in the proprietary version of the fuzzer; it does not exist in the community edition, which is open source. There is also no feedback loop for code coverage in either edition. Instead, the input format needs to be manually described in XML files, as does the model for the program state. To alleviate this problem, the company behind Peach Fuzzer is willing to sell access to the definitions they have created.
Honggfuzz\cite{honggfuzz} is an open source fuzzer which runs on Windows, Linux, macOS, Android, FreeBSD and NetBSD, all using a single code base. It can handle closed source applications, long-running applications such as servers, and will automatically use multiple CPU cores to do fuzzing in parallel. Modularity is achieved by allowing an external program to do the mutation of inputs. While Honggfuzz scales nicely on a single machine, it does not have any built-in mechanism to utilize multiple machines. Using Honggfuzz in a larger framework such as OSS-Fuzz takes care of this limitation. In fact, Honggfuzz is a fuzzer which will likely be integrated into Killerbeez in the future, as described in section \ref{Future Work}. Honggfuzz has more types of instrumentation than any other fuzzer, including Killerbeez at the time of writing; however, none of these work on Windows. The \BTS{} and \IPT{} instrumentations are only for Linux, as is the hardware-based counters instrumentation which tracks the number of instructions and branches which were executed. There is also compile-time instrumentation, but this is only helpful in the case where source code is available, and it can be compiled by GCC or LLVM. It is possible to compile some C/C++ code for Windows using LLVM, but anything which requires Microsoft Visual Studio to be compiled will not have any instrumentation. Supporting Windows does not seem to be a priority for Honggfuzz, as it cannot be compiled natively, but only via Cygwin. Honggfuzz is a great tool, which is why a modified version of it was created which can use all of the Killerbeez mutator modules.\cite{honggfuzzgrimm} Section \ref{Future Work} describes the Linux instrumentation technologies from Honggfuzz planned for integration with Killerbeez. If it is feasible to add the ability to use Killerbeez instrumentation modules, that is another contribution which will be made to the Honggfuzz project.
================================================ FILE: driver/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (driver) SET(DRIVER_SRC ${PROJECT_SOURCE_DIR}/driver.c ${PROJECT_SOURCE_DIR}/driver_factory.c ${PROJECT_SOURCE_DIR}/file_driver.c ${PROJECT_SOURCE_DIR}/stdin_driver.c ${PROJECT_SOURCE_DIR}/network_server_driver.c ${PROJECT_SOURCE_DIR}/network_client_driver.c ) if (WIN32) set(DRIVER_SRC ${DRIVER_SRC} ${PROJECT_SOURCE_DIR}/wmp_driver.cpp ) endif(WIN32) source_group("Library Sources" FILES ${DRIVER_SRC}) add_library(driver OBJECT ${DRIVER_SRC}) target_compile_definitions(driver PUBLIC DRIVER_NO_IMPORT) target_include_directories(driver PUBLIC ${PROJECT_SOURCE_DIR}/../instrumentation/) ================================================ FILE: driver/driver.c ================================================ #include #include #include #include #include #include "driver.h" #include #ifndef _WIN32 #include #include #include #include #endif /** * Waits for a fuzzed process to be finished processing the input, either via timing out or the * process exiting. 
* @param process - a HANDLE to the fuzzed process * @param timeout - The maximum number of seconds to wait before declaring the process done * @param instrumentation - used to access `is_process_done` and `get_fuzz_result` * @param instrumentation_state - arguments for `is_process_done` and `get_fuzz_result` * @return - FUZZ_HANG or FUZZ_ result (from get_fuzz_result) */ #ifdef _WIN32 int generic_wait_for_process_completion(HANDLE process, int timeout, instrumentation_t * instrumentation, void * instrumentation_state) #else int generic_wait_for_process_completion(pid_t process, int timeout, instrumentation_t * instrumentation, void * instrumentation_state) #endif { time_t start_time = time(NULL); int process_done = 0; while(1) { process_done = instrumentation->is_process_done(instrumentation_state); if (process_done == 1) return instrumentation->get_fuzz_result(instrumentation_state); else if (process_done == -1) return FUZZ_ERROR; // if it's zero, the process is not done, so keep looping // timeout if (time(NULL) - start_time > timeout) return FUZZ_HANG; // FUZZ_HANG isn't ever set in the instrumentation, which isn't great. // could be solved by adding a set_fuzz_result function to the API. I // am unaware of any API constraints that require it at this time. A // potential issue I forsee is that a second get_fuzz_result would // incorrectly report FUZZ_RUNNING. 
#ifdef _WIN32 Sleep(5); #else usleep(5*1000); #endif } } /** * This function will call mutate on the given mutator state to modify the mutator buffer * and then, if the mutation succeeds, call the given test_input function with the mutated * buffer * @param state - a driver specific structure previously created by the driver's create function * @param mutator - the mutator to call to obtain a mutated input buffer * @param mutator_state - the state of the mutator given in the mutator parameter * @param buffer - the buffer to write the mutated input to * @param buffer_length - the length of the buffer parameter * @param test_input_func - the test_input function to call after mutating the input buffer * @param mutate_last_size - this parameter is used to return the size of the mutated input buffer * @return - FUZZ_CRASH, FUZZ_HANG, or FUZZ_NONE on success, FUZZ_ERROR on error, -2 if the mutator has finished generating inputs */ int generic_test_next_input(void * state, mutator_t * mutator, void * mutator_state, char * buffer, size_t buffer_length, int (*test_input_func)(void * driver_state, char * buffer, size_t length), int * mutate_last_size) { if (!mutator) { ERROR_MSG("Mutator module missing!"); return -1; } DEBUG_MSG("Mutating input..."); *mutate_last_size = mutator->mutate(mutator_state, buffer, buffer_length); if (*mutate_last_size < 0) return -1; else if (*mutate_last_size == 0) return -2; return test_input_func(state, buffer, *mutate_last_size); } /** * This function allocates a buffer to be used for holding the mutated input that a driver will * to the target program. * @param ratio - The desired ratio of mutate buffer size to input size. 
* @param input_length - The size of the input buffer * @param buffer - a pointer to a buffer pointer, used to return the allocated buffer * @param length - a pointer to a size_t variable, used to return the allocated buffer's length * @return - zero on success, non-zero on failure */ int setup_mutate_buffer(double ratio, size_t input_length, char ** buffer, size_t * length) { size_t output_size; char * output_buffer; output_size = (size_t)(input_length * ratio); if (!output_size) return 1; output_buffer = malloc(output_size); if (!output_buffer) return 1; *buffer = output_buffer; *length = output_size; return 0; } /** * This function sends the provided buffer on the already connected TCP socket * @param sock - a pointer to a connected TCP SOCKET to send the buffer on * @param buffer - the buffer to send * @param length - the length of the buffer parameter * @return - non-zero on error, zero on success */ #ifdef _WIN32 int send_tcp_input(SOCKET * sock, char * buffer, size_t length) #else int send_tcp_input(int * sock, char * buffer, size_t length) #endif { int result; size_t total_read = 0; result = 1; while (total_read < length && result > 0) { result = send(*sock, buffer + total_read, length - total_read, 0); if (result > 0) total_read += result; else if (result < 0) //Error, then break { #ifdef _WIN32 // Here in the network drivers, we have a little bit more // information than you'd get in a stdin/file driver. // We want to pass up information that we terminated after n packets. // We have a few possible send() results inside this loop. 
// send tells us ERROR and // - we retry // - it counts as success, do not retry // - it is a proper error, stop int error_code = WSAGetLastError(); ERROR_MSG("send() failed with error: %d", error_code); // (10053) the client unexpectedly terminated if (error_code == 10053) return -2; // TODO: this -2 should be #define FUZZ_UNUSED_PART or something similar // Currently this is checked in network_client_run() #else ERROR_MSG("send() failed with error: %d", errno); // TODO: Write error-checks #endif } } // This currently assumes that failing to write all our input is an // error, which may not always be the case. return total_read != length; } ================================================ FILE: driver/driver.h ================================================ #pragma once #ifdef _WIN32 #include #else #include // pid_t #endif #include #include #ifdef DRIVER_EXPORTS #define DRIVER_API __declspec(dllexport) #elif defined(DRIVER_NO_IMPORT) #define DRIVER_API #else #define DRIVER_API __declspec(dllimport) #endif #ifdef __cplusplus #define FUNC_PREFIX extern "C" #else #define FUNC_PREFIX #endif struct driver { void (*cleanup)(void * driver_state); int (*test_input)(void * driver_state, char * buffer, size_t length); int (*test_next_input)(void * driver_state); char *(*get_last_input)(void * driver_state, int * length); void * state; }; typedef struct driver driver_t; #ifdef _WIN32 FUNC_PREFIX int generic_wait_for_process_completion(HANDLE process, int timeout, instrumentation_t * instrumentation, void * instrumentation_state); #else FUNC_PREFIX int generic_wait_for_process_completion(pid_t process, int timeout, instrumentation_t * instrumentation, void * instrumentation_state); #endif FUNC_PREFIX int generic_test_next_input(void * state, mutator_t * mutator, void * mutator_state, char * buffer, size_t buffer_length, int(*test_input_func)(void * driver_state, char * buffer, size_t length), int * mutate_last_size); FUNC_PREFIX int setup_mutate_buffer(double ratio, size_t 
input_length, char ** buffer, size_t * length); #ifdef _WIN32 FUNC_PREFIX int send_tcp_input(SOCKET * sock, char * buffer, size_t length); #else FUNC_PREFIX int send_tcp_input(int * sock, char * buffer, size_t length); #endif ================================================ FILE: driver/driver_factory.c ================================================ #include #include #include #include "instrumentation.h" #include "driver_factory.h" #include "driver.h" // for driver_t #include "file_driver.h" #include "stdin_driver.h" #include "network_server_driver.h" #include "network_client_driver.h" #ifdef _WIN32 #include "wmp_driver.h" #endif #define FACTORY_ERROR() { free(ret); return NULL; } /** * This function obtains a driver_t object by calling the driver specified by driver_type's create method. * @param driver_type - the name of the driver that should be created. * @param options - a JSON string that contains the driver specific string of options * @return - a driver_t object of the specified type on success or NULL on failure */ DRIVER_API driver_t * driver_factory(char * driver_type, char * options) { return driver_all_factory(driver_type, options, NULL, NULL, NULL, NULL); } /** * This function obtains a driver_t object by calling the driver specified by driver_type's create method. * @param driver_type - the name of the driver that should be created. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - optionally, a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. 
* @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation
 * @return - a driver_t object of the specified type on success or NULL on failure
 */
DRIVER_API driver_t * driver_instrumentation_factory(char * driver_type, char * options, instrumentation_t * instrumentation, void * instrumentation_state)
{
    //Forward to the general factory, passing NULL for the mutator and its state
    return driver_all_factory(driver_type, options, instrumentation, instrumentation_state, NULL, NULL);
}

/**
 * This function obtains a driver_t object by calling the driver specified by driver_type's create method.
 * @param driver_type - the name of the driver that should be created.
 * @param options - a JSON string that contains the driver specific string of options
 * @param mutator - optionally, a pointer to a mutator instance that the driver will use
 * to obtain input when fuzzing the requested program. This mutator instance should already be initialized.
 * @param mutator_state - a pointer to the mutator state for the passed in mutator
 * @return - a driver_t object of the specified type on success or NULL on failure
 */
DRIVER_API driver_t * driver_mutator_factory(char * driver_type, char * options, mutator_t * mutator, void * mutator_state)
{
    //Forward to the general factory, passing NULL for the instrumentation and its state
    return driver_all_factory(driver_type, options, NULL, NULL, mutator, mutator_state);
}

/**
 * This function obtains a driver_t object by calling the driver specified by driver_type's create method.
 * @param driver_type - the name of the driver that should be created.
 * @param options - a JSON string that contains the driver specific string of options
 * @param instrumentation - optionally, a pointer to an instrumentation instance that the driver will use
 * to instrument the requested program. This instrumentation instance should already be initialized.
* @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @param mutator - optionally, a pointer to a mutator instance that the driver will use * to obtain input when fuzzing the requested program. This mutator instance should already be initialized. * @param mutator_state - a pointer to the mutator state for the passed in mutator * @return - a driver_t object of the specified type on success or NULL on failure */ DRIVER_API driver_t * driver_all_factory(char * driver_type, char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { driver_t * ret = (driver_t *)malloc(sizeof(driver_t)); if (!strcmp(driver_type, "file")) { ret->state = file_create(options, instrumentation, instrumentation_state, mutator, mutator_state); if (!ret->state) FACTORY_ERROR(); ret->cleanup = file_cleanup; ret->test_input = file_test_input; ret->test_next_input = file_test_next_input; ret->get_last_input = file_get_last_input; } else if (!strcmp(driver_type, "stdin")) { ret->state = stdin_create(options, instrumentation, instrumentation_state, mutator, mutator_state); if (!ret->state) FACTORY_ERROR(); ret->cleanup = stdin_cleanup; ret->test_input = stdin_test_input; ret->test_next_input = stdin_test_next_input; ret->get_last_input = stdin_get_last_input; } else if (!strcmp(driver_type, "network_server")) { ret->state = network_server_create(options, instrumentation, instrumentation_state, mutator, mutator_state); if (!ret->state) FACTORY_ERROR(); ret->cleanup = network_server_cleanup; ret->test_input = network_server_test_input; ret->test_next_input = network_server_test_next_input; ret->get_last_input = network_server_get_last_input; } else if (!strcmp(driver_type, "network_client")) { ret->state = network_client_create(options, instrumentation, instrumentation_state, mutator, mutator_state); if (!ret->state) { puts("Factory Error"); FACTORY_ERROR(); } ret->cleanup = 
network_client_cleanup; ret->test_input = network_client_test_input; ret->test_next_input = network_client_test_next_input; ret->get_last_input = network_client_get_last_input; } #ifdef _WIN32 else if (!strcmp(driver_type, "wmp")) { ret->state = wmp_create(options, instrumentation, instrumentation_state, mutator, mutator_state); if (!ret->state) FACTORY_ERROR(); ret->cleanup = wmp_cleanup; ret->test_input = wmp_test_input; ret->test_next_input = wmp_test_next_input; ret->get_last_input = wmp_get_last_input; } #endif else FACTORY_ERROR(); return ret; } #define APPEND_HELP(text, new_text, func) \ if(!func(&new_text)) { \ text = (char *)realloc(text, strlen(text) + strlen(new_text) + 1); \ strcat(text, new_text); \ free(new_text); \ } /** * This function returns help text for all available drivers. This help text will describe the drivers and any options * that can be passed to their create functions. * @return - a newly allocated string containing the help text. */ DRIVER_API char * driver_help(void) { char * text, *new_text; text = strdup("Driver Options:\n\n"); APPEND_HELP(text, new_text, file_help); APPEND_HELP(text, new_text, stdin_help); APPEND_HELP(text, new_text, network_server_help); APPEND_HELP(text, new_text, network_client_help); #ifdef _WIN32 APPEND_HELP(text, new_text, wmp_help); #endif return text; } ================================================ FILE: driver/driver_factory.h ================================================ #pragma once #include "driver.h" #include #include DRIVER_API driver_t * driver_factory(char * driver_type, char * options); DRIVER_API driver_t * driver_instrumentation_factory(char * driver_type, char * options, instrumentation_t * instrumentation, void * instrumentation_state); DRIVER_API driver_t * driver_mutator_factory(char * driver_type, char * options, mutator_t * mutator, void * mutator_state); DRIVER_API driver_t * driver_all_factory(char * driver_type, char * options, instrumentation_t * instrumentation, void * 
instrumentation_state, mutator_t * mutator, void * mutator_state); DRIVER_API char * driver_help(void); ================================================ FILE: driver/file_driver.c ================================================ #include "file_driver.h" #include #include #include #include "driver.h" // IWYU pragma: keep //c headers #include #include #ifdef _WIN32 #include #include #else #include // memset #include // unlink #endif /** * This function creates a file_state_t object based on the given options. * @param options - A JSON string of the options to set in the new file_state_t. See the * help function for more information on the specific options available. * @return - the file_state_t generated from the options in the JSON options string, or NULL on failure */ static file_state_t * setup_options(char * options) { file_state_t * state; size_t cmd_length; state = (file_state_t *)malloc(sizeof(file_state_t)); if (!state) return NULL; memset(state, 0, sizeof(file_state_t)); //Setup defaults state->timeout = 2; state->extension = strdup(".dat"); state->input_ratio = 2.0; //Parse the options PARSE_OPTION_STRING(state, options, path, "path", file_cleanup); PARSE_OPTION_STRING(state, options, test_filename, "filename", file_cleanup); PARSE_OPTION_STRING(state, options, arguments, "arguments", file_cleanup); PARSE_OPTION_STRING(state, options, extension, "extension", file_cleanup); PARSE_OPTION_INT(state, options, timeout, "timeout", file_cleanup); PARSE_OPTION_DOUBLE(state, options, input_ratio, "ratio", file_cleanup); if (!state->path || !file_exists(state->path) || state->input_ratio <= 0) { if(!state->path) { FATAL_MSG("Failed to load file driver: path to executable missing"); } file_cleanup(state); return NULL; } //If the user didn't specify a test filename to if(!state->test_filename) {//write the fuzz data to, generate a test filename now if(!state->arguments || !strstr(state->arguments, "@@")) { ERROR_MSG("Test filename not specified and the target program's 
arguments do not include the test filename " "symbol (\"@@\"). The target program will not be able to receive the mutated input data."); ERROR_MSG("Use the \"arguments\" or \"filename\" options to pass the mutated input to the target program"); file_cleanup(state); return NULL; } state->test_filename = get_temp_filename(state->extension); } if (state->arguments) { int filename_length = strlen(state->test_filename); char * new_arguments, *pos, *temp; pos = new_arguments = state->arguments; while (*pos != 0) { // replace the "@@" in the arguments with the temp filename if (*pos == '@' && *(pos + 1) == '@') { int index = pos - new_arguments; size_t temp_size = (filename_length - 2) + strlen(new_arguments) + 1; temp = (char *)malloc(temp_size); memset(temp, 0, temp_size); memcpy(temp, new_arguments, index); memcpy(temp + index, state->test_filename, filename_length); memcpy(temp + index + filename_length, pos + 2, strlen(new_arguments) - (index + 2)); free(new_arguments); new_arguments = temp; pos = new_arguments + index + filename_length; } else pos++; } state->arguments = new_arguments; } cmd_length = (state->path ? strlen(state->path) : 0) + (state->arguments ? strlen(state->arguments) : 0) + 2; state->cmd_line = (char *)malloc(cmd_length); if (!state->cmd_line) { file_cleanup(state); return NULL; } snprintf(state->cmd_line, cmd_length, "%s %s", state->path, state->arguments ? state->arguments : ""); return state; } /** * This function allocates and initializes a new driver specific state object based on the given options. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. 
* @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @return - a driver specific state object on success or NULL on failure */ void * file_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { file_state_t * state; int num_inputs; size_t *input_sizes; //This driver requires at least the path to the program to run. Make sure we either have both a mutator and state if (!options || !strlen(options) || (mutator && !mutator_state) || (!mutator && mutator_state)) //or neither { if (!options) { FATAL_MSG("Options are required for the file driver"); } return NULL; } state = setup_options(options); if (!state) return NULL; if (mutator) { mutator->get_input_info(mutator_state, &num_inputs, &input_sizes); if (num_inputs != 1 || setup_mutate_buffer(state->input_ratio, input_sizes[0], &state->mutate_buffer, &state->mutate_buffer_length)) { free(input_sizes); file_cleanup(state); return NULL; } free(input_sizes); } state->mutator = mutator; state->mutator_state = mutator_state; state->mutate_last_size = -1; state->instrumentation = instrumentation; state->instrumentation_state = instrumentation_state; return state; } /** * This function cleans up all resources with the passed in driver state. * @param driver_state - a driver specific state object previously created by the file_create function * This state object should not be referenced after this function returns. */ void file_cleanup(void * driver_state) { file_state_t * state = (file_state_t *)driver_state; free(state->mutate_buffer); free(state->path); free(state->extension); free(state->arguments); free(state->cmd_line); if (state->test_filename) { unlink(state->test_filename); free(state->test_filename); } free(state); } /** * This function will run the fuzzed program and test it with the given input. This function * blocks until the program has finished processing the input. 
* @param driver_state - a driver specific structure previously created by the file_create function * @param input - the input that should be tested * @param length - the length of the input parameter * @return - FUZZ_ result on success or FUZZ_ERROR on failure */ int file_test_input(void * driver_state, char * input, size_t length) { file_state_t * state = (file_state_t *)driver_state; //Write the input to disk DEBUG_MSG("Writing input to disk..."); write_buffer_to_file(state->test_filename, input, length); //Start the process and give it our input DEBUG_MSG("Enabling instrumentation module..."); if(state->instrumentation->enable(state->instrumentation_state, &state->process, state->cmd_line, NULL, 0)) return FUZZ_ERROR; //Wait for it to be done, return the termination termination status return generic_wait_for_process_completion(state->process, state->timeout, state->instrumentation, state->instrumentation_state); } /** * This function will run the fuzzed program with the output of the mutator given during driver * creation. This function blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the file_create function * @return - FUZZ_ result on success, FUZZ_ERROR on error, -2 if the mutator has finished generating inputs */ int file_test_next_input(void * driver_state) { file_state_t * state = (file_state_t *)driver_state; return generic_test_next_input(state, state->mutator, state->mutator_state, state->mutate_buffer, state->mutate_buffer_length, file_test_input, &state->mutate_last_size); } /** * When this driver is using a mutator given to it during driver creation, this function retrieves * the last input that was tested with the file_test_next_input function. * @param driver_state - a driver specific structure previously created by the file_create function * @param length - a pointer to an integer used to return the length of the input that was last tested. 
* @return - NULL on error or if the driver doesn't have a mutator, or a buffer containing the last input * that was tested by the driver with the file_test_next_input function. This buffer should be freed * by the caller. */ char * file_get_last_input(void * driver_state, int * length) { file_state_t * state = (file_state_t *)driver_state; if (!state->mutator || state->mutate_last_size <= 0) return NULL; *length = state->mutate_last_size; return memdup(state->mutate_buffer, state->mutate_last_size); } /** * This function returns help text for this driver. This help text will describe the driver and any options * that can be passed to file_create. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ int file_help(char ** help_str) { *help_str = strdup( "file - Writes mutated input to a file, that the target process uses\n" "Required Options:\n" " path The path to the target process\n" "Optional Options:\n" " arguments Arguments to pass to the target process, with the\n" " target filename specified as @@\n" " extension The file extension to give the test file\n" " filename The filename to give the test file\n" " ratio The ratio of mutation buffer size to input size when\n" " given a mutator\n" " timeout The maximum number of seconds to wait for the target\n" " process to finish\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: driver/file_driver.h ================================================ #pragma once #include "instrumentation.h" #include // for pid_t #include "global_types.h" // for mutator_t void * file_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state); void file_cleanup(void * driver_state); int file_test_input(void * driver_state, char * buffer, size_t length); int file_test_next_input(void * driver_state); char * file_get_last_input(void * 
driver_state, int * length); int file_help(char ** help_str); struct file_state { //Options char * path; //The path to the fuzzed executable char * arguments; //Arguments to give the binary char * extension; //The file extension of the input files to the fuzzed process int timeout; //Maximum number of seconds to allow the executable to run char * test_filename; //The filename that we're going to write our test input to double input_ratio; //the ratio of the maximum input size //The handle to the fuzzed process instance #ifdef _WIN32 HANDLE process; #else pid_t process; #endif //command line of the fuzzed process char * cmd_line; //The instrumentation module instrumentation_t * instrumentation; //The instrumentation's state void * instrumentation_state; mutator_t * mutator; void * mutator_state; char * mutate_buffer; size_t mutate_buffer_length; int mutate_last_size; }; typedef struct file_state file_state_t; ================================================ FILE: driver/network_client_driver.c ================================================ #include "network_client_driver.h" #include #include #include #include "driver.h" //c headers #include #include #include #ifdef _WIN32 #include #include #include #else #include #include #include #include #include #include #include #include #include #define INVALID_SOCKET -1 #define SOCKET_ERROR -1 #endif /** * This function creates a network_client_state_t object based on the given options. * @param options - A JSON string of the options to set in the new network_client_state_t. See the * help function for more information on the specific options available. 
* @return - the network_client_state_t generated from the options in the JSON options string, or NULL on failure */ static network_client_state_t * setup_options(char * options) { network_client_state_t * state; size_t cmd_length; state = (network_client_state_t *)malloc(sizeof(network_client_state_t)); if (!state) return NULL; memset(state, 0, sizeof(network_client_state_t)); //Setup defaults state->timeout = 2; state->input_ratio = 2.0; state->lport = 9999; state->target_ip = strdup("127.0.0.1"); //Parse the options PARSE_OPTION_STRING(state, options, path, "path", network_client_cleanup); PARSE_OPTION_STRING(state, options, arguments, "arguments", network_client_cleanup); PARSE_OPTION_INT(state, options, timeout, "timeout", network_client_cleanup); PARSE_OPTION_INT(state, options, lport, "port", network_client_cleanup); PARSE_OPTION_STRING(state, options, target_ip, "ip", network_client_cleanup); PARSE_OPTION_DOUBLE(state, options, input_ratio, "ratio", network_client_cleanup); PARSE_OPTION_INT_ARRAY(state, options, sleeps, sleeps_count, "sleeps", network_client_cleanup); cmd_length = (state->path ? strlen(state->path) : 0) + (state->arguments ? strlen(state->arguments) : 0) + 4; state->cmd_line = (char *)malloc(cmd_length); memset(state->cmd_line, 0, cmd_length); if (!state->path || !state->cmd_line || !file_exists(state->path) || !state->target_ip || !state->lport || state->input_ratio <= 0) { network_client_cleanup(state); return NULL; } //Build the cmd line snprintf(state->cmd_line, cmd_length, "\"%s\" %s", state->path, state->arguments ? state->arguments : ""); return state; } /** * This function allocates and initializes a new driver specific state object based on the given options. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. 
* @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @return - a driver specific state object on success or NULL on failure */ void * network_client_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { #ifdef _WIN32 WSADATA wsaData; #endif network_client_state_t * state; int i; //Make sure we either have both a mutator and state if (!options || !strlen(options) || (mutator && !mutator_state) || (!mutator && mutator_state)) { //or neither FATAL_MSG("ERROR: Missing driver options"); return NULL; } #ifdef _WIN32 // Startup because we're going to use the winsock DLL if (WSAStartup(MAKEWORD(2, 2), &wsaData)) { ERROR_MSG("WSAStartup Failed"); return NULL; } #endif state = setup_options(options); if (!state) return NULL; if (mutator) { mutator->get_input_info(mutator_state, &state->num_inputs, &state->mutate_buffer_lengths); //size of sleeps array and inputs must be equal if (state->sleeps && state->num_inputs != state->sleeps_count) { network_client_cleanup(state); return NULL; } //Setup the mutate buffers //allocate space for the array of mutate buffers state->mutate_buffers = malloc(sizeof(char *) * state->num_inputs); if (!state->mutate_buffers) { network_client_cleanup(state); return NULL; } memset(state->mutate_buffers, 0, sizeof(char *) * state->num_inputs); //Allocate space for the array containing the sizes the mutate buffers state->mutate_last_sizes = malloc(sizeof(size_t) * state->num_inputs); if (!state->mutate_last_sizes) { network_client_cleanup(state); return NULL; } memset(state->mutate_last_sizes, 0, sizeof(char *) * state->num_inputs); //populate the array of mutate buffers for (i = 0; i < state->num_inputs; i++) { if (setup_mutate_buffer(state->input_ratio, state->mutate_buffer_lengths[i], &state->mutate_buffers[i], &state->mutate_buffer_lengths[i])) { network_client_cleanup(state); return NULL; } } state->mutator = 
mutator; state->mutator_state = mutator_state; } state->instrumentation = instrumentation; state->instrumentation_state = instrumentation_state; return state; } /** * This function cleans up all resources with the passed in driver state. * @param driver_state - a driver specific state object previously created by the network_client_create function * This state object should not be referenced after this function returns. */ void network_client_cleanup(void * driver_state) { network_client_state_t * state = (network_client_state_t *)driver_state; int i; //Cleanup mutator stuff for (i = 0; state->mutate_buffers && i < state->num_inputs; i++) free(state->mutate_buffers[i]); free(state->mutate_buffers); free(state->mutate_buffer_lengths); free(state->mutate_last_sizes); //Clean up driver specific options free(state->path); free(state->arguments); free(state->cmd_line); free(state->target_ip); free(state->sleeps); //Clean up the struct holding it all free(state); } /** * This function creates a socket and waits for a client to connect. * @param state - the network_client_state_t object that represents the current state of the driver * @param sock - a pointer to a SOCKET used to return the created socket * @return - FUZZ_ERROR error, zero on success */ #ifdef _WIN32 static int start_listener(network_client_state_t * state, SOCKET * sock) #else static int start_listener(network_client_state_t * state, int * sock) #endif { struct sockaddr_in addr; int iResult = 0; *sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (*sock == INVALID_SOCKET) { #ifdef _WIN32 ERROR_MSG("socket function failed with error: %ld", WSAGetLastError()); #else ERROR_MSG("socket function failed with error: %d", errno); #endif return FUZZ_ERROR; } #ifndef _WIN32 // Linux // Set SO_REUSEADDR so you reuse the address instead of waiting for a minute. 
// https://stackoverflow.com/a/24194999 int enable = 1; if (setsockopt(*sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)) < 0) FATAL_MSG("setsockopt failed.\n"); #endif //Create socket (TCP Only right now) addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(state->target_ip); addr.sin_port = htons(state->lport); //Now bind to the socket iResult = bind(*sock, (const struct sockaddr *)& addr, sizeof(addr)); if (iResult == SOCKET_ERROR) { #ifdef _WIN32 ERROR_MSG("Socket failed to bind to port. Error code: %d", WSAGetLastError()); iResult = closesocket(*sock); if (iResult == SOCKET_ERROR) ERROR_MSG("closesocket function failed with error %d", WSAGetLastError()); #else ERROR_MSG("Socket failed to bind to port. Error code: %d", errno); iResult = close(*sock); if (iResult == SOCKET_ERROR) ERROR_MSG("closesocket function failed with error %d", errno); #endif return FUZZ_ERROR; } //Now put the socket into LISTEN state if (listen(*sock, SOMAXCONN) == SOCKET_ERROR) { #ifdef _WIN32 ERROR_MSG("listen function failed with error: %d", WSAGetLastError()); return FUZZ_ERROR; #else ERROR_MSG("listen function failed with error: %d", errno); return FUZZ_ERROR; #endif } return FUZZ_NONE; } /** * This function will run the fuzzed program and test it with the given inputs. This function * blocks until the program has finished processing the input. 
* @param state - the network_client_state_t object that represents the current state of the driver * @param inputs - an array of inputs to send to the program * @param lengths - an array of lengths for the buffers in the inputs parameter * @param inputs_count - the number of buffers in the inputs parameter * @return - FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH on success or FUZZ_ERROR on failure */ static int network_client_run(network_client_state_t * state, char ** inputs, size_t * lengths, size_t inputs_count) { #ifdef _WIN32 SOCKET serverSock; SOCKET clientSock; #else int serverSock; int clientSock; #endif size_t i; int sock_ret; //Start the server socket so the client can connect below: if (start_listener(state, &serverSock)) { return FUZZ_ERROR; } //Have the instrumentation start the new process, since it needs to do so in a custom environment state->instrumentation->enable(state->instrumentation_state, &state->process, state->cmd_line, NULL, 0); //Now accept the client connection clientSock = accept(serverSock, NULL, NULL); if (clientSock == INVALID_SOCKET) { #ifdef _WIN32 FATAL_MSG("accept() failed with error: %d\n", WSAGetLastError()); #else FATAL_MSG("accept() failed with error: %d\n", errno); #endif return FUZZ_ERROR; } #ifdef _WIN32 closesocket(serverSock); #else close(serverSock); #endif for (i = 0; i < inputs_count; i++) { if (state->sleeps && state->sleeps[i] != 0) #ifdef _WIN32 Sleep(state->sleeps[i]); #else usleep(1000*state->sleeps[i]); #endif sock_ret = send_tcp_input(&clientSock, inputs[i], lengths[i]); if (sock_ret) { if (sock_ret == -2) { WARNING_MSG("Client terminated connection before all packets " "were sent, %d of %d packets sent", i, inputs_count); break; } return FUZZ_ERROR; } } #ifdef _WIN32 closesocket(clientSock); #else close(clientSock); #endif //Wait for it to be done return generic_wait_for_process_completion(state->process, state->timeout, state->instrumentation, state->instrumentation_state); } /** * This function will run the fuzzed 
program and test it with the given input. * This function blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the * network_client_create function * @param input - the input that should be tested * @param length - the length of the input parameter * @return - FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH on success or FUZZ_ERROR on failure */ int network_client_test_input(void * driver_state, char * input, size_t length) { network_client_state_t * state = (network_client_state_t *)driver_state; char ** inputs; size_t * input_lengths; size_t i, inputs_count; int ret = FUZZ_ERROR; if (decode_mem_array(input, &inputs, &input_lengths, &inputs_count) == 0) { if (inputs_count) ret = network_client_run(state, inputs, input_lengths, inputs_count); //clean up time for (i = 0; i < inputs_count; i++) free(inputs[i]); free(inputs); free(input_lengths); } return ret; } /** * This function will run the fuzzed program with the output of the mutator given during driver * creation. This function blocks until the program has finished processing the input. 
* @param driver_state - a driver specific structure previously created by the network_client_create function * @return - FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH on success, FUZZ_ERROR on error, or -2 if the mutator has * finished generating inputs */ int network_client_test_next_input(void * driver_state) { network_client_state_t * state = (network_client_state_t *)driver_state; int i, ret; if (!state->mutator) return FUZZ_ERROR; memset(state->mutate_last_sizes, 0, sizeof(int) * state->num_inputs); for (i = 0; i < state->num_inputs; i++) { ret = state->mutator->mutate_extended(state->mutator_state, state->mutate_buffers[i], state->mutate_buffer_lengths[i], MUTATE_MULTIPLE_INPUTS | i); if (ret < 0) return FUZZ_ERROR; if (ret == 0) return -2; state->mutate_last_sizes[i] = (size_t)ret; } return network_client_run(state, state->mutate_buffers, state->mutate_last_sizes, state->num_inputs); } /** * When this driver is using a mutator given to it during driver creation, this function retrieves * the last input that was tested with the network_client_test_next_input function. * @param driver_state - a driver specific structure previously created by the network_client_create function * @param length - a pointer to an integer used to return the length of the input that was last tested. * @return - NULL on error or if the driver doesn't have a mutator, or a buffer containing the last input * that was tested by the driver with the network_client_test_next_input function. This buffer should be freed * by the caller. 
*/ char * network_client_get_last_input(void * driver_state, int * length) { network_client_state_t * state = (network_client_state_t *)driver_state; int i; if (!state->mutate_buffers) return NULL; for (i = 0; i < state->num_inputs; i++) { // If network_client_test_next_input has not been called or failed to mutate the // input, there could be no input to return // Assumption: mutate_last_size should never be set to 0 in correct // operation, only if it wasn't proper loaded with the mutate array // sizes. if (state->mutate_last_sizes[i] == 0) return NULL; } return encode_mem_array(state->mutate_buffers, state->mutate_last_sizes, state->num_inputs, length); } /** * This function returns help text for this driver. This help text will describe the driver and any options * that can be passed to network_client_create. * @return - a newly allocated string containing the help text. */ int network_client_help(char ** help_str) { *help_str = strdup( "network_client - fuzzes clients by acting as a server\n" "Required Options:\n" " path The path to the exe\n" " arguments Arguments to pass to the target process\n" "Optional Options:\n" " timeout The maximum number of seconds to wait\n" " for the target process to finish\n" " ratio The ratio of mutation buffer size to\n" " input size when given a mutator\n" " ip The target IP to connect to\n" " port The target port to connect to\n" " ratio The ratio of mutation buffer size to input\n" " size when given a mutator\n" " sleeps An array of milliseconds to wait between each\n" " input being sent to the target program\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: driver/network_client_driver.h ================================================ #pragma once #include "driver.h" #include "instrumentation.h" #include #ifndef _WIN32 // Linux #include // pid_t #endif void * network_client_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, 
mutator_t * mutator, void * mutator_state); void network_client_cleanup(void * driver_state); int network_client_test_input(void * driver_state, char * buffer, size_t length); int network_client_test_next_input(void * driver_state); char * network_client_get_last_input(void * driver_state, int * length); int network_client_help(char ** help_str); struct network_client_state { //Options char * path; //The path to the fuzzed executable char * arguments; //Arguments to give the binary int timeout; //Maximum number of seconds to allow the executable to run char * target_ip; //The IP address to send the fuzzed data to int lport; //The port to send the fuzzed data to double input_ratio; //the ratio of the maximum input size int * sleeps; //How many milliseconds to sleep between inputs int sleeps_count; //The number of items in the sleeps array //The handle to the fuzzed process instance #ifdef _WIN32 HANDLE process; #else pid_t process; #endif //command line of the fuzzed process char * cmd_line; //The instrumentation module instrumentation_t * instrumentation; //The instrumentation's state void * instrumentation_state; mutator_t * mutator; void * mutator_state; // it'd be nice if this could be size_t, but mutator function // get_input_info requires ints. int num_inputs; char ** mutate_buffers; size_t * mutate_buffer_lengths; size_t * mutate_last_sizes; }; typedef struct network_client_state network_client_state_t; ================================================ FILE: driver/network_server_driver.c ================================================ #include "network_server_driver.h" #include #include #include #include "driver.h" //c headers #include #include #include #include #ifdef _WIN32 #include #include #include #else #include #include #include #if __APPLE__ #include #include #include #include #else #include #include #endif // __APPLE__ #endif /** * This function creates a network_server_state_t object based on the given options. 
* @param options - A JSON string of the options to set in the new network_server_state_t. See the * help function for more information on the specific options available. * @return - the network_server_state_t generated from the options in the JSON options string, or NULL on failure */ static network_server_state_t * setup_options(char * options) { network_server_state_t * state; size_t cmd_length; state = (network_server_state_t *)malloc(sizeof(network_server_state_t)); if (!state) return NULL; memset(state, 0, sizeof(network_server_state_t)); //Setup defaults state->timeout = 2; state->input_ratio = 2.0; //Parse the options PARSE_OPTION_STRING(state, options, path, "path", network_server_cleanup); PARSE_OPTION_STRING(state, options, arguments, "arguments", network_server_cleanup); PARSE_OPTION_INT(state, options, timeout, "timeout", network_server_cleanup); PARSE_OPTION_INT(state, options, target_port, "port", network_server_cleanup); PARSE_OPTION_STRING(state, options, target_ip, "ip", network_server_cleanup); PARSE_OPTION_INT(state, options, target_udp, "udp", network_server_cleanup); PARSE_OPTION_INT(state, options, skip_network_check, "skip_network_check", network_server_cleanup); PARSE_OPTION_DOUBLE(state, options, input_ratio, "ratio", network_server_cleanup); PARSE_OPTION_INT_ARRAY(state, options, sleeps, sleeps_count, "sleeps", network_server_cleanup); cmd_length = (state->path ? strlen(state->path) : 0) + (state->arguments ? strlen(state->arguments) : 0) + 2; state->cmd_line = (char *)malloc(cmd_length); if (!state->path || !state->cmd_line || !file_exists(state->path) || !state->target_ip || !state->target_port || state->input_ratio <= 0) { network_server_cleanup(state); return NULL; } snprintf(state->cmd_line, cmd_length, "%s %s", state->path, state->arguments ? state->arguments : ""); return state; } /** * This function cleans up all resources with the passed in driver state. 
* @param driver_state - a driver specific state object previously created by the network_server_create function * This state object should not be referenced after this function returns. */ void network_server_cleanup(void * driver_state) { network_server_state_t * state = (network_server_state_t *)driver_state; int i; //Cleanup mutator stuff for(i = 0; state->mutate_buffers && i < state->num_inputs; i++) free(state->mutate_buffers[i]); free(state->mutate_buffers); free(state->mutate_buffer_lengths); free(state->mutate_last_sizes); //Clean up driver specific options free(state->path); free(state->arguments); free(state->cmd_line); free(state->target_ip); free(state->sleeps); //Clean up the struct holding it all free(state); } /** * This function allocates and initializes a new driver specific state object based on the given options. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. * @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @return - a driver specific state object on success or NULL on failure */ void * network_server_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { #ifdef _WIN32 WSADATA wsaData; #endif network_server_state_t * state; size_t i; //This driver requires at least the path to the program to run. 
Make sure we either have both a mutator and state if (!options || !strlen(options) || (mutator && !mutator_state) || (!mutator && mutator_state)) //or neither return NULL; #ifdef _WIN32 if (WSAStartup(MAKEWORD(2, 2), &wsaData)) { ERROR_MSG("WSAStartup Failed\n"); return NULL; } #endif state = setup_options(options); if (!state) return NULL; if (mutator) { mutator->get_input_info(mutator_state, &state->num_inputs, &state->mutate_buffer_lengths); if (state->sleeps && state->num_inputs != state->sleeps_count) { network_server_cleanup(state); return NULL; } state->mutate_buffers = malloc(sizeof(char *) * state->num_inputs); if (!state->mutate_buffers) { network_server_cleanup(state); return NULL; } //Setup the mutate buffers state->mutate_buffers = malloc(sizeof(char *) * state->num_inputs); state->mutate_last_sizes = malloc(sizeof(size_t) * state->num_inputs); memset(state->mutate_buffers, 0, sizeof(char *) * state->num_inputs); memset(state->mutate_last_sizes, 0, sizeof(size_t) * state->num_inputs); for (i = 0; i < state->num_inputs; i++) { if(setup_mutate_buffer(state->input_ratio, state->mutate_buffer_lengths[i], &state->mutate_buffers[i], &state->mutate_buffer_lengths[i])) { network_server_cleanup(state); return NULL; } } state->mutator = mutator; state->mutator_state = mutator_state; } state->instrumentation = instrumentation; state->instrumentation_state = instrumentation_state; return state; } /** * This function creates a socket and (when using TCP) connects it to the fuzzed program. 
* @param state - the network_server_state_t object that represents the current state of the driver * @param sock - a pointer to a SOCKET used to return the created socket * @return - non-zero on error, zero on success */ #ifdef _WIN32 static int connect_to_target(network_server_state_t * state, SOCKET * sock) #else static int connect_to_target(network_server_state_t * state, int * sock) #endif { struct sockaddr_in addr; if(state->target_udp) *sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); else *sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); #ifdef _WIN32 if (*sock == INVALID_SOCKET) #else if (*sock == -1) #endif return 1; if (!state->target_udp) { addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(state->target_ip); addr.sin_port = htons(state->target_port); #ifdef _WIN32 if (connect(*sock, (const struct sockaddr *)&addr, sizeof(addr)) == SOCKET_ERROR) { closesocket(*sock); #else if (connect(*sock, (const struct sockaddr *)&addr, sizeof(addr)) == -1) { close(*sock); #endif return 1; } } return 0; } /** * This function sends the provided buffer on the UDP socket * @param state - the network_server_state_t object that represents the current state of the driver * @param sock - a pointer to a UDP SOCKET to send the buffer on * @param buffer - the buffer to send * @param length - the length of the buffer parameter * @return - non-zero on error, zero on success */ #ifdef _WIN32 static int send_udp_input(network_server_state_t * state, SOCKET * sock, char * buffer, size_t length) #else static int send_udp_input(network_server_state_t * state, int * sock, char * buffer, size_t length) #endif { struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(state->target_ip); addr.sin_port = htons(state->target_port); #ifdef _WIN32 if (sendto(*sock, buffer, length, 0, (const struct sockaddr *)&addr, sizeof(addr)) == SOCKET_ERROR) #else if (sendto(*sock, buffer, length, 0, (const struct sockaddr *)&addr, sizeof(addr)) == -1) #endif return 
1; return 0; } /** * This function determines if there is a program listening on the specified port on the local computer * @param port - the port number to check * @param udp - whether the specified port is udp (1) or tcp (0) * @return - 1 if the port is listening, 0 if the port is not listening, or -1 on error */ static int is_port_listening(int port, int udp) { #ifdef _WIN32 MIB_TCPTABLE * tcp_table; MIB_UDPTABLE * udp_table; DWORD i, size = 0; if (udp) { if (GetUdpTable(NULL, &size, TRUE) != ERROR_INSUFFICIENT_BUFFER) return -1; udp_table = malloc(size); if (!udp_table) return -1; if (GetUdpTable(udp_table, &size, TRUE) != NO_ERROR) { free(udp_table); return -1; } for (i = 0; i < udp_table->dwNumEntries; i++) { if (udp_table->table[i].dwLocalPort == htons(port)) { free(udp_table); return 1; } } free(udp_table); } else { if (GetTcpTable(NULL, &size, TRUE) != ERROR_INSUFFICIENT_BUFFER) return -1; tcp_table = malloc(size); if (!tcp_table) return -1; if (GetTcpTable(tcp_table, &size, TRUE) != NO_ERROR) { free(tcp_table); return -1; } for (i = 0; i < tcp_table->dwNumEntries; i++) { if (tcp_table->table[i].dwState == MIB_TCP_STATE_LISTEN && tcp_table->table[i].dwLocalPort == htons(port)) { free(tcp_table); return 1; } } free(tcp_table); } #elif __APPLE__ char ctl[] = "net.inet.tcp.pcblist"; char *buf, *entry; struct xtcpcb *tcp_entry; size_t len; uint32_t port_n = htons(port); if (sysctlbyname(ctl, NULL, &len, NULL, 0) == -1) // check to get length of data { perror("sysctlbyname failed to get length"); return -1; } buf = malloc(len); // malloc some space for it if (buf == NULL) { perror("malloc"); return -1; } if (sysctlbyname(ctl, buf, &len, NULL, 0) == -1) { perror("sysctlbyname"); free(buf); return -1; } #define ENTRY_LEN(entry) (((struct xtcpcb *)(entry))->xt_len) // buf is an array of length-prepended table entries, potentially of different kinds entry = buf; // skip first entry, it defines generation rather than a connection entry += ENTRY_LEN(entry); while 
(ENTRY_LEN(entry) == sizeof(struct xtcpcb)) { tcp_entry = (struct xtcpcb *)entry; if (tcp_entry->xt_socket.xso_protocol == IPPROTO_TCP && tcp_entry->xt_tp.t_state == TCPS_LISTEN && tcp_entry->xt_inp.inp_lport == port_n) { free(buf); return 1; } entry += ENTRY_LEN(entry); } free(buf); #undef ENTRY_LEN #else // Linux char line[250]; FILE * tcp_info = fopen("/proc/net/tcp","r"); int num, port_from_proc; if (tcp_info == NULL) FATAL_MSG("Failed to open /proc/net/tcp"); // Would it be faster to directly fscanf here, instead of reading output // into a buffer and then scanf'ing that? while(fgets(line, 250, tcp_info)) { // skip header line if(!strncmp(line, " sl", 4) != 0) continue; // read in: #: (ip in hex):(port), ignore the rest // throw away the (ip in hex) since we don't need it sscanf(line, "%d: %*[A-Fa-f0-9]:%X", &num, &port_from_proc); if (port == port_from_proc) return 1; } fclose(tcp_info); #endif return 0; } /** * This function will run the fuzzed program and test it with the given inputs. This function * blocks until the program has finished processing the input. 
* @param state - the network_server_state_t object that represents the current state of the driver * @param inputs - an array of inputs to send to the program * @param lengths - an array of lengths for the buffers in the inputs parameter * @param inputs_count - the number of buffers in the inputs parameter * @return - FUZZ_ result on success or FUZZ_ERROR on failure */ static int network_server_run(network_server_state_t * state, char ** inputs, size_t * lengths, size_t inputs_count) { #ifdef _WIN32 SOCKET sock; #else int sock; #endif size_t i; int listening = 0; //Start the process and give it our input if(state->instrumentation->enable(state->instrumentation_state, &state->process, state->cmd_line, NULL, 0)) return FUZZ_ERROR; //Wait for the port to be listening while (!state->skip_network_check && listening == 0) { listening = is_port_listening(state->target_port, state->target_udp); if(listening == 0) #ifdef _WIN32 Sleep(5); #else usleep(5*1000); #endif } if(listening < 0) return FUZZ_ERROR; if (connect_to_target(state, &sock)) // opens socket return FUZZ_ERROR; for (i = 0; i < inputs_count; i++) { if (state->sleeps && state->sleeps[i] != 0) #ifdef _WIN32 Sleep(state->sleeps[i]); #else usleep(1000*state->sleeps[i]); #endif if ((state->target_udp && send_udp_input(state, &sock, inputs[i], lengths[i])) || (!state->target_udp && send_tcp_input(&sock, inputs[i], lengths[i]))) { #ifdef _WIN32 closesocket(sock); #else close(sock); #endif return FUZZ_ERROR; } } #ifdef _WIN32 closesocket(sock); #else close(sock); #endif //Wait for it to be done and return FUZZ_ result return generic_wait_for_process_completion(state->process, state->timeout, state->instrumentation, state->instrumentation_state); } static void network_server_test_input_cleanup(char ** inputs, size_t inputs_count, size_t * input_lengths) { for (size_t i = 0; i < inputs_count; i++) free(inputs[i]); free(inputs); free(input_lengths); } /** * This function will run the fuzzed program and test it with the 
given input. This function * blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the network_server_create function * @param input - the input that should be tested * @param length - the length of the input parameter * @return - FUZZ_ result on success or FUZZ_ERROR on failure */ int network_server_test_input(void * driver_state, char * input, size_t length) { network_server_state_t * state = (network_server_state_t *)driver_state; char ** inputs; size_t * input_lengths; size_t inputs_count; int network_server_run_result = FUZZ_ERROR; if (decode_mem_array(input, &inputs, &input_lengths, &inputs_count)) return FUZZ_ERROR; if (inputs_count) { network_server_run_result = network_server_run(state, inputs, input_lengths, inputs_count); if (network_server_run_result == FUZZ_ERROR) { network_server_test_input_cleanup(inputs, inputs_count, input_lengths); return FUZZ_ERROR; } } network_server_test_input_cleanup(inputs, inputs_count, input_lengths); return network_server_run_result; } /** * This function will run the fuzzed program with the output of the mutator given during driver * creation. This function blocks until the program has finished processing the input. 
* @param driver_state - a driver specific structure previously created by the network_server_create function * @return - FUZZ_ result on success, FUZZ_ERROR on error, -2 if the mutator has finished generating inputs */ int network_server_test_next_input(void * driver_state) { network_server_state_t * state = (network_server_state_t *)driver_state; int i, ret; int network_server_run_result = FUZZ_ERROR; if (!state->mutator) return FUZZ_ERROR; memset(state->mutate_last_sizes, 0, sizeof(int) * state->num_inputs); for (i = 0; i < state->num_inputs; i++) { ret = state->mutator->mutate_extended(state->mutator_state, state->mutate_buffers[i], state->mutate_buffer_lengths[i], MUTATE_MULTIPLE_INPUTS | i); if (ret < 0) return FUZZ_ERROR; else if (ret == 0) return -2; state->mutate_last_sizes[i] = (size_t)ret; } network_server_run_result = network_server_run(state, state->mutate_buffers, state->mutate_last_sizes, state->num_inputs); return network_server_run_result; } /** * When this driver is using a mutator given to it during driver creation, this function retrieves * the last input that was tested with the network_server_test_next_input function. * @param driver_state - a driver specific structure previously created by the network_server_create function * @param length - a pointer to an integer used to return the length of the input that was last tested. * @return - NULL on error or if the driver doesn't have a mutator, or a buffer containing the last input * that was tested by the driver with the network_server_test_next_input function. This buffer should be freed * by the caller. 
*/ char * network_server_get_last_input(void * driver_state, int * length) { network_server_state_t * state = (network_server_state_t *)driver_state; int i; if (!state->mutate_buffers) return NULL; for (i = 0; i < state->num_inputs; i++) { // If network_server_test_next_input has not been called or failed to mutate the // input, there could be no input to return // Assumption: mutate_last_size should never be set to 0 in correct // operation, only if it wasn't proper loaded with the mutate array // sizes. if (state->mutate_last_sizes[i] == 0) return NULL; } return encode_mem_array(state->mutate_buffers, state->mutate_last_sizes, state->num_inputs, length); } /** * This function returns help text for this driver. This help text will describe the driver and any options * that can be passed to network_server_create. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ int network_server_help(char ** help_str) { *help_str = strdup( "network_server - Fuzzes server-like applications by sending input over the network\n" "Required Options:\n" " ip The target IP to connect to\n" " path The path to the target process\n" " port The target port to connect to\n" "Optional Options:\n" " arguments Arguments to pass to the target process\n" " timeout The maximum number of seconds to wait for the target\n" " process to finish\n" " ratio The ratio of mutation buffer size to input size when\n" " given a mutator\n" " skip_network_check Whether or not to wait for the specified port to be\n" " listening on the localhost prior to connecting to\n" " the target program\n" " sleeps An array of milliseconds to wait between each input\n" " being sent to the target program\n" " udp Whether the fuzzed input should be sent to the target\n" " program on UDP (1) or TCP (0)\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: driver/network_server_driver.h 
================================================ #pragma once #include "driver.h" #include #include #ifndef _WIN32 // Linux #include // pid_t #endif void * network_server_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state); void network_server_cleanup(void * driver_state); int network_server_test_input(void * driver_state, char * buffer, size_t length); int network_server_test_next_input(void * driver_state); char * network_server_get_last_input(void * driver_state, int * length); int network_server_help(char ** help_str); struct network_server_state { //Options char * path; //The path to the fuzzed executable char * arguments; //Arguments to give the binary int timeout; //Maximum number of seconds to allow the executable to run char * target_ip; //The IP address to send the fuzzed data to int target_port; //The port to send the fuzzed data to int target_udp; //Is the driver hitting a udp port (1) or tcp port (0) int skip_network_check; //Don't wait for the target_port to be listening double input_ratio; //the ratio of the maximum input size int * sleeps; //How many milliseconds to sleep between inputs int sleeps_count; //The number of items in the sleeps array //The handle to the fuzzed process instance #ifdef _WIN32 HANDLE process; #else pid_t process; #endif //command line of the fuzzed process char * cmd_line; //The instrumentation module instrumentation_t * instrumentation; //The instrumentation's state void * instrumentation_state; mutator_t * mutator; void * mutator_state; // it'd be nice if this could be size_t, but mutator function // get_input_info requires ints. 
int num_inputs; char ** mutate_buffers; size_t * mutate_buffer_lengths; size_t * mutate_last_sizes; }; typedef struct network_server_state network_server_state_t; ================================================ FILE: driver/stdin_driver.c ================================================ #include "stdin_driver.h" #include // for mutator_t #include #include #include #include "driver.h" // IWYU pragma: keep //c headers #include #include #ifdef _WIN32 //Windows API #include #include #else // linux #include // memset, strlen #endif /** * This function creates a stdin_state_t object based on the given options. * @param options - A JSON string of the options to set in the new stdin_state_t. See the * help function for more information on the specific options available. * @return - the stdin_state_t generated from the options in the JSON options string, or NULL on failure */ static stdin_state_t * setup_options(char * options) { stdin_state_t * state; size_t cmd_length; state = (stdin_state_t *)malloc(sizeof(stdin_state_t)); if (!state) return NULL; memset(state, 0, sizeof(stdin_state_t)); //Setup defaults state->timeout = 2; state->input_ratio = 2.0; //Parse the options PARSE_OPTION_STRING(state, options, path, "path", stdin_cleanup); PARSE_OPTION_STRING(state, options, arguments, "arguments", stdin_cleanup); PARSE_OPTION_INT(state, options, timeout, "timeout", stdin_cleanup); PARSE_OPTION_DOUBLE(state, options, input_ratio, "ratio", stdin_cleanup); cmd_length = (state->path ? strlen(state->path) : 0) + (state->arguments ? strlen(state->arguments) : 0) + 2; state->cmd_line = (char *)malloc(cmd_length); //Validate the options if (!state->path || !state->cmd_line || !file_exists(state->path) || state->input_ratio <= 0) { stdin_cleanup(state); return NULL; } snprintf(state->cmd_line, cmd_length, "%s %s", state->path, state->arguments ? 
state->arguments : ""); return state; } /** * This function allocates and initializes a new driver specific state object based on the given options. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. * @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @return - a driver specific state object on success or NULL on failure */ void * stdin_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { stdin_state_t * state; int num_inputs; size_t *input_sizes; //This driver requires at least the path to the program to run. Make sure we either have both a mutator and state if (!options || !strlen(options) || (mutator && !mutator_state) || (!mutator && mutator_state)) //or neither return NULL; state = setup_options(options); if (!state) return NULL; if (mutator) { mutator->get_input_info(mutator_state, &num_inputs, &input_sizes); if (num_inputs != 1 || setup_mutate_buffer(state->input_ratio, input_sizes[0], &state->mutate_buffer, &state->mutate_buffer_length)) { free(input_sizes); stdin_cleanup(state); return NULL; } free(input_sizes); } state->mutator = mutator; state->mutator_state = mutator_state; state->mutate_last_size = -1; state->instrumentation = instrumentation; state->instrumentation_state = instrumentation_state; return state; } /** * This function cleans up all resources with the passed in driver state. * @param driver_state - a driver specific state object previously created by the stdin_create function * This state object should not be referenced after this function returns. 
*/ void stdin_cleanup(void * driver_state) { stdin_state_t * state = (stdin_state_t *)driver_state; free(state->mutate_buffer); free(state->path); free(state->arguments); free(state->cmd_line); free(state); } /** * This function will run the fuzzed program and test it with the given input. This function * blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the stdin_create function * @param input - the input that should be tested * @param length - the length of the input parameter * @return - FUZZ_ on success or FUZZ_ERROR on failure */ int stdin_test_input(void * driver_state, char * input, size_t length) { stdin_state_t * state = (stdin_state_t *)driver_state; //Start the process and give it our input if(state->instrumentation->enable(state->instrumentation_state, &state->process, state->cmd_line, input, length)) return FUZZ_ERROR; //Wait for it to be done return generic_wait_for_process_completion(state->process, state->timeout, state->instrumentation, state->instrumentation_state); } /** * This function will run the fuzzed program with the output of the mutator given during driver * creation. This function blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the stdin_create function * @return - FUZZ_ result on success, FUZZ_ERROR on error, -2 if the mutator has finished generating inputs */ int stdin_test_next_input(void * driver_state) { stdin_state_t * state = (stdin_state_t *)driver_state; return generic_test_next_input(state, state->mutator, state->mutator_state, state->mutate_buffer, state->mutate_buffer_length, stdin_test_input, &state->mutate_last_size); } /** * When this driver is using a mutator given to it during driver creation, this function retrieves * the last input that was tested with the stdin_test_next_input function. 
* @param driver_state - a driver specific structure previously created by the stdin_create function * @param length - a pointer to an integer used to return the length of the input that was last tested. * @return - NULL on error or if the driver doesn't have a mutator, or a buffer containing the last input * that was tested by the driver with the stdin_test_next_input function. This buffer should be freed * by the caller. */ char * stdin_get_last_input(void * driver_state, int * length) { stdin_state_t * state = (stdin_state_t *)driver_state; if (!state->mutator || state->mutate_last_size <= 0) return NULL; *length = state->mutate_last_size; return memdup(state->mutate_buffer, state->mutate_last_size); } /** * This function returns help text for this driver. This help text will describe the driver and any options * that can be passed to stdin_create. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ int stdin_help(char ** help_str) { *help_str = strdup( "stdin - Sends mutated input to the STDIN of the target process\n" "Required Options:\n" " path The path to the target process\n" "Optional Options:\n" " arguments Arguments to pass to the target process\n" " ratio The ratio of mutation buffer size to input size when\n"" given a mutator\n" " timeout The maximum number of seconds to wait for the target\n" " process to finish\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: driver/stdin_driver.h ================================================ #pragma once #include "instrumentation.h" #include #include // for pid_t void * stdin_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state); void stdin_cleanup(void * driver_state); int stdin_test_input(void * driver_state, char * buffer, size_t length); int stdin_test_next_input(void * driver_state); char * 
stdin_get_last_input(void * driver_state, int * length); int stdin_help(char ** help_str); struct stdin_state { //Options char * path; //The path to the fuzzed executable char * arguments; //Arguments to give the binary int timeout; //Maximum number of seconds to allow the executable to run double input_ratio; //the ratio of the maximum input size //The handle to the fuzzed process instance #ifdef _WIN32 HANDLE process; #else pid_t process; #endif //command line of the fuzzed process char * cmd_line; //The instrumentation module instrumentation_t * instrumentation; //The instrumentation's state void * instrumentation_state; mutator_t * mutator; void * mutator_state; char * mutate_buffer; size_t mutate_buffer_length; int mutate_last_size; }; typedef struct stdin_state stdin_state_t; ================================================ FILE: driver/wmp_driver.cpp ================================================ #include "wmp_driver.h" #include #include #include #include "driver.h" #include //c headers #include #include #include //Windows API #include #include #include #include static int is_playing_sound(); /** * This function creates a wmp_state_t object based on the given options. * @param options - A JSON string of the options to set in the new wmp_state_t. See the * help function for more information on the specific options available. 
* @return - the wmp_state_t generated from the options in the JSON options string, or NULL on failure */ static wmp_state_t * setup_options(char * options) { wmp_state_t * state; size_t cmd_length; state = (wmp_state_t *)malloc(sizeof(wmp_state_t)); if (!state) return NULL; memset(state, 0, sizeof(wmp_state_t)); //Setup defaults state->extension = strdup(".aac"); //strdup'd so we can uniformly free it later state->path = strdup("C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe"); //strdup'd so we can uniformly free it later state->timeout = 2; state->input_ratio = 2.0; if (options && strlen(options)) { PARSE_OPTION_STRING(state, options, path, "path", wmp_cleanup); PARSE_OPTION_STRING(state, options, extension, "extension", wmp_cleanup); PARSE_OPTION_INT(state, options, timeout, "timeout", wmp_cleanup); PARSE_OPTION_DOUBLE(state, options, input_ratio, "ratio", wmp_cleanup); } //Create a test filename to write the fuzz file to state->test_filename = get_temp_filename(state->extension); cmd_length = strlen(state->path) + strlen(state->test_filename) + 10; state->cmd_line = (char *)malloc(cmd_length); if (!state->cmd_line) { wmp_cleanup(state); return NULL; } snprintf(state->cmd_line, cmd_length, "\"%s\" /play %s", state->path, state->test_filename); return state; } /** * This function allocates and initializes a new driver specific state object based on the given options. * @param options - a JSON string that contains the driver specific string of options * @param instrumentation - a pointer to an instrumentation instance that the driver will use * to instrument the requested program. This instrumentation instance should already be initialized. 
* @param instrumentation_state - a pointer to the instrumentation state for the passed in instrumentation * @return - A driver specific state object on success or NULL on failure */ void * wmp_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state) { wmp_state_t * state; int num_inputs; size_t *input_sizes; state = setup_options(options); if (!state) return NULL; //We need to call this before we make WINAPI calls to get the audio device below CoInitialize(NULL); if (mutator) { mutator->get_input_info(mutator_state, &num_inputs, &input_sizes); if (num_inputs != 1 || setup_mutate_buffer(state->input_ratio, input_sizes[0], &state->mutate_buffer, &state->mutate_buffer_length)) { free(input_sizes); wmp_cleanup(state); return NULL; } free(input_sizes); } state->mutator = mutator; state->mutator_state = mutator_state; state->mutate_last_size = -1; state->instrumentation = instrumentation; state->instrumentation_state = instrumentation_state; return state; } /** * This function cleans up all resources with the passed in driver state. * @param driver_state - a driver specific state object previously created by the wmp_create function * This state object should not be referenced after this function returns. */ void wmp_cleanup(void * driver_state) { wmp_state_t * state = (wmp_state_t *)driver_state; free(state->mutate_buffer); free(state->path); free(state->extension); free(state->cmd_line); if (state->test_filename) { unlink(state->test_filename); free(state->test_filename); } free(state); } /** * This function will run wmplayer.exe and test it with the given input. This function * blocks until the wmplayer.exe has finished processing the input. 
* @param driver_state - a driver specific structure previously created by the wmp_create function * @param input - the input that should be tested * @param length - the length of the input parameter * @return - FUZZ_ result on success or FUZZ_ERROR on failure */ int wmp_test_input(void * driver_state, char * input, size_t length) { wmp_state_t * state = (wmp_state_t *)driver_state; //Write the input to disk write_buffer_to_file(state->test_filename, input, length); //Start the process and give it our input if(state->instrumentation->enable(state->instrumentation_state, &state->process, state->cmd_line, NULL, 0)) return FUZZ_ERROR; time_t start_time = time(NULL); int tmp_result = FUZZ_ERROR; // This is reimplementing the loop in generic_wait_for_process // completion, because we want to do an additional check: // `is_playing_sound`, which can end early. // // We assume that if WMP is playing sound, it has successfully // processed input, which means that we won't get a crash. while (1) { tmp_result = state->instrumentation->is_process_done(state->instrumentation_state); if (tmp_result == 1) // process is done, it crashed or exited cleanly { // so fetch the result from the instrumentation return state->instrumentation->get_fuzz_result(state->instrumentation_state); } else if (tmp_result == -1) { return FUZZ_ERROR; } // else it's still running, so do our other checks // WMP is playing sound, so we don't expect a crash and can end // this fuzz round. if (is_playing_sound()) return FUZZ_NONE; if (time(NULL) - start_time > state->timeout) return FUZZ_HANG; // If we're stuck in a modal dialog we're "hung". Works for debug, but not dynamorio if (IsProcessInModalDialog(GetProcessId(state->process))) return FUZZ_HANG; Sleep(50); } // We should never get here, because we should take one of the return // statements in the above loop. return FUZZ_ERROR; } /** * This function will run the fuzzed program with the output of the mutator given during driver * creation. 
This function blocks until the program has finished processing the input. * @param driver_state - a driver specific structure previously created by the wmp_create function * @return - FUZZ_ result on success, FUZZ_ERROR on error, -2 if the mutator has finished generating inputs */ int wmp_test_next_input(void * driver_state) { wmp_state_t * state = (wmp_state_t *)driver_state; return generic_test_next_input(state, state->mutator, state->mutator_state, state->mutate_buffer, state->mutate_buffer_length, wmp_test_input, &state->mutate_last_size); } /** * When this driver is using a mutator given to it during driver creation, this function retrieves * the last input that was tested with the wmp_test_next_input function. * @param driver_state - a driver specific structure previously created by the wmp_create function * @param length - a pointer to an integer used to return the length of the input that was last tested. * @return - NULL on error or if the driver doesn't have a mutator, or a buffer containing the last input * that was tested by the driver with the wmp_test_next_input function. This buffer should be freed * by the caller. */ char * wmp_get_last_input(void * driver_state, int * length) { wmp_state_t * state = (wmp_state_t *)driver_state; if (!state->mutator || state->mutate_last_size <= 0) return NULL; *length = state->mutate_last_size; return (char *)memdup(state->mutate_buffer, state->mutate_last_size); } #define EXIT_ON_ERROR(hres) \ if (FAILED(hres)) { goto done; } #define SAFE_RELEASE(punk) \ if ((punk) != NULL) \ { (punk)->Release(); (punk) = NULL; } /** * This function determines if any sound is currently being played out the speakers. This is used to * determine if the wmplayer.exe process has finished parsing the fuzzed file and now trying to play it. * @return - 1 if sound is being played, 0 if sound is not being played, and -1 if an error occurs. 
*/ static int is_playing_sound() { HRESULT hr = S_OK; IMMDeviceEnumerator *pEnumerator = NULL; IMMDevice *pDevice = NULL; IAudioMeterInformation *pMeterInfo = NULL; HWND hPeakMeter = NULL; float peak = 0; int ret = -1; hr = CoCreateInstance( __uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator); EXIT_ON_ERROR(hr); hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); EXIT_ON_ERROR(hr); hr = pDevice->Activate(__uuidof(IAudioMeterInformation), CLSCTX_ALL, NULL, (void**)&pMeterInfo); EXIT_ON_ERROR(hr); hr = pMeterInfo->GetPeakValue(&peak); EXIT_ON_ERROR(hr); ret = peak > 0; done: SAFE_RELEASE(pEnumerator); SAFE_RELEASE(pDevice); SAFE_RELEASE(pMeterInfo) return ret; } /** * This function returns help text for this driver. This help text will describe the driver and any options * that can be passed to wmp_create. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ int wmp_help(char ** help_str) { *help_str = strdup( "wmp - Windows Media Player driver (Fuzzes wmplayer.exe)\n" "Optional Arguments:\n" " extension The file extension of the input files to\n" " wmplayer.exe\n" " path The path to the wmplayer.exe\n" " ratio The ratio of mutation buffer size to input size\n" " when given a mutator\n" " timeout The maximum number of seconds to wait for the\n" " target process to finish\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: driver/wmp_driver.h ================================================ #pragma once #include "driver.h" #include #ifdef __cplusplus #define FUNC_PREFIX extern "C" #else #define FUNC_PREFIX #endif FUNC_PREFIX void * wmp_create(char * options, instrumentation_t * instrumentation, void * instrumentation_state, mutator_t * mutator, void * mutator_state); FUNC_PREFIX void wmp_cleanup(void * driver_state); FUNC_PREFIX int wmp_test_input(void * driver_state, 
char * buffer, size_t length); FUNC_PREFIX int wmp_test_next_input(void * driver_state); FUNC_PREFIX char * wmp_get_last_input(void * driver_state, int * length); FUNC_PREFIX int wmp_help(char ** help_str); struct wmp_state { //Options char * path; //The path to wmplayer.exe char * extension; //The file extension of the input files to wmplayer.exe int timeout; //Maximum number of seconds to allow wmplayer.exe to run char * test_filename; //The filename that we're going to write our test input to double input_ratio; //the ratio of the maximum input size //The handle to the wmplayer.exe instance HANDLE process; //command line of the fuzzed process char * cmd_line; //The instrumentation module instrumentation_t * instrumentation; //The instrumentation's state void * instrumentation_state; mutator_t * mutator; void * mutator_state; char * mutate_buffer; size_t mutate_buffer_length; int mutate_last_size; }; typedef struct wmp_state wmp_state_t; ================================================ FILE: fuzzer/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (fuzzer) include_directories (${CMAKE_SOURCE_DIR}/driver/) include_directories (${CMAKE_SOURCE_DIR}/instrumentation/) include_directories (${CMAKE_SOURCE_DIR}/mutator/) include_directories (${CMAKE_SOURCE_DIR}/utils/) add_library(utils ${CMAKE_SOURCE_DIR}/utils/utils.c ${CMAKE_SOURCE_DIR}/utils/mutator_factory.c) # Utils requires -ldl (on UNIX) and -lpthread if (UNIX) target_link_libraries(utils dl) endif (UNIX) if (WIN32) add_library(xgetopt ${CMAKE_SOURCE_DIR}/utils/XGetopt.c) endif (WIN32) find_package(Threads REQUIRED) if(THREADS_HAVE_PTHREAD_ARG) set_property(TARGET utils PROPERTY COMPILE_OPTIONS "-pthread") set_property(TARGET utils PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread") endif() if(CMAKE_THREAD_LIBS_INIT) target_link_libraries(utils "${CMAKE_THREAD_LIBS_INIT}") endif() set(FUZZER_SRC ${PROJECT_SOURCE_DIR}/main.c) source_group("Executable Sources" 
FILES ${FUZZER_SRC}) add_executable(fuzzer ${FUZZER_SRC} $ $) target_compile_definitions(fuzzer PUBLIC DRIVER_NO_IMPORT) target_compile_definitions(fuzzer PUBLIC INSTRUMENTATION_NO_IMPORT) target_compile_definitions(fuzzer PUBLIC MUTATOR_NO_IMPORT) if (UNIX) target_link_libraries(fuzzer dl) endif (UNIX) target_link_libraries(fuzzer utils) target_link_libraries(fuzzer jansson) if (WIN32) target_link_libraries(fuzzer Shlwapi) # utils needs Shlwapi target_link_libraries(fuzzer ws2_32) # network driver needs ws2_32 target_link_libraries(fuzzer iphlpapi) # network driver needs iphlpapi target_link_libraries(fuzzer xgetopt) # CLI parsing endif (WIN32) ================================================ FILE: fuzzer/main.c ================================================ #include #include #include #include #include #include #include #ifdef _WIN32 #include #include #define F_OK 00 // for checking if a file is open/writable #define W_OK 02 #include "XGetopt.h" #else #include // dirname #include // access, F_OK, W_OK, getopt #include // mkdir #include // output directory creation #endif #include #include #include #include #include /** * This function prints out the usage information for the fuzzer and each of the individual components * @param program_name - the name of the program currently being run (for use in the outputted message) * @param mutator_directory - the directory to look for mutators in, when printing out the mutator help information */ void usage(char * program_name, char * mutator_directory) { printf( "\n" "Usage: %s\n" " [options] driver_name instrumentation_name mutator_name\n" "\n" "Options:\n" " -d driver_options JSON filename with options for the driver\n" " -hd Get help text about drivers\n" " -hi Get help text about instrumentation\n" " -hl Get help text about logging\n" " -hm Get help text about mutators\n" " -i instrumentation_options JSON filename with options for the instrumentation\n" " -j instrumentation_state_file Set the file that the 
instrumentation state should dump to\n" " -k instrumentation_state_file Set the file that the instrumentation state should load from\n" " -l logging_options JSON filename with options for logging\n" " -m mutator_options JSON filename with options for the mutator\n" " -n num_iterations Limit the number of iterations to run\n" " (optional, infinite by default)\n" " -o output_directory The directory to write files which cause a\n" " crash or hang\n" " -p mutator_directory The directory to look for mutator DLLs in\n" " (must be specified to view help for\n" " specific mutators)\n" " -r mutator_state Set the state that the mutator should load\n" " -s seed The seed file to use\n" " -t mutator_state_file Set the file that the mutator state should dump to\n" " -u mutator_state_file Set the file that the mutator state should load from\n" "\n\n", program_name ); exit(1); } //The global module state objects static driver_t * driver = NULL; static mutator_t * mutator = NULL; static void * mutator_state = NULL; static instrumentation_t * instrumentation = NULL; static void * instrumentation_state = NULL; static void cleanup_modules(void) { if(driver) driver->cleanup(driver->state); if(instrumentation && instrumentation_state) instrumentation->cleanup(instrumentation_state); if(mutator && mutator_state) mutator->cleanup(mutator_state); free(driver); free(instrumentation); free(mutator); } static void sigint_handler(int sig) { CRITICAL_MSG("CTRL-c detected, exiting\n"); cleanup_modules(); exit(0); } #define NUM_ITERATIONS_INFINITE -1 #define PRINT_HELP(x) \ puts(x); \ free(x); int main(int argc, char ** argv) { char *driver_name, *driver_options = NULL, *mutator_name, *mutator_options = NULL, *mutator_saved_state = NULL, *mutation_state_dump_file = NULL, *mutation_state_load_file = NULL, *mutate_buffer = NULL, *mutator_directory = NULL, *mutator_directory_cli = NULL, *logging_options = NULL, *seed_file = NULL, *seed_buffer = NULL, *instrumentation_name = NULL, 
*instrumentation_options = NULL, *instrumentation_state_string = NULL, *instrumentation_state_load_file = NULL, *instrumentation_state_dump_file = NULL; int seed_length = 0, mutate_length = 0, instrumentation_length = 0, mutator_state_length; time_t fuzz_begin_time; int i = 0, iteration = 0, fuzz_result = FUZZ_NONE, new_path = 0; char filename[MAX_PATH]; char filehash[256]; char c; char * directory; //Default options int num_iterations = NUM_ITERATIONS_INFINITE; //default to infinite char * output_directory = "output"; ////////////////////////////////////////////////////////////////////////////////////////////////////// // Mutator Setup ///////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// if (!mutator_directory) { char * mutator_repo_dir = getenv("KILLERBEEZ_MUTATORS"); //If the environment variable KILLERBEEZ_MUTATORS is set, try to autodetect the directory based on the repo build path if (mutator_repo_dir) { mutator_directory = (char *)malloc(MAX_PATH + 1); if (!mutator_directory) { printf("Couldn't get memory for default mutator_directory"); return 1; } memset(mutator_directory, 0, MAX_PATH + 1); #ifdef _WIN32 #if defined(_M_X64) || defined(__x86_64__) #ifdef _DEBUG snprintf(mutator_directory, MAX_PATH, "%s\\..\\build\\x64\\Debug\\mutators\\", mutator_repo_dir); #else snprintf(mutator_directory, MAX_PATH, "%s\\..\\build\\x64\\Release\\mutators\\", mutator_repo_dir); #endif #else #ifdef _DEBUG snprintf(mutator_directory, MAX_PATH, "%s\\..\\build\\X86\\Debug\\mutators\\", mutator_repo_dir); #else snprintf(mutator_directory, MAX_PATH, "%s\\..\\build\\X86\\Release\\mutators\\", mutator_repo_dir); #endif #endif #else snprintf(mutator_directory, MAX_PATH, "%s/../build/mutators/", mutator_repo_dir); #endif } else { #ifdef _WIN32 mutator_directory = filename_relative_to_binary_dir("..\\mutators\\"); #else // LINUX and APPLE 
mutator_directory = filename_relative_to_binary_dir("../mutators"); #endif } } ////////////////////////////////////////////////////////////////////////////////////////////////////// // Parse Arguments /////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// while ((c = getopt(argc, argv, "d:h:i:j:k:l:m:n:o:p:r:s:t:u:")) != -1) { switch (c) { case 'd': read_file(optarg, &driver_options); break; case 'h': if (optarg == NULL) { usage(argv[0], mutator_directory); } else if (strcmp(optarg, "l") == 0) { PRINT_HELP(logging_help()); } else if (strcmp(optarg, "d") == 0) { PRINT_HELP(driver_help()); } else if (strcmp(optarg, "i") == 0) { PRINT_HELP(instrumentation_help()); } else if (strcmp(optarg, "m") == 0) { PRINT_HELP(mutator_help(mutator_directory)); } exit(1); case 'i': read_file(optarg, &instrumentation_options); break; case 'j': instrumentation_state_dump_file = optarg; break; case 'k': instrumentation_state_load_file = optarg; break; case 'l': read_file(optarg, &logging_options); break; case 'm': read_file(optarg, &mutator_options); break; case 'n': num_iterations = atoi(optarg); break; case 'o': output_directory = optarg; break; case 'p': mutator_directory_cli = optarg; break; case 'r': mutator_saved_state = optarg; break; case 's': seed_file = optarg; break; case 't': mutation_state_dump_file = optarg; break; case 'u': mutation_state_load_file = optarg; break; } } // Make sure we have enough positional arguments if (argc-optind < 3) { usage(argv[0], mutator_directory); } driver_name = argv[optind]; instrumentation_name = argv[optind+1]; mutator_name = argv[optind+2]; if (setup_logging(logging_options)) { printf("Failed setting up logging, exiting\n"); return 1; } signal(SIGINT, sigint_handler); //Check number of iterations for valid number of rounds if (num_iterations != NUM_ITERATIONS_INFINITE && num_iterations <= 0) FATAL_MSG("Invalid 
number of iterations %d", num_iterations); if (mutator_directory_cli) { free(mutator_directory); mutator_directory = strdup(mutator_directory_cli); mutator_directory_cli = NULL; } if (!mutator_directory) FATAL_MSG("Mutator directory was not found in default location. You may need to pass the -md flag."); if (instrumentation_state_dump_file) { strncpy(filename, instrumentation_state_dump_file, sizeof(filename)); #ifdef _WIN32 PathRemoveFileSpec(filename); #else dirname(filename); #endif if (access(filename, W_OK)) FATAL_MSG("The provided instrumentation_state_dump_file filename (%s) is not writeable", instrumentation_state_dump_file); } if (mutation_state_dump_file) { strncpy(filename, mutation_state_dump_file, sizeof(filename)); #ifdef _WIN32 PathRemoveFileSpec(filename); #else dirname(filename); #endif if (access(filename, W_OK)) FATAL_MSG("The provided mutation_state_dump_file filename (%s) is not writeable", mutation_state_dump_file); } #ifdef _WIN32 #define create_output_directory(name) \ snprintf(filename, sizeof(filename), "%s" name, output_directory); \ if(!CreateDirectory(filename, NULL) && GetLastError() != ERROR_ALREADY_EXISTS) { \ FATAL_MSG("Unable to create directory %s", filename); \ } #else #define create_output_directory(name) \ snprintf(filename, sizeof(filename), "%s" name, output_directory); \ if (mkdir(filename, 0775) == -1) { \ if (errno != EEXIST) \ FATAL_MSG("Unable to create directory %s", filename); \ } // otherwise, it already exists and we don't need to do anything #endif //Setup the output directory create_output_directory(""); // creates ./output create_output_directory("/crashes"); // creates ./output/crashes and so on create_output_directory("/hangs"); create_output_directory("/new_paths"); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Ojbect Setup ////////////////////////////////////////////////////////////////////////////////////// 
////////////////////////////////////////////////////////////////////////////////////////////////////// //Load the instrumentation state from disk (if specified, and create the instrumentation if (instrumentation_state_load_file) { instrumentation_length = read_file(instrumentation_state_load_file, &instrumentation_state_string); if (instrumentation_length <= 0) FATAL_MSG("Could not read instrumentation file or empty instrumentation file: %s", instrumentation_state_load_file); } // NULL means instrumentation failed to initialize. instrumentation = instrumentation_factory(instrumentation_name); if (!instrumentation) { free(instrumentation_state_string); FATAL_MSG("Unknown instrumentation '%s'", instrumentation_name); } instrumentation_state = instrumentation->create(instrumentation_options, instrumentation_state_string); if (!instrumentation_state) { free(instrumentation_state_string); FATAL_MSG("Bad options/state for instrumentation %s", instrumentation_name); } free(instrumentation_state_string); //Load the seed buffer from a file if (seed_file) { seed_length = read_file(seed_file, &seed_buffer); if (seed_length <= 0) FATAL_MSG("Could not read seed file or empty seed file: %s", seed_file); } if (!seed_buffer) FATAL_MSG("No seed file or seed id specified."); if (mutation_state_load_file) { free(mutator_saved_state); mutator_state_length = read_file(mutation_state_load_file, &mutator_saved_state); if (mutator_state_length <= 0) FATAL_MSG("Could not read mutator saved state from file: %s", mutation_state_load_file); } //Create the mutator mutator = mutator_factory_directory(mutator_directory, mutator_name); if (!mutator) FATAL_MSG("Unknown mutator (%s)", mutator_name); free(mutator_directory); mutator_state = mutator->create(mutator_options, mutator_saved_state, seed_buffer, seed_length); if (!mutator_state) FATAL_MSG("Bad mutator options or saved state for mutator %s", mutator_name); free(mutator_saved_state); free(seed_buffer); //Create the driver driver = 
driver_all_factory(driver_name, driver_options, instrumentation, instrumentation_state, mutator, mutator_state); if (!driver) { FATAL_MSG("Unknown driver '%s' or bad options: \n\n\tdriver options: %s\n\n"\ "\tmutator options: %s\n\n\tPass %s -hd for help.\n", driver_name, driver_options, mutator_options, argv[0]); } ////////////////////////////////////////////////////////////////////////////////////////////////////// // Main Fuzz Loop //////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// fuzz_begin_time = time(NULL); //Copy the input, mutate it, and run the fuzzed program for (iteration = 0; num_iterations == NUM_ITERATIONS_INFINITE || iteration < num_iterations; iteration++) { DEBUG_MSG("Fuzzing the %d iteration", iteration); fuzz_result = driver->test_next_input(driver->state); if (fuzz_result < 0) { if(fuzz_result == -2) WARNING_MSG("The mutator has run out of mutations to test after %d iterations", iteration); else ERROR_MSG("The driver failed to test the target program, fuzz_result was %d",fuzz_result); break; } new_path = instrumentation->is_new_path(instrumentation_state); if (new_path < 0) { ERROR_MSG("The instrumentation failed to determine the fuzzed process's fuzz_result"); break; } directory = NULL; if (fuzz_result == FUZZ_CRASH) { directory = "crashes"; CRITICAL_MSG("Found %s", directory); } else if (fuzz_result == FUZZ_HANG) { directory = "hangs"; ERROR_MSG("Found %s", directory); } else if (new_path > 0) { directory = "new_paths"; INFO_MSG("Found %s", directory); } if (directory != NULL) { mutate_buffer = driver->get_last_input(driver->state, &mutate_length); if (!mutate_buffer) { ERROR_MSG("Unable to dump mutate buffer\n"); } else { if (output_directory) { md5((uint8_t *)mutate_buffer, mutate_length, filehash, sizeof(filehash)); snprintf(filename, MAX_PATH, "%s/%s/%s", output_directory, directory, filehash); if 
(!file_exists(filename)) //If the file already exists, there's no reason to write it again write_buffer_to_file(filename, mutate_buffer, mutate_length); } free(mutate_buffer); } } } INFO_MSG("Ran %ld iterations in %lld seconds", iteration, time(NULL) - fuzz_begin_time); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Cleanup /////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// if (instrumentation_state_dump_file) { instrumentation_state_string = instrumentation->get_state(instrumentation_state); if (instrumentation_state_string) { write_buffer_to_file(instrumentation_state_dump_file, instrumentation_state_string, strlen(instrumentation_state_string)); instrumentation->free_state(instrumentation_state_string); } else WARNING_MSG("Couldn't dump instrumentation state to file %s", instrumentation_state_dump_file); } if (mutation_state_dump_file) { mutator_saved_state = mutator->get_state(mutator_state); if (mutator_saved_state) { write_buffer_to_file(mutation_state_dump_file, mutator_saved_state, strlen(mutator_saved_state)); mutator->free_state(mutator_saved_state); } else WARNING_MSG("Couldn't dump mutator state to file %s", mutation_state_dump_file); } //Cleanup everything and exit cleanup_modules(); return 0; } ================================================ FILE: instrumentation/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (instrumentation) set(INSTRUMENTATION_SRC ${PROJECT_SOURCE_DIR}/instrumentation.c ${PROJECT_SOURCE_DIR}/instrumentation_factory.c ) if (WIN32) set(INSTRUMENTATION_SRC ${INSTRUMENTATION_SRC} ${PROJECT_SOURCE_DIR}/debug_instrumentation.c ${PROJECT_SOURCE_DIR}/dynamorio_instrumentation.c ${PROJECT_SOURCE_DIR}/wingui.c ) else () set(INSTRUMENTATION_SRC ${INSTRUMENTATION_SRC} 
${PROJECT_SOURCE_DIR}/return_code_instrumentation.c ${PROJECT_SOURCE_DIR}/afl_instrumentation.c ) if (NOT APPLE) set(INSTRUMENTATION_SRC ${INSTRUMENTATION_SRC} ${PROJECT_SOURCE_DIR}/linux_ipt_instrumentation.c ) set(FORKSERVER_SRC ${PROJECT_SOURCE_DIR}/forkserver.c ${PROJECT_SOURCE_DIR}/forkserver_hooking.c ) add_library(forkserver SHARED ${FORKSERVER_SRC}) target_link_libraries(forkserver dl) endif () endif () source_group("Library Sources" FILES ${INSTRUMENTATION_SRC}) add_library(instrumentation OBJECT ${INSTRUMENTATION_SRC}) target_compile_definitions(instrumentation PUBLIC INSTRUMENTATION_NO_IMPORT) ================================================ FILE: instrumentation/afl_instrumentation.c ================================================ #include #include // for NULL #include // for shm functions #include #include #include // for lseek, write, ftruncate #include // for FUZZ_* return values #include // for PARSE_OPTION_* #include "afl_instrumentation.h" /** * This function allocates and initializes a new instrumentation specific state * object based on the given options. * @param options - a JSON string that contains the instrumentation specific * string of options * @param state - an instrumentation specific JSON string previously returned * from afl_get_state that should be loaded * @return - An instrumentation specific state object on success or NULL on failure */ void * afl_create(char *options, char *state) { afl_state_t *afl_state = setup_options(options); if(!afl_state) return NULL; if(state && afl_set_state(afl_state, state)) { DEBUG_MSG("Unable to set state for afl instrumentation"); return NULL; } return afl_state; } /** * This function cleans up all resources with the passed in instrumentation state. * @param instrumentation_state - an instrumentation specific state object * previously created by the afl_create function * This state object should not be referenced after * this function returns. 
*/ void afl_cleanup(void *instrumentation_state) { afl_state_t * state = (afl_state_t *)instrumentation_state; //Cleanup the SHM region shmctl(state->shm_id, IPC_RMID, NULL); //Kill any remaining target processes destroy_target_process(state, 1); // Cleanup the fork server if necessary if(state->fork_server_setup) { fork_server_exit(&state->fs); state->fork_server_setup = 0; } free(state->target_path); free(state->qemu_path); } char * afl_get_state(void *instrumentation_state) { afl_state_t * state = (afl_state_t *)instrumentation_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); if (!state_obj) return NULL; //Add the virgin_bits, virgin_tmout, and virgin_crash bitmaps ADD_MEM(temp, (const char *)state->virgin_bits, MAP_SIZE, state_obj, "virgin_bits"); ADD_MEM(temp, (const char *)state->virgin_tmout, MAP_SIZE, state_obj, "virgin_tmout"); ADD_MEM(temp, (const char *)state->virgin_crash, MAP_SIZE, state_obj, "virgin_crash"); ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function frees an instrumentation state previously obtained via afl_get_state. * @param state - the instrumentation state to free */ void afl_free_state(char *state) { free(state); } #define get_bits(name, dest) \ GET_MEM(tempstr, state, tempstr, name, result); \ memcpy(dest, tempstr, MAP_SIZE); \ free(tempstr); int afl_set_state(void *instrumentation_state, char *state) { int result; char * tempstr; afl_state_t * afl_state = (afl_state_t *)instrumentation_state; if(!state || !instrumentation_state) return 1; afl_state->loaded_state = 1; get_bits("virgin_bits", afl_state->virgin_bits); get_bits("virgin_tmout", afl_state->virgin_tmout); get_bits("virgin_crash", afl_state->virgin_crash); return 0; } /** * This function merges the bitmap in src into the bitmap in dest * @param dest - the bitmap that will be combined with the src bitmap. 
* @param src - the bitmap that will be added to the dest bitmap */ void merge_bitmaps(u8 * dest, const u8 * src) { size_t i; for (i = 0; i < MAP_SIZE; i++) dest[i] &= src[i]; } void * afl_merge(void *instrumentation_state, void *other_instrumentation_state) { afl_state_t * first = (afl_state_t *)instrumentation_state; afl_state_t * second = (afl_state_t *)other_instrumentation_state; afl_state_t * ret; ret = (afl_state_t *)malloc(sizeof(afl_state_t)); if(!ret) return NULL; memset(ret, 0, sizeof(afl_state_t)); memcpy(ret->virgin_bits, first->virgin_bits, MAP_SIZE); merge_bitmaps(ret->virgin_bits, second->virgin_bits); memcpy(ret->virgin_tmout, first->virgin_tmout, MAP_SIZE); merge_bitmaps(ret->virgin_tmout, second->virgin_tmout); memcpy(ret->virgin_crash, first->virgin_crash, MAP_SIZE); merge_bitmaps(ret->virgin_crash, second->virgin_crash); return ret; } /** * This function enables the instrumentation and runs the fuzzed process. * @param instrumentation_state - an instrumentation specific state object * previously created by the afl_create function * @process - a pointer to return a pid_t to the process that the * instrumentation was enabled on * @cmd_line - the command line of the fuzzed process to enable instrumentation on * @input - a buffer to the input that should be sent to the fuzzed process * @input_length - the length of the input parameter * returns 0 on success, -1 on failure */ int afl_enable(void *instrumentation_state, pid_t *process, char *cmd_line, char *input, size_t input_length) { afl_state_t * state = (afl_state_t *)instrumentation_state; char ** argv; // If there's already a child process, get rid of it if(state->child_pid) { destroy_target_process(state, 0); } // Set up shared memory if(setup_shm(state)) return -1; /* After this memset, trace_bits[] are effectively volatile, so we must prevent any earlier operations from venturing into that territory. 
*/ memset(state->trace_bits, 0, MAP_SIZE); MEM_BARRIER(); if(create_target_process(state, cmd_line, input, input_length)) return -1; state->process_finished = 0; state->fuzz_results_set = 0; *process = state->child_pid; return 0; } /** * This function determines if a new path was covered * @param instrumentation_state - an instrumentation specific state object * previously created by the afl_create function * @return - 1 if the previously setup process (via the enable function) took a * new path, 0 if it did not, or -1 on failure. */ int afl_is_new_path(void *instrumentation_state) { afl_state_t * state = (afl_state_t *)instrumentation_state; // If we haven't set the fuzz results, do that and return the result if(!state->fuzz_results_set) finish_fuzz_round(state); if(state->last_is_new_path) return 1; return 0; } /** * This function will return the result of the fuzz job. It should be called * after the process has finished processing the tested input. The target * process will also be cleaned up in the process. * @param instrumentation_state - an instrumentation specific structure * previously created by the afl_create function * @return - either FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH, or -1 on error. */ int afl_get_fuzz_result(void *instrumentation_state) { afl_state_t * state = (afl_state_t *)instrumentation_state; // If we haven't set the fuzz results, do that and return the result if(!state->fuzz_results_set) return finish_fuzz_round(state); // otherwise we can just return the result return state->last_fuzz_result; } /** * This function determines if the target process CRASHED, HUNG or EXITED * NORMALLY, cleans up the process, and checks to see if any new code was * executed. The assumption is that if you are calling this function, * you're sick of waiting for the child, so if it is still executing by the * time we get here, we're calling it a HANG. 
This is implemented as an * internal function so we can use it when the caller calls any of the post- * fuzzing functions, such as get_fuzz_result or is_new_path(). * * @param state - The AFL specific state structure * @return - either FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH, or -1 on error. */ static int finish_fuzz_round(afl_state_t *state) { int status, rc; // if our process is still running, then it was a hang if(!afl_is_process_done(state)) { destroy_target_process(state, 1); state->last_fuzz_result = FUZZ_HANG; #ifdef __x86_64__ simplify_trace((uint64_t*)state->trace_bits); #else simplify_trace((uint32_t*)state->trace_bits); #endif /* ^__x86_64__ */ state->last_is_new_path = has_new_bits(state->virgin_tmout, state->trace_bits); DEBUG_MSG("Process hung, has_new_bits = %d", state->last_is_new_path); state->fuzz_results_set = 1; } else if(WIFEXITED(state->last_status)) { /* Any subsequent operations on trace_bits must not be moved by the compiler below this point. Past this location, trace_bits[] behave very normally and do not have to be treated as volatile. */ MEM_BARRIER(); state->last_is_new_path = has_new_bits(state->virgin_bits, state->trace_bits); state->last_fuzz_result = FUZZ_NONE; // process exited normally DEBUG_MSG("Process exited normally, has_new_bits = %d", state->last_is_new_path); state->fuzz_results_set = 1; } else if(WIFSIGNALED(state->last_status)) { // process was terminated by a signal. We look for signals which // indicate non-crashing conditions (e.g. 
SIGPIPE) if(WTERMSIG(state->last_status) == SIGPIPE) { state->last_is_new_path = has_new_bits(state->virgin_bits, state->trace_bits); state->last_fuzz_result = FUZZ_NONE; // we'll say the process exited normally DEBUG_MSG("Process exited due to SIGPIPE, has_new_bits = %d", state->last_is_new_path); state->fuzz_results_set = 1; } else { state->last_fuzz_result = FUZZ_CRASH; #ifdef __x86_64__ simplify_trace((uint64_t*)state->trace_bits); #else simplify_trace((uint32_t*)state->trace_bits); #endif /* ^__x86_64__ */ state->last_is_new_path = has_new_bits(state->virgin_crash, state->trace_bits); DEBUG_MSG("Process crashed, has_new_bits = %d", state->last_is_new_path); state->fuzz_results_set = 1; } } else { // if it didn't exit normally, nor get interrupted by a signal... // I'm not sure what happened! return FUZZ_ERROR; } return state->last_fuzz_result; } /** * Checks if the target process is done fuzzing the inputs yet. * @param instrumentation_state - The afl_state_t object containing this * instrumentation's state * @return - 0 if the process is not done testing the fuzzed input, * non-zero if the process is done. */ int afl_is_process_done(void *instrumentation_state) { int status, rc; afl_state_t * state = (afl_state_t *)instrumentation_state; // If the state says we're done, our job is easy! if(state->process_finished) return 1; if(state->use_fork_server) { status = fork_server_get_status(&state->fs, 0); // if it's still alive or an error occurred and we can't tell if(status < 0 || status == FORKSERVER_NO_RESULTS_READY) return 0; state->last_status = status; state->process_finished = 1; return 1; } else { // We just need to check to see if the process is still alive rc = waitpid(state->child_pid, &status, WNOHANG); if(rc == 0) // child did not change state return 0; if(rc == state->child_pid) { // our child changed state (exited, received a signal, etc.) 
state->last_status = status; // Record it state->child_pid = 0; // We no longer have a child process state->process_finished = 1; // Mark that we're done return 1; } if(rc == -1) // waitpid failed return -1; ERROR_MSG("waitpid() said pid %d changed state but our child was %d", rc, state->child_pid); return -1; // Some other child process changed state? } ERROR_MSG("Fell through to end of afl_is_process_done()."); return -1; } int afl_help(char **help_str) { *help_str = strdup( "afl - AFL-based instrumentation\n" "Options:\n" " use_fork_server Whether to use a fork server; 1=yes, 0=no (default=1)\n" " persistence_max_cnt The number of executions to run in one process while\n" " fuzzing in persistence mode (default=1)\n" " qemu_mode Whether to use qemu mode; 1=yes, 0=no (default=0)\n" " qemu_path The path to afl-qemu-trace (including executable name)\n" " deferred_startup Whether to use deferred startup mode; 1=yes, 0=no (default=0)\n" "\n" ); if (*help_str == NULL) return -1; return 0; } /** * This function creates a afl_state_t object based on the given options. * @param options - A JSON string of the options to set in the new * afl_state_t. See the help function for more information on * the specific options available. 
* @return the afl_state_t generated from the options in the JSON options * string, or NULL on failure */ static afl_state_t * setup_options(char *options) { afl_state_t * state; char buffer[PATH_MAX]; char *pos; int fd, error = 0; state = malloc(sizeof(afl_state_t)); if(!state) return NULL; memset(state, 0, sizeof(afl_state_t)); state->use_fork_server = 1; // default to use the fork server if(options) { DEBUG_MSG("JSON options = %s", options); PARSE_OPTION_INT(state, options, use_fork_server, "use_fork_server", afl_cleanup); PARSE_OPTION_INT(state, options, persistence_max_cnt, "persistence_max_cnt", afl_cleanup); PARSE_OPTION_INT(state, options, deferred_startup, "deferred_startup", afl_cleanup); PARSE_OPTION_INT(state, options, qemu_mode, "qemu_mode", afl_cleanup); PARSE_OPTION_STRING(state, options, qemu_path, "qemu_path", afl_cleanup); } if(state->persistence_max_cnt && !state->use_fork_server) { ERROR_MSG("Cannot use persistence mode without the fork server"); error = 1; } else if(state->deferred_startup && !state->use_fork_server) { ERROR_MSG("Cannot use deferred startup mode without the fork server"); error = 1; } else if(state->qemu_mode && !state->use_fork_server) { ERROR_MSG("Cannot use qemu mode without the fork server"); error = 1; } else if(state->qemu_mode && state->persistence_max_cnt) { ERROR_MSG("Cannot use qemu mode and persistence mode (yet)."); error = 1; } if(error) { afl_cleanup(state); return NULL; } if(state->qemu_mode && !state->qemu_path) { //Try to autodetect afl-qemu-trace if(file_exists("../../killerbeez/afl_progs/afl-qemu-trace")) { //try looking in the source directory state->qemu_path = realpath("../../killerbeez/afl_progs/afl-qemu-trace", NULL); } else { //check $PATH system("which afl-qemu-trace > /tmp/which-afl-qemu-trace"); fd = open("/tmp/which-afl-qemu-trace", O_RDONLY); if(fd >= 0) { memset(buffer, 0, sizeof(buffer)); if(read(fd, buffer, sizeof(buffer)-1) > 0) { //Trim newlines if ((pos = strchr(buffer, '\n')) != NULL) *pos = 
0; if ((pos = strchr(buffer, '\r')) != NULL) *pos = 0; //If we read a valid path, use that if(file_exists(buffer)) state->qemu_path = strdup(buffer); } close(fd); } unlink("/tmp/which-afl-qemu-trace"); } if(!state->qemu_path) { ERROR_MSG("Cannot find afl-qemu-trace for use with qemu mode, please specify the path with the qemu_path option"); afl_cleanup(state); return NULL; } } return state; } /** * This function starts the fuzzed process * @param state - The afl_state_t object containing this instrumentation's state * @param cmd_line - the command line of the fuzzed process to start * @param input - a buffer to the input that should be sent to the fuzzed process * @param input_length - the length of the input parameter * @return - zero on success, non-zero on failure. */ static int create_target_process(afl_state_t * state, char* cmd_line, char * input, size_t input_length) { char ** argv; char qemu_command_line[4096]; int i; if(state->use_fork_server) { if(!state->fork_server_setup) { DEBUG_MSG("Using fork server..."); if(state->qemu_mode) { //prepend the command with the path of afl-qemu-trace snprintf(qemu_command_line, sizeof(qemu_command_line), "%s %s", state->qemu_path, cmd_line); cmd_line = qemu_command_line; } //Split the command line into the executable and arguments if(split_command_line(cmd_line, &state->target_path, &argv)) return -1; if(state->deferred_startup) { //set the deferred environment variable to let the forkserver know it setenv(DEFER_ENV_VAR, "1", 1); //shouldn't do the startup right away } //Start the fork server fork_server_init(&state->fs, state->target_path, argv, 0, state->persistence_max_cnt, input_length != 0); state->fork_server_setup = 1; //Free the split arguments for(i = 0; argv[i]; i++) free(argv[i]); free(argv); } if(state->fs.target_stdin != -1) { //Take care of the stdin input, write over the file, then truncate it accordingly lseek(state->fs.target_stdin, 0, SEEK_SET); if(input != NULL && input_length != 0) { 
if(write(state->fs.target_stdin, input, input_length) != input_length) FATAL_MSG("Short write to target's stdin file"); } if(ftruncate(state->fs.target_stdin, input_length)) FATAL_MSG("ftruncate() failed"); lseek(state->fs.target_stdin, 0, SEEK_SET); } //Start the new child and tell it to go state->child_pid = fork_server_fork_run(&state->fs); if(state->child_pid < 0) { ERROR_MSG("Fork server failed to fork a new child\n"); return -1; } } else { DEBUG_MSG("Not using fork server, executing %s", cmd_line); if (start_process_and_write_to_stdin(cmd_line, input, input_length, &state->child_pid)) { state->child_pid = 0; ERROR_MSG("Failed to create process with command line: %s\n", cmd_line); return -1; } } DEBUG_MSG("Child process ID = %d", state->child_pid); return 0; } /** * This function terminates the fuzzed process. * @param state - The afl_state_t object containing this instrumentation's state */ static void destroy_target_process(afl_state_t * state, int force) { if(state->child_pid && state->child_pid != -1) { DEBUG_MSG("Cleaning up old child process (pid=%d)", state->child_pid); if(!state->persistence_max_cnt || force) { kill(state->child_pid, SIGKILL); state->child_pid = 0; } if(state->use_fork_server) { state->last_status = fork_server_get_status(&state->fs, 1); } } } /** * This sets up the shared memory between our fuzzer and the target process * being fuzzed. The target process will write to this as it executes and * we will read it once the fuzzing is complete (crash, hang, or normal exit) * @param instrumentation_state - The afl_state_t object containing this * instrumentation's state * @returns zero on success, non-zero on error */ int setup_shm(void *instrumentation_state) { /* This function is based on the AFL setup_shm function present in afl-fuzz.c, available at this URL: https://github.com/mirrorer/afl/blob/master/afl-fuzz.c#L1968. 
AFL's license is as shown below: american fuzzy lop - fuzzer code -------------------------------- Written and maintained by Michal Zalewski Forkserver design by Jann Horn Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ char* shm_str; afl_state_t * state = (afl_state_t *)instrumentation_state; if(state->trace_bits) // if trace_bits already points at the shm return 0; // region, we've already run this function! // If we loaded a saved input bitmap, do not overwrite the // map showing what was fuzzed showing everything as untouched if(!state->loaded_state) { memset(state->virgin_bits, 255, MAP_SIZE); memset(state->virgin_tmout, 255, MAP_SIZE); memset(state->virgin_crash, 255, MAP_SIZE); } // Allocate shared memory; shm_id must be module level or global so // the atexit function has access to it (as we can not pass arguments // to the callback function) state->shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); if(state->shm_id < 0) { ERROR_MSG("shmget() failed"); return 1; } shm_str = alloc_printf("%d", state->shm_id); // set the environment variable so the instrumented binary knows which // shared memory ID to attach to when it goes to write the bitmap setenv(SHM_ENV_VAR, shm_str, 1); ck_free(shm_str); // Attach to shared memory region state->trace_bits = shmat(state->shm_id, NULL, 0); if(!state->trace_bits) { ERROR_MSG("shmat() failed"); return 1; } return 0; } /** * Check if the current execution path brings anything new to the table. * Update virgin bits to reflect the new paths found, so subsequent calls will * always return 0. * * This function is called after every exec() on a fairly large buffer, so * it needs to be fast. We do this in 32-bit and 64-bit flavors. 
* * @param virgin_map - The map we should compare against, which will be * virgin_{bits,tmout,crash} in practice. * @param trace_bits - The trace for this particular run * @returns - 1 if the only change is the hit-count for a particular tuple; * 2 if there are new tuples seen, 0 if it is not a new path **/ static inline uint8_t has_new_bits(uint8_t* virgin_map, uint8_t *trace_bits) { /* This function is based on the AFL has_new_bits function present in afl-fuzz.c, available at this URL: https://github.com/mirrorer/afl/blob/master/afl-fuzz.c#L1968. AFL's license is as shown below: american fuzzy lop - fuzzer code -------------------------------- Written and maintained by Michal Zalewski Forkserver design by Jann Horn Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ #ifdef __x86_64__ uint64_t* current = (uint64_t*)trace_bits; uint64_t* virgin = (uint64_t*)virgin_map; uint32_t i = (MAP_SIZE >> 3); #else uint32_t* current = (uint32_t*)trace_bits; uint32_t* virgin = (uint32_t*)virgin_map; uint32_t i = (MAP_SIZE >> 2); #endif /* ^__x86_64__ */ uint8_t ret = 0; while (i--) { /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap that have not been already cleared from the virgin map - since this will almost always be the case. */ if (unlikely(*current) && unlikely(*current & *virgin)) { if (likely(ret < 2)) { uint8_t* cur = (uint8_t*)current; uint8_t* vir = (uint8_t*)virgin; /* Looks like we have not found any new bytes yet; see if any non-zero bytes in current[] are pristine in virgin[]. 
*/ #ifdef __x86_64__ if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) || (cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) || (cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff)) ret = 2; else ret = 1; #else if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff)) ret = 2; else ret = 1; #endif /* ^__x86_64__ */ } *virgin &= ~*current; } current++; virgin++; } return ret; } /* Destructively simplify trace by eliminating hit count information and replacing it with 0x80 or 0x01 depending on whether the tuple is hit or not. Called on every new crash or timeout, should be reasonably fast. */ static const uint8_t simplify_lookup[256] = { [0] = 1, [1 ... 255] = 128 }; #ifdef __x86_64__ static void simplify_trace(uint64_t* mem) { uint32_t i = MAP_SIZE >> 3; while (i--) { /* Optimize for sparse bitmaps. */ if (unlikely(*mem)) { uint8_t* mem8 = (uint8_t*)mem; mem8[0] = simplify_lookup[mem8[0]]; mem8[1] = simplify_lookup[mem8[1]]; mem8[2] = simplify_lookup[mem8[2]]; mem8[3] = simplify_lookup[mem8[3]]; mem8[4] = simplify_lookup[mem8[4]]; mem8[5] = simplify_lookup[mem8[5]]; mem8[6] = simplify_lookup[mem8[6]]; mem8[7] = simplify_lookup[mem8[7]]; } else *mem = 0x0101010101010101ULL; mem++; } } #else static void simplify_trace(uint32_t* mem) { uint32_t i = MAP_SIZE >> 2; while (i--) { /* Optimize for sparse bitmaps. 
*/ if (unlikely(*mem)) { uint8_t* mem8 = (uint8_t*)mem; mem8[0] = simplify_lookup[mem8[0]]; mem8[1] = simplify_lookup[mem8[1]]; mem8[2] = simplify_lookup[mem8[2]]; mem8[3] = simplify_lookup[mem8[3]]; } else *mem = 0x01010101; mem++; } } #endif /* ^__x86_64__ */ ================================================ FILE: instrumentation/afl_instrumentation.h ================================================ #include // fprintf #include // for pid_t #include // uint*_t #include // for memset, strdup #include // for waitpid #include "forkserver_internal.h" #include "../afl_progs/config.h" #include "../afl_progs/alloc-inl.h" struct afl_state { int shm_id; char *qemu_path; char *target_path; pid_t child_pid; forkserver_t fs; int process_finished; int last_fuzz_result; int fuzz_results_set; // have we set the fuzz results? int last_status; // the last input did what? (CRASH, HANG, NONE, etc.) int last_is_new_path; // did the last input hit a new code path? int use_fork_server; int fork_server_setup; int persistence_max_cnt; int qemu_mode; int deferred_startup; int loaded_state; uint8_t virgin_bits[MAP_SIZE]; // Regions yet untouched by fuzzing uint8_t virgin_tmout[MAP_SIZE]; // Bits we haven't seen in tmouts uint8_t virgin_crash[MAP_SIZE]; // Bits we haven't seen in crashes uint8_t *trace_bits; // SHM with instrumentation bitmap }; typedef struct afl_state afl_state_t; void * afl_create(char *options, char *state); void afl_cleanup(void *instrumentation_state); char * afl_get_state(void *instrumentation_state); void afl_free_state(char *state); int afl_set_state(void *instrumentation_state, char *state); void * afl_merge(void *instrumentation_state, void *other_instrumentation_state); int afl_enable(void *instrumentation_state, pid_t *process, char *cmd_line, char *input, size_t input_length); int afl_is_new_path(void *instrumentation_state); int afl_get_fuzz_result(void *instrumentation_state); int afl_is_process_done(void *instrumentation_state); int afl_help(char 
**help_str); static afl_state_t * setup_options(char *options); static void destroy_target_process(afl_state_t * state, int force); static int create_target_process(afl_state_t * state, char* cmd_line, char * input, size_t input_length); int setup_shm(void *instrumentation_state); static void remove_shm(); #ifdef __x86_64__ static void simplify_trace(uint64_t* mem); #else static void simplify_trace(uint32_t* mem); #endif /* ^__x86_64__ */ static inline uint8_t has_new_bits(uint8_t* virgin_map, uint8_t *trace_bits); static int finish_fuzz_round(afl_state_t *state); ================================================ FILE: instrumentation/debug_instrumentation.c ================================================ #include #include #include #include "instrumentation.h" #include "debug_instrumentation.h" #include #include /** * This function creates the target process and debugs it. This function runs in * a separate thread, releasing the process_creation_semaphore once it has created * the target process. This function then runs the debug loop on the target, setting * state->last_status when the process crashes, hangs, or exits normally. 
* @param args - A thread_arguments_t object with the thread's arguments in it * @return - zero on success, non-zero on failure */ static int debugging_thread(debug_state_t * state) { DEBUG_EVENT de; DWORD cont, child_pid; while (1) { //Wait for the main thread to tell us to go take_semaphore(state->fuzz_round_semaphore); //Create the child process, mark it as running, and let the main thread know we're done if (start_process_and_write_to_stdin_flags(state->thread_args.cmd_line, state->thread_args.stdin_input, state->thread_args.stdin_length, &state->child_handle, DEBUG_ONLY_THIS_PROCESS)) { release_semaphore(state->process_creation_semaphore); state->child_handle = NULL; ERROR_MSG("Failed to create process with command line: %s\n", state->thread_args.cmd_line); return 1; } state->process_running = 1; //Let the main thread know we've created the process release_semaphore(state->process_creation_semaphore); //Loop while debugging and look for process exits and exceptions child_pid = GetProcessId(state->child_handle); state->last_status = FUZZ_RUNNING; memset(&de, 0, sizeof(DEBUG_EVENT)); while (state->process_running && WaitForDebugEvent(&de, INFINITE)) { cont = DBG_CONTINUE; if (de.dwProcessId == child_pid && state->process_running) { if (de.dwDebugEventCode == EXCEPTION_DEBUG_EVENT) { // Not all exceptions are real crashes - we ignore breakpoints being hit and // exceptions that are encountered multiple times if (!de.u.Exception.dwFirstChance || // if the debugger has not encountered this exception before (de.u.Exception.ExceptionRecord.ExceptionCode != EXCEPTION_BREAKPOINT && de.u.Exception.ExceptionRecord.ExceptionCode != STATUS_WX86_BREAKPOINT)) { state->last_status = FUZZ_CRASH; cont = DBG_EXCEPTION_NOT_HANDLED; state->process_running = 0; //Once we know the result, kill the process to speed things up TerminateProcess(state->child_handle, 0); } } else if (de.dwDebugEventCode == EXIT_PROCESS_DEBUG_EVENT) { state->last_status = FUZZ_NONE; state->process_running = 
0; } } if (!ContinueDebugEvent(de.dwProcessId, de.dwThreadId, cont)) { ERROR_MSG("ContinueDebugEvent: Failed to check child process health"); state->last_status = -1; release_semaphore(state->results_ready_semaphore); return -1; } memset(&de, 0, sizeof(DEBUG_EVENT)); } //Let the main thread know we've finished looking at the current fuzzed process' debug events release_semaphore(state->results_ready_semaphore); } return 0; } /** * This function terminates the fuzzed process. * @param state - The debug_state_t object containing this instrumentation's state */ static void destroy_target_process(debug_state_t * state) { if (state->child_handle) { state->last_child_hung = get_process_status(state->child_handle); //If the process hung, then make sure the debug thread finishes its debug loop if(state->last_child_hung)//otherwise we'll be waiting for it forever state->process_running = 0; TerminateProcess(state->child_handle, 0); CloseHandle(state->child_handle); state->child_handle = NULL; //Wait for the debug thread to be done with the child. We need to wait //here, since we don't want the results_ready_semaphore to becoming stale //because the debug instrumentation user did not read the results of a previous //fuzzed process take_semaphore(state->results_ready_semaphore); } } /** * This function starts the fuzzed process * @param state - The debug_state_t object containing this instrumentation's state * @param cmd_line - the command line of the fuzzed process to start * @param stdin_input - the input to pass to the fuzzed process's stdin * @param stdin_length - the length of the stdin_input parameter * @return - zero on success, non-zero on failure. 
*/ static int create_target_process(debug_state_t * state, char* cmd_line, char * stdin_input, size_t stdin_length) { //Reset the state for this fuzz process state->finished_last_run = 0; state->last_child_hung = 0; state->last_status = FUZZ_RUNNING; //Tell the debug thread to start a new process state->thread_args.cmd_line = cmd_line; state->thread_args.stdin_input = stdin_input; state->thread_args.stdin_length = stdin_length; release_semaphore(state->fuzz_round_semaphore); //Wait for the debug thread to finish creating the new process if (take_semaphore(state->process_creation_semaphore) || !state->child_handle) return 1; return 0; } /** * This function ends the fuzzed process (if it wasn't previously ended). * @param state - The debug_state_t object containing this instrumentation's state * @return - returns 0 on success or -1 on error */ static int finish_fuzz_round(debug_state_t * state) { if (!state->finished_last_run) { destroy_target_process(state); state->finished_last_run = 1; } if (state->last_status < 0) return -1; if(state->last_child_hung) state->last_status = FUZZ_HANG; // else leave it as whatever it was return 0; } //////////////////////////////////////////////////////////////// // Instrumentation methods ///////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function allocates and initializes a new instrumentation specific state object based on the given options. 
* @param options - a JSON string that contains the instrumentation specific string of options * @param state - an instrumentation specific JSON string previously returned from debug_get_state that should be loaded * @return - An instrumentation specific state object on success or NULL on failure */ void * debug_create(char * options, char * state) { debug_state_t * debug_state; debug_state = malloc(sizeof(debug_state_t)); if (!debug_state) return NULL; memset(debug_state, 0, sizeof(debug_state_t)); debug_state->fuzz_round_semaphore = create_semaphore(0, 1); debug_state->process_creation_semaphore = create_semaphore(0, 1); debug_state->results_ready_semaphore = create_semaphore(0, 1); if (!debug_state->fuzz_round_semaphore || !debug_state->process_creation_semaphore || !debug_state->results_ready_semaphore) { debug_cleanup(debug_state); return NULL; } if (state && debug_set_state(debug_state, state)) { debug_cleanup(debug_state); return NULL; } debug_state->debug_thread_handle = CreateThread( NULL, // default security attributes 0, // default stack size (LPTHREAD_START_ROUTINE)debugging_thread, // thread function debug_state, // thread argument 0, // default creation flags NULL // record the thread handle ); if (!debug_state->debug_thread_handle) { debug_cleanup(debug_state); return NULL; } return debug_state; } /** * This function cleans up all resources with the passed in instrumentation state. * @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function * This state object should not be referenced after this function returns. 
*/ void debug_cleanup(void * instrumentation_state) { debug_state_t * state = (debug_state_t *)instrumentation_state; destroy_target_process(state); if (state->debug_thread_handle) { TerminateThread(state->debug_thread_handle, 0); CloseHandle(state->debug_thread_handle); state->debug_thread_handle = NULL; } if(state->fuzz_round_semaphore) destroy_semaphore(state->fuzz_round_semaphore); if (state->process_creation_semaphore) destroy_semaphore(state->process_creation_semaphore); if (state->results_ready_semaphore) destroy_semaphore(state->results_ready_semaphore); free(state); } /** * This function merges the coverage information from two instrumentation states. This will always fail for the * debug instrumentation, since it does not record instrumentation data. * @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function * @param other_instrumentation_state - an instrumentation specific state object previously created by the debug_create function * @return - An instrumentation specific state object that contains the combination of both of the passed in instrumentation states * on success, or NULL on failure */ void * debug_merge(void * instrumentation_state, void * other_instrumentation_state) { return NULL; //No instrumentation data, so we can't ever merge } /** * This function returns the state information holding the previous execution path info. The returned value can later be passed to * debug_create or debug_set_state to load the state. 
* @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function
* @return - A JSON string that holds the instrumentation specific state object information on success, or NULL on failure
*/
char * debug_get_state(void * instrumentation_state)
{
	debug_state_t * state = (debug_state_t *)instrumentation_state;
	json_t *state_obj, *temp;
	char * ret;

	state_obj = json_object();
	// ADD_INT is a project macro defined outside this file section; presumably it stores
	// last_status under the "last_status" key using temp as scratch - TODO confirm
	ADD_INT(temp, state->last_status, state_obj, "last_status");
	// Serialize the object, then drop our reference to it; only the string is returned
	ret = json_dumps(state_obj, 0);
	json_decref(state_obj);
	return ret;
}

/**
 * This function frees an instrumentation state previously obtained via debug_get_state.
 * @param state - the instrumentation state to free
 */
void debug_free_state(char * state)
{
	free(state);
}

/**
 * This function sets the instrumentation state to the passed in state previously obtained via debug_get_state.
 * @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function
 * @param state - an instrumentation state previously obtained via debug_get_state
 * @return - 0 on success, non-zero on failure.
 */
int debug_set_state(void * instrumentation_state, char * state)
{
	debug_state_t * current_state = (debug_state_t *)instrumentation_state;
	int result, temp_int;

	// A NULL state string is rejected
	if (!state)
		return 1;

	//If a child process is running when the state is being set
	destroy_target_process(current_state);//kill it so we don't orphan it

	// GET_INT is a project macro defined outside this file section; presumably it parses
	// "last_status" out of the JSON string into current_state->last_status, using temp_int
	// and result as scratch/outcome variables - TODO confirm, including whether it returns
	// early on a parse failure
	GET_INT(temp_int, state, current_state->last_status, "last_status", result);
	// Mark the round as finished so finish_fuzz_round does not destroy the target again
	current_state->finished_last_run = 1;

	return 0; // State applied; report success
}

/**
 * This function enables the instrumentation and runs the fuzzed process. If the process needs to be restarted, it will be.
* @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function * @process - a pointer to return a handle to the process that instrumentation was enabled on * @cmd_line - the command line of the fuzzed process to enable instrumentation on * @input - a buffer to the input that should be sent to the fuzzed process on stdin * @input_length - the length of the input parameter * returns 0 on success, -1 on failure */ int debug_enable(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length) { debug_state_t * state = (debug_state_t *)instrumentation_state; if(state->child_handle) destroy_target_process(state); if (create_target_process(state, cmd_line, input, input_length)) return -1; *process = state->child_handle; state->enable_called = 1; return 0; } /** * This function determines whether the process being instrumented has taken a new path. The debug instrumentation does * not track the fuzzed process's path, so it is unable to determine if the process took a new path. * @param instrumentation_state - an instrumentation specific state object previously created by the debug_create function * @return - 0 when a new path wasn't detected (as it always won't be with the debug instrumentation), or -1 on failure. */ int debug_is_new_path(void * instrumentation_state) { debug_state_t * state = (debug_state_t *)instrumentation_state; if (!state->enable_called) return -1; return 0; //We don't gather instrumentation data, so we can't ever tell if we hit a new path. } /** * This function will return the result of the fuzz job. It should be called * after the process has finished processing the tested input. * @param instrumentation_state - an instrumentation specific structure previously created by the create() function * @return - either FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH, or -1 on error. 
*/ int debug_get_fuzz_result(void * instrumentation_state) { debug_state_t * state = (debug_state_t *)instrumentation_state; if (!state->enable_called) return -1; finish_fuzz_round(state); return state->last_status; } /** * Checks if the target process is done fuzzing the inputs yet. If it has finished, it will have * written last_status, the result of the fuzz job. * * @param state - The debug_state_t object containing this instrumentation's state * @return - 0 if the process has not finished testing the fuzzed input, 1 if the process is done, * or -1 on error. */ int debug_is_process_done(void * instrumentation_state) { debug_state_t * state = (debug_state_t *)instrumentation_state; if (!state->enable_called) return -1; if (state->process_running) return 0; else return 1; // we don't need to setup for get_fuzz_result, it should be handled by the debug thread. } /** * This function returns help text for this instrumentation. This help text will describe the instrumentation and any options * that can be passed to debug_create. * @param help_str - A pointer that will be updated to point to the new help string. 
* @return 0 on success and -1 on failure */ int debug_help(char ** help_str) { *help_str = strdup( "debug - Windows debug thread \"instrumentation\", only detects crashes\n" "Options:\n" "\tNone\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: instrumentation/debug_instrumentation.h ================================================ #pragma once #include #ifdef _WIN32 #include // HANDLE (winnt.h might work instead) #else #include // pid_t #endif void * debug_create(char * options, char * state); void debug_cleanup(void * instrumentation_state); void * debug_merge(void * instrumentation_state, void * other_instrumentation_state); char * debug_get_state(void * instrumentation_state); void debug_free_state(char * state); int debug_set_state(void * instrumentation_state, char * state); #ifdef _WIN32 int debug_enable(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length); #else int debug_enable(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length); #endif int debug_is_new_path(void * instrumentation_state); int debug_get_fuzz_result(void * instrumentation_state); int debug_is_process_done(void * instrumentation_state); int debug_help(char ** help_str); typedef struct { char * cmd_line; //the command line of the target process to start char * stdin_input; //input to the STDIN of the target process size_t stdin_length; //the length of the input to write stdin } thread_args_t; struct debug_state { #ifdef _WIN32 HANDLE child_handle; HANDLE debug_thread_handle; #else pid_t child_handle; #endif int process_running; //This semaphore is used to make the debug thread wait until the main //thread wants it to start a new process and begin debugging it. semaphore_t fuzz_round_semaphore; //This semaphore is used by the main thread to wait until the debug //thread has created the fuzzed process. 
semaphore_t process_creation_semaphore; //This semaphore is used by the main thread to wait until the debug //thread has finished debugging the fuzzed process and the results can //now be viewed (in last_status). semaphore_t results_ready_semaphore; int finished_last_run; int last_status; int last_child_hung; int enable_called; //This struct is used to pass arguments to the debugging thread. It //should only be accessed while in the create_thread_process function, //as they will point to memory that is out of scope afterwards. The //strings in the thread_args should not be freed. thread_args_t thread_args; }; typedef struct debug_state debug_state_t; ================================================ FILE: instrumentation/dynamorio_instrumentation.c ================================================ #define _CRT_RAND_S #include #include #include #include #include "instrumentation.h" #include "dynamorio_instrumentation.h" #include #include //AFL headers #include "winafl_config.h" #include "winafl_hash.h" #include "winafl_types.h" #include "winafl_alloc_inl.h" //#define DEBUG_TRACE_BITS static BOOL connect_to_pipe(HANDLE pipe, char * pipe_name, DWORD timeout); static HANDLE create_pipe(char * pipe_name, DWORD timeout); static void cleanup_pipe(HANDLE * pipe); static int has_new_coverage_per_module(dynamorio_state_t * state); static int has_new_coverage(u8 * trace_bits, u8 * virgin_bits, u8 * ignore_bytes, u32 * last_shm_hash, char * dump_map_dir); //////////////////////////////////////////////////////////////// // SHM Memory Analysis and Misc Functions ////////////////////// //////////////////////////////////////////////////////////////// /* The code in this section (SHM Memory Analysis and Misc Functions) was taken from WinAFL and falls under the following license: Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. The code in this section has been modified from the original to suit the purposes of this project. */ /** * Allocates a string and formats it based on the passed arguments. * @param format_string - the printf-style format string to format the allocated string with * @return - the newly allocated string, that should be freed with ck_free, containing the * formatted text. */ char *alloc_printf(const char *format_string, ...) { va_list argptr; char* _tmp; s32 _len; va_start(argptr, format_string); _len = vsnprintf(NULL, 0, format_string, argptr); if (_len < 0) FATAL_MSG("Whoa, snprintf() fails?!"); _tmp = (char *)ck_alloc(_len + 1); vsnprintf(_tmp, _len + 1, format_string, argptr); va_end(argptr); return _tmp; } #define FFL(_b) (0xffULL << ((_b) << 3)) #define FF(_b) (0xff << ((_b) << 3)) /** * Check if the current execution path brings anything new to the table. * Update virgin bits to reflect the finds. Returns 1 if the only change is * the hit-count for a particular tuple; 2 if there are new tuples seen. * Updates the map, so subsequent calls will always return 0. * * This function is called after every exec() on a fairly large buffer, so * it needs to be fast. We do this in 32-bit and 64-bit flavors. 
* * @param trace_bits - the bitmap representing the edges hit in the last run * @param virgin_map - the bitmap representing the edges that have been hit so far * @return - 1 if the only change is hit-count, 2 if there are new edges, 0 otherwise */ static inline u8 has_new_bits(u8*trace_bits, u8* virgin_map) { #ifdef __x86_64__ u64* current = (u64*)trace_bits; u64* virgin = (u64*)virgin_map; u32 i = (MAP_SIZE >> 3); #else u32* current = (u32*)trace_bits; u32* virgin = (u32*)virgin_map; u32 i = (MAP_SIZE >> 2); #endif /* ^__x86_64__ */ u8 ret = 0; while (i--) { #ifdef __x86_64__ u64 cur = *current; u64 vir = *virgin; #else u32 cur = *current; u32 vir = *virgin; #endif /* ^__x86_64__ */ /* Optimize for *current == ~*virgin, since this will almost always be the case. */ if (cur & vir) { if (ret < 2) { /* This trace did not have any new bytes yet; see if there's any current[] byte that is non-zero when virgin[] is 0xff. */ #ifdef __x86_64__ if (((cur & FFL(0)) && (vir & FFL(0)) == FFL(0)) || ((cur & FFL(1)) && (vir & FFL(1)) == FFL(1)) || ((cur & FFL(2)) && (vir & FFL(2)) == FFL(2)) || ((cur & FFL(3)) && (vir & FFL(3)) == FFL(3)) || ((cur & FFL(4)) && (vir & FFL(4)) == FFL(4)) || ((cur & FFL(5)) && (vir & FFL(5)) == FFL(5)) || ((cur & FFL(6)) && (vir & FFL(6)) == FFL(6)) || ((cur & FFL(7)) && (vir & FFL(7)) == FFL(7))) ret = 2; else ret = 1; #else if (((cur & FF(0)) && (vir & FF(0)) == FF(0)) || ((cur & FF(1)) && (vir & FF(1)) == FF(1)) || ((cur & FF(2)) && (vir & FF(2)) == FF(2)) || ((cur & FF(3)) && (vir & FF(3)) == FF(3))) ret = 2; else ret = 1; #endif /* ^__x86_64__ */ } *virgin = vir & ~cur; } current++; virgin++; } return ret; } #ifdef DEBUG_TRACE_BITS static int first_run = 1; #endif /** * Check if the current execution path brings anything new to the table. * Update virgin bits to reflect the finds. Returns 1 if the only change is * the hit-count for a particular tuple; 2 if there are new tuples seen. * Updates the map, so subsequent calls will always return 0. 
* * This function is called after every exec() on a fairly large buffer, so * it needs to be fast. We do this in 32-bit and 64-bit flavors. * * This function is identical to has_new_bits, but takes an ignore_bytes bitmap * that lists bits that should be ignored when determining if a new edge has been found * * @param trace_bits - the bitmap representing the edges hit in the last run * @param virgin_map - the bitmap representing the edges that have been hit so far * @param ignore_bytes - a bitmap representing the edges that should be ignored when reporting * new edges * @return - 1 if the only change is hit-count, 2 if there are new edges, 0 otherwise */ static inline u8 has_new_bits_with_ignore(u8*trace_bits, u8* virgin_map, u8* ignore_bytes) { u8 ret = 0; u32 i = 0; u8 trace, virgin, ignore; while (i < MAP_SIZE) { trace = *trace_bits; virgin = *virgin_map; ignore = *ignore_bytes; /* Optimize for *current == ~*virgin, since this will almost always be the case. */ if (!ignore && (trace & virgin)) { if (ret < 2) { /* This trace did not have any new bytes yet; see if there's any current[] byte that is non-zero when virgin[] is 0xff. 
*/ if (trace & FFL(0) && (virgin & FFL(0)) == FFL(0)) ret = 2; else ret = 1; } #ifdef DEBUG_TRACE_BITS if (!first_run && (trace & FFL(0) && (virgin & FFL(0)) == FFL(0))) printf("diff byte %d\n", i); #endif *virgin_map = virgin & ~trace; } trace_bits++; virgin_map++; ignore_bytes++; i++; } #ifdef DEBUG_TRACE_BITS first_run = 0; #endif return ret; } #define AREP4(_sym) (_sym), (_sym), (_sym), (_sym) #define AREP8(_sym) AREP4(_sym), AREP4(_sym) #define AREP16(_sym) AREP8(_sym), AREP8(_sym) #define AREP32(_sym) AREP16(_sym), AREP16(_sym) #define AREP64(_sym) AREP32(_sym), AREP32(_sym) #define AREP128(_sym) AREP64(_sym), AREP64(_sym) static u8 count_class_lookup[256] = { /* 0 - 3: 4 */ 0, 1, 2, 4, /* 4 - 7: +4 */ AREP4(8), /* 8 - 15: +8 */ AREP8(16), /* 16 - 31: +16 */ AREP16(32), /* 32 - 127: +96 */ AREP64(64), AREP32(64), /* 128+: +128 */ AREP128(128) }; #ifdef __x86_64__ /** * Destructively classify execution counts in a trace. This is used as a * preprocessing step for any newly acquired traces. Called on every exec, * must be fast. * @param mem - the bitmap which defines the edges that have been hit by a trace */ static inline void classify_counts(u64* mem) { u32 i = MAP_SIZE >> 3; while (i--) { /* Optimize for sparse bitmaps. */ if (*mem) { u8* mem8 = (u8*)mem; mem8[0] = count_class_lookup[mem8[0]]; mem8[1] = count_class_lookup[mem8[1]]; mem8[2] = count_class_lookup[mem8[2]]; mem8[3] = count_class_lookup[mem8[3]]; mem8[4] = count_class_lookup[mem8[4]]; mem8[5] = count_class_lookup[mem8[5]]; mem8[6] = count_class_lookup[mem8[6]]; mem8[7] = count_class_lookup[mem8[7]]; } mem++; } } #else /** * Destructively classify execution counts in a trace. This is used as a * preprocessing step for any newly acquired traces. Called on every exec, * must be fast. * @param mem - the bitmap which defines the edges that have been hit by a trace */ static inline void classify_counts(u32* mem) { u32 i = MAP_SIZE >> 2; while (i--) { /* Optimize for sparse bitmaps. 
*/ if (*mem) { u8* mem8 = (u8*)mem; mem8[0] = count_class_lookup[mem8[0]]; mem8[1] = count_class_lookup[mem8[1]]; mem8[2] = count_class_lookup[mem8[2]]; mem8[3] = count_class_lookup[mem8[3]]; } mem++; } } #endif /* ^__x86_64__ */ /** * This function merges the bitmap in src into the bitmap in dest * @param dest - the bitmap that will be combined with the src bitmap. * @param src - the bitmap that will be added to the dest bitmap */ void merge_bitmaps(u8 * dest, const u8 * src) { size_t i; for (i = 0; i < MAP_SIZE; i++) dest[i] &= src[i]; } //////////////////////////////////////////////////////////////// // Process and SHM Management ////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function cleans up the shared memory. * @param trace_bits - a pointer to the mapped shared memory region * @param shm_handle - a Windows handle to the shared memory region */ static void remove_shm(u8 * trace_bits, HANDLE shm_handle) { if (trace_bits) UnmapViewOfFile(trace_bits); if (shm_handle) CloseHandle(shm_handle); } /** * This function creates and maps a shared memory region. * @param fuzzer_id - the id associated with this shared memory region * @param index - the index of the target module that this shared memory region will be associated with. * If this shared memory region isn't associated with a target module, -1 should be passed in. 
* @param out_trace_bits - A pointer to a pointer of memory that will be assigned to a mapped view of * the shared memory region * @param for_edges - whether the shm region is for the full edge recording or not * @return - a Windows handle to the shared memory region on success, or NULL on failure */ static HANDLE setup_shm_region(char * fuzzer_id, int index, u8 ** out_trace_bits, int for_edges) { char* shm_str; HANDLE shm_handle; DWORD size; if (for_edges) size = EDGES_SHM_SIZE; else size = MAP_SIZE; if (index < 0) shm_str = (char *)alloc_printf("afl_shm_%s", fuzzer_id); else shm_str = (char *)alloc_printf("afl_shm_%s_%d", fuzzer_id, index); DEBUG_MSG("Setting up shm region: %s", shm_str); shm_handle = CreateFileMapping( INVALID_HANDLE_VALUE, // use paging file NULL, // default security PAGE_READWRITE, // read/write access 0, // maximum object size (high-order DWORD) size, // maximum object size (low-order DWORD) shm_str); // name of mapping object ck_free(shm_str); if (shm_handle == NULL) { if (GetLastError() != ERROR_ALREADY_EXISTS) FATAL_MSG("CreateFileMapping failed"); return NULL; } *out_trace_bits = (u8 *)MapViewOfFile( shm_handle, // handle to map object FILE_MAP_ALL_ACCESS, // read/write permission 0, 0, size ); if (!*out_trace_bits) FATAL_MSG("MapViewOfFile() failed"); return shm_handle; } /** * This function generates a fuzzer_id for use with mapping of shared memory regions and assigns * the fuzzer_id to state->fuzzer_id (first freeing the previous state->fuzzer_id if set). * @param state - The dynamorio_state_t object containing this instrumentation's state */ void generate_fuzzer_id(dynamorio_state_t * state) { unsigned int seeds[2]; if (state->fuzzer_id != NULL) { ck_free(state->fuzzer_id); state->fuzzer_id = NULL; } rand_s(&seeds[0]); rand_s(&seeds[1]); state->fuzzer_id = (char *)alloc_printf("%I32x%I32x", seeds[0], seeds[1]); } /** * This function configures shared memory and virgin_bits. 
* @param state - The dynamorio_state_t object containing this instrumentation's state * @param reset_virgin_bits - Whether the virgin bits (that define which edges have already been * seen), should be reset */ static void setup_shm_and_pick_fuzzer_id(dynamorio_state_t * state, int reset_virgin_bits) { u8 attempts = 0; target_module_t * target_module; if (state->per_module_coverage) { //First pick a fuzzer id by trying to create the first shm region target_module = state->modules; while (attempts < 5 && !target_module->shm_handle) { attempts++; generate_fuzzer_id(state); if(state->edges) target_module->shm_handle = setup_shm_region(state->fuzzer_id, target_module->index, (u8**)&target_module->edges_memory, 1); else target_module->shm_handle = setup_shm_region(state->fuzzer_id, target_module->index, &target_module->trace_bits, 0); } if (!target_module->shm_handle) { PFATAL("Couldn't create shm region for %s module\n", state->module_names[target_module->index]); } if(reset_virgin_bits) memset(target_module->virgin_bits, 0xFF, MAP_SIZE); if (state->edges) memset(target_module->edges_memory, 0, EDGES_SHM_SIZE); //Next create the rest of them with that fuzzer id target_module = target_module->next; while (target_module) { if (state->edges) target_module->shm_handle = setup_shm_region(state->fuzzer_id, target_module->index, (u8**)&target_module->edges_memory, 1); else target_module->shm_handle = setup_shm_region(state->fuzzer_id, target_module->index, &target_module->trace_bits, 0); if (!target_module->shm_handle) FATAL_MSG("Couldn't create shm region for %s module", state->module_names[target_module->index]); if (reset_virgin_bits) memset(target_module->virgin_bits, 0xFF, MAP_SIZE); if (state->edges) memset(target_module->edges_memory, 0, EDGES_SHM_SIZE); target_module = target_module->next; } } else { while (attempts < 5 && !state->shm_handle) { attempts++; generate_fuzzer_id(state); if (state->edges) state->shm_handle = setup_shm_region(state->fuzzer_id, -1, 
(u8**)&state->edges_memory, 1); else state->shm_handle = setup_shm_region(state->fuzzer_id, -1, &state->trace_bits, 0); } if (!state->shm_handle) { FATAL_MSG("Couldn't create shm region"); } if (reset_virgin_bits) memset(state->virgin_bits, 0xFF, MAP_SIZE); if (state->edges) memset(state->edges_memory, 0, EDGES_SHM_SIZE); } } /** * This function kills a process with the specified exit code * @param dwProcessId - the process id of the process to kill * @param uExitCode - the exit code that the specified process should be killed with * @return - a BOOL describing whether the process was successfully terminated (TRUE) or not (FALSE) */ BOOL TerminateProcessByPid(DWORD dwProcessId, UINT uExitCode) { DWORD dwDesiredAccess = PROCESS_TERMINATE; BOOL bInheritHandle = FALSE; HANDLE hProcess = OpenProcess(dwDesiredAccess, bInheritHandle, dwProcessId); if (hProcess == NULL) return FALSE; BOOL result = TerminateProcess(hProcess, uExitCode); CloseHandle(hProcess); return result; } /** * This function wraps the creation of a pipe * @param pipe_name - The name of the pipe to create * @param timeout - The maximum time to wait for the pipe to be created * @return - A handle to the created pipe */ static HANDLE create_pipe(char * pipe_name, DWORD timeout) { HANDLE pipe; pipe = CreateNamedPipe( pipe_name, // pipe name PIPE_ACCESS_DUPLEX | // read/write access FILE_FLAG_OVERLAPPED, // asynchronous (so we can time out) 0, 1, // max. instances 512, // output buffer size 512, // input buffer size timeout, // client time-out NULL); // default security attribute if (pipe == INVALID_HANDLE_VALUE) FATAL_MSG("CreateNamedPipe failed for pipe %s, GLE=%d.", pipe_name, GetLastError()); return pipe; } /** * This function connects to a pipe. * @param pipe - A handle to the pipe to connect to * @param pipe_name - The name of the pipe to connect to. Only used in help messages, if creation fails. 
* @param timeout - The maximum time to wait for the client to connect to the pipe * @return - TRUE if the connection succeeds, FALSE otherwise */ static BOOL connect_to_pipe(HANDLE pipe, char * pipe_name, DWORD timeout) { BOOL success = FALSE; OVERLAPPED overlap = { 0 }; overlap.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if (ConnectNamedPipe(pipe, &overlap)) { // Overlapped ConnectNamedPipe is expected to always return 0 ERROR_MSG("ConnectNamedPipe failed for the pipe %s, GLE=%d.", pipe_name, GetLastError()); return FALSE; } switch (GetLastError()) { case ERROR_PIPE_CONNECTED: success = TRUE; break; case ERROR_IO_PENDING: if (WaitForSingleObject(overlap.hEvent, timeout) == WAIT_OBJECT_0) { // Pipe is connected DWORD ignored_bytestransferred; success = GetOverlappedResult(pipe, &overlap, &ignored_bytestransferred, FALSE); break; } else { // Timed out or failed CancelIo(pipe); } } if (!success) { ERROR_MSG("Did not receive connection from DynamoRIO child process on pipe %s, GLE=%d.", pipe_name, GetLastError()); ERROR_MSG("Try increasing the instrumentation timeout option (currently set to %lu).", timeout); } CloseHandle(overlap.hEvent); return success; } /** * This function cleans up a pipe. * @param pipe - A handle to the pipe to clean up */ static void cleanup_pipe(HANDLE * pipe) { if (*pipe) { DisconnectNamedPipe(*pipe); CloseHandle(*pipe); *pipe = NULL; } } /** * This function terminates the fuzzed process (running in drrun.exe). * @param state - The dynamorio_state_t object containing this instrumentation's state * @param wait_exit - The maximum number of milliseconds to wait when trying to wait for fuzzed process. */ static void destroy_target_process(dynamorio_state_t * state, int wait_exit) { //TODO this seems like it'll be really slow. 
Optimize it if possible char kill_cmd[512]; HANDLE kill_handle; if (state->child_handle) { //nudge the child process if (WaitForSingleObject(state->child_handle, wait_exit) == WAIT_TIMEOUT) { //Try to nudge the process first snprintf(kill_cmd, sizeof(kill_cmd) - 1, "%s\\drconfig.exe -nudge_pid %d 0 1", state->dynamorio_dir, state->child_pid); if (start_process_and_write_to_stdin(kill_cmd, NULL, 0, &kill_handle)) FATAL_MSG("Could not nudge process with drconfig"); CloseHandle(kill_handle); //wait until the child process exits if (WaitForSingleObject(state->child_handle, state->timeout) == WAIT_TIMEOUT) { //It didn't exit, so kill drrun if (!TerminateProcess(state->child_handle, 9)) FATAL_MSG("Could not stop fuzzed program (pid %d) with TerminateProcess (GLE=%d)", state->child_pid, GetLastError()); //Clean up the target process as well TerminateProcessByPid(state->child_pid, 9); } } CloseHandle(state->child_handle); state->child_handle = NULL; } cleanup_pipe(&state->pipe_handle); } /** * This function starts the fuzzed process inside of DynamoRIO * @param state - The dynamorio_state_t object containing this instrumentation's state * @param cmd_line - the command line of the fuzzed process to start * @param stdin_input - the input to pass to the fuzzed process's stdin * @param stdin_length - the length of the stdin_input parameter */ static void create_target_process(dynamorio_state_t * state, char* cmd_line, char * stdin_input, size_t stdin_length) { char* dr_cmd; FILE *fp; size_t pidsize; char buffer[MAX_PATH]; state->pipe_handle = create_pipe(state->pipe_name, state->timeout); //Create the child process dr_cmd = alloc_printf( "%s\\drrun.exe -pidfile %s -no_follow_children -c \"%s\\winafl.dll\" %s -fuzzer_id %s -- %s", state->dynamorio_dir, state->pidfile, state->winafl_dir, state->client_params, state->fuzzer_id, cmd_line); if (start_process_and_write_to_stdin(dr_cmd, stdin_input, stdin_length, &state->child_handle)) FATAL_MSG("Child process died when started with 
command line: %s", dr_cmd); if (!connect_to_pipe(state->pipe_handle, state->pipe_name, state->timeout)) //Connect to the comms pipe { if (get_process_status(state->child_handle) == 0) // process is not alive FATAL_MSG("Child process died when started with command line: %s", dr_cmd); else FATAL_MSG("Error communicating with child process with command line: %s", dr_cmd); } ck_free(dr_cmd); //by the time pipe has connected the pidfile must have been created fp = fopen(state->pidfile, "rb"); if (!fp) FATAL_MSG("Error opening pidfile %s", state->pidfile); pidsize = fread(buffer, 1, sizeof(buffer)-1, fp); buffer[pidsize] = 0; fclose(fp); remove(state->pidfile); state->child_pid = atoi(buffer); //Reset the fuzz iteration count state->fuzz_iterations_current = 0; } /** * This function ends the fuzzed process (if it wasn't previously ended), cleans * up the pipe, and calculates the whether a new path was taken. * @param state - The dynamorio_state_t object containing this instrumentation's state * @return - returns -1 on error, -2 when in edge mode, or the results of * has_new_coverage/has_new_coverage_per_module functions */ static int finish_fuzz_round(dynamorio_state_t * state) { DWORD num_bytes_available; char result; DWORD num_read; int ret; if (state->analyzed_last_round) return state->last_path_was_new; //Determine if the last process hung or not. If there's nothing in the pipe, then it obviously hung. 
if (!PeekNamedPipe(state->pipe_handle, NULL, 0, NULL, &num_bytes_available, NULL)) return -1; if (!num_bytes_available) { destroy_target_process(state, 0); state->last_process_status = FUZZ_HANG; } else { //Read the result from the child ReadFile(state->pipe_handle, &result, 1, &num_read, NULL); //See if we should restart the client state->fuzz_iterations_current++; if (state->fuzz_iterations_current == state->fuzz_iterations_max) { destroy_target_process(state, state->timeout); } //Record the process status if (num_read == 1 && result == 'K') //Normal { state->last_process_status = FUZZ_NONE; } else //The process hung or crashed, restart it { destroy_target_process(state, 0); if (num_read == 1 && result == 'C') //Crash state->last_process_status = FUZZ_CRASH; else //unknown char or couldn't read, Hang state->last_process_status = FUZZ_HANG; } } //Now check to see if the instrumentation found a new path if (state->edges) ret = -2; else if (state->per_module_coverage) ret = has_new_coverage_per_module(state); else ret = has_new_coverage(state->trace_bits, state->virgin_bits, state->ignore_bytes, &state->last_shm_hash, state->dump_map_dir); state->last_path_was_new = ret; state->analyzed_last_round = 1; return ret; } /** * Checks if the target process is done fuzzing the inputs yet. If it has finished, it will have * written the results to the dynamorio instrumentation's pipe. * @param state - The dynamorio_state_t object containing this instrumentation's state * @return - 0 if the process has not done testing the fuzzed input, 1 if the process is done, -1 on error. 
*/ int dynamorio_is_process_done(void * instrumentation_state) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; DWORD num_bytes_available; if (!state->enable_called) return -1; if (!PeekNamedPipe(state->pipe_handle, NULL, 0, NULL, &num_bytes_available, NULL)) return -1; return num_bytes_available != 0; } //////////////////////////////////////////////////////////////// // Instrumentation methods ///////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function loads the ignore_bytes (the bytes in the edges bitmap that should be ignored * when checking which edges are new). * @param state - The dynamorio_state_t object containing this instrumentation's state */ static void load_ignore_bytes(dynamorio_state_t * state) { char filename[4096]; int size; target_module_t * target_module; if (!state->per_module_coverage && state->ignore_bytes_file) { size = read_file(state->ignore_bytes_file, (char **)&state->ignore_bytes); if (size < 0) FATAL_MSG("Could not open ignore bytes file %s", state->ignore_bytes_file); else if (size != MAP_SIZE) FATAL_MSG("Incorrect size of ignore bytes file %s", state->ignore_bytes_file); } else if(state->per_module_coverage && state->ignore_bytes_dir) { FOREACH_MODULE(target_module, state) { snprintf(filename, sizeof(filename), "%s\\%s.dat", state->ignore_bytes_dir, state->module_names[target_module->index]); size = read_file(filename, (char **)&target_module->ignore_bytes); if (size >= 0 && size != MAP_SIZE) //ignore missing ignore bytes files FATAL_MSG("Incorrect size of ignore bytes file %s", filename); else if (size >= 0) DEBUG_MSG("Loaded ignore bytes file %s for modules %s", filename, state->module_names[target_module->index]); } } } /** * This function generates the arguments that will be passed to the winafl.dll DynamoRIO tool. 
* @param state - The dynamorio_state_t object containing this instrumentation's state */ static void generate_client_params(dynamorio_state_t * state) { char * temp; size_t size, i; //Format client params size = 10 * 4096; temp = (char *)malloc(size); if (!temp) FATAL_MSG("Couldn't get memory for client_params"); snprintf(temp, size - 1, "%s -fuzz_iterations %d", state->client_params ? state->client_params : "", state->fuzz_iterations_max); if (state->num_modules) { char line[1024]; char * modules_filename = get_temp_filename(".txt"); FILE * fp = fopen(modules_filename, "wb"); if (!fp) FATAL_MSG("Couldn't open modules file '%s'", modules_filename); for (i = 0; i < state->num_modules; i++) { snprintf(line, sizeof(line), "%s\n", state->module_names[i]); fwrite(line, 1, strlen(line), fp); } fclose(fp); snprintf(temp, size - 1, "%s -coverage_module_file %s", temp, modules_filename); free(modules_filename); } if (state->per_module_coverage) snprintf(temp, size - 1, "%s -per_module_coverage", temp); if (state->edges) snprintf(temp, size - 1, "%s -verbose_edges", temp); if (state->client_params) free(state->client_params); state->client_params = temp; } /** * This function adds the string \bin32\ or \bin64\ to the end of the provided * dynamorio base path. The architecture is chosen to match the binary in * target_path, or the fuzzer if target_path is NULL. 
* @param base_path - char * containing the base dynamorio directory * @param target_path - char * containing the path to the binary being fuzzed, * or NULL if that option has not been provided to the fuzzer * @return - A newly allocated char * pointing to the path with the architecture * suffix added */ static char * add_architecture_to_path(char * base_path, char * target_path) { char * temp; DWORD binary_type; temp = (char *)malloc(MAX_PATH + 1); if (!temp) FATAL_MSG("Couldn't get memory for dynamorio_dir"); memset(temp, 0, MAX_PATH + 1); if (target_path) { if (GetBinaryTypeA(target_path, &binary_type)) { //Pick the path based on the target file's architecture if (binary_type == SCS_32BIT_BINARY) snprintf(temp, MAX_PATH - 1, "%s\\bin32\\", base_path); else if (binary_type == SCS_64BIT_BINARY) snprintf(temp, MAX_PATH - 1, "%s\\bin64\\", base_path); //Assign the default path if (binary_type == SCS_32BIT_BINARY || binary_type == SCS_64BIT_BINARY) return temp; } } //Couldn't get the architecture from the target path //so just guess the dynamorio path that matches the current architecture #ifdef _M_X64 snprintf(temp, MAX_PATH - 1, "%s\\bin64\\", base_path); #else snprintf(temp, MAX_PATH - 1, "%s\\bin32\\", base_path); #endif return temp; } /** * This function populates the default_dynamorio_dir field of the state. It * searches the folders containing the fuzzer to find a dynamorio directory in * one of a few likely locations. 
* @param state - a pointer to the state to modify * @param target_path - char * containing the path to the binary being fuzzed, * or NULL if that option has not been provided to the fuzzer */ static void pick_default_dynamorio_dir(dynamorio_state_t * state, char * target_path) { char * path; size_t pathlen; if (state->default_dynamorio_dir) free(state->default_dynamorio_dir); state->default_dynamorio_dir = NULL; //Try to autodetect the dynamorio directory // Usual location for binary distribution path = filename_relative_to_binary_dir("..\\dynamorio\\bin32\\drrun.exe"); if (!path) { // Usual location for 32-bit developer environment path = filename_relative_to_binary_dir("..\\..\\..\\dynamorio\\bin32\\drrun.exe"); } if (!path) { // Usual location for 64-bit developer environment path = filename_relative_to_binary_dir("..\\..\\..\\..\\dynamorio\\bin64\\drrun.exe"); } if (!path) return; pathlen = strlen(path); path[pathlen - 16] = '\0'; // Remove "\\binXX\\drrun.exe" state->default_dynamorio_dir = add_architecture_to_path(path, target_path); free(path); } /** * This function copies a dynamorio_state_t object, or allocates a new one with the default parameters. * @param original - The dynamorio_state_t object that should be copied. If this parameter is NULL, * a new dynamorio_state_t is allocated with the default options. * @return - the newly allocated dynamorio_state_t object. 
*/ static dynamorio_state_t * copy_state(dynamorio_state_t * original) { size_t i; dynamorio_state_t * ret = (dynamorio_state_t *)malloc(sizeof(dynamorio_state_t)); target_module_t * target_module, *ret_target_module, *new_target_module; if (!ret) return NULL; memset(ret, 0, sizeof(dynamorio_state_t)); if (!original) { //No original passed in, just make the default one pick_default_dynamorio_dir(ret, NULL); ret->default_winafl_dir = filename_relative_to_binary_dir("."); ret->fuzz_iterations_max = 1; ret->timeout = 1000; //1 second ret->edges = 0; ret->analyzed_last_round = 1; return ret; } //Copy all the relevant options ret->default_dynamorio_dir = strdup(original->default_dynamorio_dir); if (original->dynamorio_dir) ret->dynamorio_dir = strdup(original->dynamorio_dir); ret->winafl_dir = strdup(original->winafl_dir); if (original->target_path) ret->target_path = strdup(original->target_path); if (original->dump_map_dir) ret->dump_map_dir = strdup(original->dump_map_dir); if (original->ignore_bytes_dir) ret->ignore_bytes_dir = strdup(original->ignore_bytes_dir); if (original->ignore_bytes_file) ret->ignore_bytes_file = strdup(original->ignore_bytes_file); ret->per_module_coverage = original->per_module_coverage; ret->fuzz_iterations_max = original->fuzz_iterations_max; if (original->client_params) ret->client_params = strdup(original->client_params); ret->timeout = original->timeout; ret->fuzz_iterations_current = original->fuzz_iterations_current; ret->edges = original->edges; if (original->per_module_coverage) { ret->num_modules = original->num_modules; ret->module_names = (char **)malloc(ret->num_modules * sizeof(char *)); for (i = 0; i < ret->num_modules; i++) ret->module_names[i] = strdup(original->module_names[i]); ret_target_module = NULL; FOREACH_MODULE(target_module, original) { new_target_module = (target_module_t *)malloc(sizeof(target_module_t)); memset(new_target_module, 0, sizeof(target_module_t)); new_target_module->index = target_module->index; 
memcpy(new_target_module->virgin_bits, original->virgin_bits, MAP_SIZE); new_target_module->last_shm_hash = target_module->last_shm_hash; new_target_module->last_path_was_new = target_module->last_path_was_new; if (target_module->ignore_bytes) { new_target_module->ignore_bytes = (u8 *)malloc(MAP_SIZE); memcpy(new_target_module->ignore_bytes, target_module->ignore_bytes, MAP_SIZE); } //Add the new module to the linked list of modules if (ret_target_module) ret->modules = new_target_module; else ret_target_module->next = new_target_module; ret_target_module = new_target_module; } } else { memcpy(ret->virgin_bits, original->virgin_bits, MAP_SIZE); ret->last_shm_hash = original->last_shm_hash; ret->last_path_was_new = original->last_path_was_new; } return ret; } /** * This function creates a dynamorio_state_t object based on the given options. * @param options - A JSON string of the options to set in the new dynamorio_state_t. See the * help function for more information on the specific options available. 
* @return the dynamorio_state_t generated from the options in the JSON options string, or NULL on failure
 */
static dynamorio_state_t * setup_options(char * options)
{
	dynamorio_state_t * state;
	size_t i, length;
	target_module_t * target_module;
	char * temp;
	char buffer[MAX_PATH];

	//Start from the default state and overwrite it with whatever the options specify
	state = copy_state(NULL);
	if (!state)
		return NULL;

	//Parse the options
	//NOTE(review): the PARSE_OPTION_* macros are defined elsewhere; they are handed
	//dynamorio_cleanup, presumably to release the state when parsing fails - confirm.
	PARSE_OPTION_STRING(state, options, dynamorio_dir, "dynamorio_dir", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, winafl_dir, "winafl_dir", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, target_path, "target_path", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, dump_map_dir, "dump_map_dir", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, ignore_bytes_dir, "ignore_bytes_dir", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, ignore_bytes_file, "ignore_bytes_file", dynamorio_cleanup);
	PARSE_OPTION_STRING(state, options, client_params, "client_params", dynamorio_cleanup);
	PARSE_OPTION_INT(state, options, fuzz_iterations_max, "fuzz_iterations", dynamorio_cleanup);
	PARSE_OPTION_INT(state, options, per_module_coverage, "per_module_coverage", dynamorio_cleanup);
	PARSE_OPTION_INT(state, options, timeout, "timeout", dynamorio_cleanup);
	PARSE_OPTION_ARRAY(state, options, module_names, num_modules, "coverage_modules", dynamorio_cleanup);
	PARSE_OPTION_INT(state, options, edges, "edges", dynamorio_cleanup);

	if (!state->num_modules && state->target_path)
	{
		//if the user didn't specify a module, we'll pick the executable itself by default
		state->num_modules = 1;
		state->module_names = malloc(sizeof(char *));
		//The file name is never longer than the full path, so a buffer sized for the
		//path always has room for the name and its terminator
		length = strlen(state->target_path) + 1;
		state->module_names[0] = malloc(length);
		strncpy(state->module_names[0], PathFindFileName(state->target_path), length);
		INFO_MSG("No Coverage Module selected, choosing the target executable \"%s\" by default.", state->module_names[0]);
	}

	if (!state->num_modules)
		FATAL_MSG("No Coverage Module selected, please specify one with the coverage_modules option.");

	//The default state was built without a target; redo the autodetection now that
	//the target's architecture can be taken into account
	if (state->target_path)
	{
		pick_default_dynamorio_dir(state, state->target_path);
	}

	if (state->dynamorio_dir) //if the user specified a dynamorio directory, use that. Otherwise use the default one
	{
		//Replace the user supplied base directory with its architecture specific subdirectory
		temp = state->dynamorio_dir;
		state->dynamorio_dir = add_architecture_to_path(temp, state->target_path);
		free(temp);
	}
	else
	{
		if (state->default_dynamorio_dir)
			state->dynamorio_dir = strdup(state->default_dynamorio_dir);
		else
			FATAL_MSG("Dynamorio was not found in the default location, and dynamorio_dir was not specified.");
	}

	if (!state->winafl_dir)
	{
		//if the user didn't specify a winafl directory, try to automatically determine one
		state->winafl_dir = add_architecture_to_path(state->default_winafl_dir, state->target_path);
	}

	//Verify winafl.dll exists
	snprintf(buffer, sizeof(buffer) - 1, "%s\\winafl.dll", state->winafl_dir);
	if (access(buffer, 0))
		FATAL_MSG("Failed to find the winafl.dll in %s. Use the winafl_dir option to modify the directory to look for winafl.dll, and ensure that you have matching bitness (bin32 vs 64) between winafl.dll and the fuzz target.", state->winafl_dir);

	//printf("Modules (%zu):\n", state->num_modules);
	//Create one target_module_t per module name. Each new module is pushed onto the
	//front of the list, so the list ends up in reverse index order.
	for (i = 0; i < state->num_modules; i++)
	{
		//printf("%d: %s\n", i, state->module_names[i]);
		target_module = (target_module_t *)malloc(sizeof(target_module_t));
		memset(target_module, 0, sizeof(target_module_t));
		target_module->index = i;
		target_module->next = state->modules;
		state->modules = target_module;
	}
	//printf("\n");

	generate_client_params(state);
	load_ignore_bytes(state);
	return state;
}

/**
 * This function allocates and initializes a new instrumentation specific state object based on the given options.
* @param options - a JSON string that contains the instrumentation specific string of options * @param state - an instrumentation specific JSON string previously returned from dynamorio_get_state that should be loaded * @return - An instrumentation specific state object on success or NULL on failure */ void * dynamorio_create(char * options, char * state) { dynamorio_state_t * dynamorio_state; dynamorio_state = setup_options(options); if (!dynamorio_state) return NULL; if (state && dynamorio_set_state(dynamorio_state, state)) { dynamorio_cleanup(dynamorio_state); return NULL; } return dynamorio_state; } /** * This function cleans up all resources with the passed in instrumentation state. * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * This state object should not be referenced after this function returns. */ void dynamorio_cleanup(void * instrumentation_state) { target_module_t * target_module, *next; dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; destroy_target_process(state, 0); remove_shm(state->trace_bits, state->shm_handle); for (target_module = state->modules; target_module; ) { next = target_module->next; remove_shm(target_module->trace_bits, target_module->shm_handle); free(state->ignore_bytes); free(state->module_names[target_module->index]); free(target_module); target_module = next; } if (state->pidfile) ck_free(state->pidfile); if (state->pipe_name) ck_free(state->pipe_name); if (state->fuzzer_id) ck_free(state->fuzzer_id); free(state->default_dynamorio_dir); free(state->dynamorio_dir); free(state->default_winafl_dir); free(state->winafl_dir); free(state->target_path); free(state->dump_map_dir); free(state->client_params); free(state->ignore_bytes_dir); free(state->ignore_bytes_file); free(state->module_names); free(state->ignore_bytes); free(state); } /** * This function merges the coverage information from two instrumentation states. 
* @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @param other_instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @return - An instrumentation specific state object that contains the combination of both of the passed in instrumentation states * on success, or NULL on failure */ void * dynamorio_merge(void * instrumentation_state, void * other_instrumentation_state) { target_module_t * ret_module, * new_module; size_t i, j; int found; dynamorio_state_t * ret; dynamorio_state_t * first = (dynamorio_state_t *)instrumentation_state; dynamorio_state_t * second = (dynamorio_state_t *)other_instrumentation_state; //Check that the instrumenation states are similar enough if (first->per_module_coverage != second->per_module_coverage || first->num_modules != second->num_modules) return NULL; for (i = 0; i < first->num_modules; i++) { found = 0; for (j = 0; j < first->num_modules; j++) { if (!strcmp(first->module_names[i], second->module_names[j])) found = 1; } if (!found) return NULL; } ret = copy_state(first); if (!ret) return NULL; if (ret->per_module_coverage) { FOREACH_MODULE(ret_module, ret) { FOREACH_MODULE(new_module, second) { if (!strcmp(ret->module_names[ret_module->index], second->module_names[new_module->index])) { merge_bitmaps(ret_module->virgin_bits, new_module->virgin_bits); //We don't really need to track these, they're not relevant for merged instrumentations ret_module->last_path_was_new = ret_module->last_shm_hash = 0; } } } } else { merge_bitmaps(ret->virgin_bits, second->virgin_bits); ret->last_path_was_new = ret->last_shm_hash = 0; } return ret; } /** * This function returns the state information holding the previous execution path info. The returned value can later be passed to * dynamorio_create or dynamorio_set_state to load the state. 
* @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function
 * @return - A JSON string that holds the instrumentation specific state object information on success, or NULL on failure
 */
char * dynamorio_get_state(void * instrumentation_state)
{
	dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state;
	json_t *state_obj, *module_obj, *temp, *module_list;
	target_module_t * target_module;
	char * ret;

	state_obj = json_object();
	if (!state_obj)
		return NULL;

	ADD_INT(temp, state->last_process_status, state_obj, "last_process_status");
	if (!state->per_module_coverage)
	{
		//A single bitmap covers every module
		ADD_MEM(temp, (const char *)state->virgin_bits, MAP_SIZE, state_obj, "virgin_bits");
		ADD_INT(temp, state->last_shm_hash, state_obj, "last_shm_hash");
		ADD_INT(temp, state->last_path_was_new, state_obj, "last_path_was_new");
	}
	else
	{
		//One JSON object per module, collected in the "modules" array
		module_list = json_array();
		if (!module_list)
		{
			json_decref(state_obj); //don't leak the partially built state
			return NULL;
		}
		FOREACH_MODULE(target_module, state)
		{
			module_obj = json_object();
			if (!module_obj)
			{
				//module_list has not been attached to state_obj yet, so both must be released
				json_decref(module_list);
				json_decref(state_obj);
				return NULL;
			}
			ADD_STRING(temp, state->module_names[target_module->index], module_obj, "name");
			ADD_MEM(temp, (const char *)target_module->virgin_bits, MAP_SIZE, module_obj, "virgin_bits");
			ADD_INT(temp, target_module->last_shm_hash, module_obj, "last_shm_hash");
			ADD_INT(temp, target_module->last_path_was_new, module_obj, "last_path_was_new");
			json_array_append_new(module_list, module_obj); //the array takes over the reference
		}
		json_object_set_new(state_obj, "modules", module_list); //state_obj takes over the reference
	}

	ret = json_dumps(state_obj, 0);
	json_decref(state_obj);
	return ret;
}

/**
 * This function frees an instrumentation state previously obtained via dynamorio_get_state.
* @param state - the instrumentation state to free */ void dynamorio_free_state(char * state) { free(state); } #define get_item(arg1, dest, temp, func, name, ret) \ temp = func(arg1, name, &ret); \ if (ret <= 0) \ return 1; \ dest = temp; #define get_virgin_bits(arg1, dest, temp, func, ret) \ temp = func(arg1, "virgin_bits", &ret); \ if (ret <= 0) \ return 1; \ memcpy(dest, temp, MAP_SIZE); \ free(temp); /** * This function sets the instrumentation state to the passed in state previously obtained via dynamorio_get_state. * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @param state - an instrumentation state previously obtained via dynamorio_get_state * @return - 0 on success, non-zero on failure. */ int dynamorio_set_state(void * instrumentation_state, char * state) { int result, inner_result, tempint, found; char * tempstr; json_t * module_obj; target_module_t * target_module; dynamorio_state_t * dynamorio_state = (dynamorio_state_t *)instrumentation_state; if (!state) return 1; //If a child process is running when the state is being set destroy_target_process(dynamorio_state, 0);//kill it so we don't orphan it get_item(state, dynamorio_state->last_process_status, tempint, get_int_options, "last_process_status", result); dynamorio_state->analyzed_last_round = 1; if (!dynamorio_state->per_module_coverage) { get_item(state, dynamorio_state->last_shm_hash, tempint, get_int_options, "last_shm_hash", result); get_item(state, dynamorio_state->last_path_was_new, tempint, get_int_options, "last_path_was_new", result); get_virgin_bits(state, dynamorio_state->virgin_bits, tempstr, get_mem_options, result); } else { FOREACH_OBJECT_JSON_ARRAY_ITEM_BEGIN(state, modules, "modules", module_obj, result) tempstr = get_string_options_from_json(module_obj, "name", &inner_result); if (inner_result <= 0) return 1; found = 0; FOREACH_MODULE(target_module, dynamorio_state) { if 
(!strcmp(dynamorio_state->module_names[target_module->index], tempstr)) { get_item(module_obj, target_module->last_shm_hash, tempint, get_int_options_from_json, "last_shm_hash", inner_result); get_item(module_obj, target_module->last_path_was_new, tempint, get_int_options_from_json, "last_path_was_new", inner_result); get_virgin_bits(module_obj, target_module->virgin_bits, tempstr, get_mem_options_from_json, inner_result); found = 1; } } free(tempstr); if (!found) return 1; FOREACH_OBJECT_JSON_ARRAY_ITEM_END(modules); if (result < 0) return 1; } return 0; } /** * This function enables the instrumentation and runs the fuzzed process. If the process needs to be restarted, it will be. * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @process - a pointer to return a handle to the process that instrumentation was enabled on * @cmd_line - the command line of the fuzzed process to enable instrumentation on * @input - a buffer to the input that should be sent to the fuzzed process on stdin * @input_length - the length of the input parameter * returns 0 on success, -1 on failure */ int dynamorio_enable(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; DWORD num_written; target_module_t * target_module; if (!state->fuzzer_id) { setup_shm_and_pick_fuzzer_id(state, 1); state->pipe_name = (char *)alloc_printf("\\\\.\\pipe\\afl_pipe_%s", state->fuzzer_id); state->pidfile = alloc_printf("childpid_%s.txt", state->fuzzer_id); } if (!state->child_handle //if we haven't started the child yet || get_process_status(state->child_handle) == 0 //or the child died || input_length != 0) //or the fuzzer wants to send input on stdin (which doesn't work with persistence mode) { if (state->child_handle) destroy_target_process(state, 0); create_target_process(state, cmd_line, input, 
input_length); } else //the child is alive and we haven't cleaned up from last round finish_fuzz_round(state); *process = state->child_handle; //Blank the map state if (state->per_module_coverage) { FOREACH_MODULE(target_module, state) memset(state->edges ? (void *)target_module->edges_memory : target_module->trace_bits, 0, state->edges ? EDGES_SHM_SIZE : MAP_SIZE); } else memset(state->edges ? (void *)state->edges_memory : state->trace_bits, 0, state->edges ? EDGES_SHM_SIZE : MAP_SIZE); //Tell the child instrumentation to go WriteFile(state->pipe_handle, "F", 1, &num_written, NULL); state->analyzed_last_round = 0; state->enable_called = 1; return 0; } /** * This function determines if the last run had new coverage. * @param trace_bits - the edge bitmap of the most recent run * @parma virgin_bits - the edge bitmap of all edges previously seen * @param ignore_bytes - the edge bitmap of which bytes in the edge bitmap should be ignored * @param last_shm_hash - a pointer to a hash of the last run's trace_bits region. This pointer will be * updated with the current run's trace_bits' hash. * @param dump_map_dir - This optional parameter will cause the trace_bits bitmap to be dumped to the directory * specified by this parameter. 
* @return - returns 1 if new edge was found or an edge's hit count changed, or 0 otherwise */ static int has_new_coverage(u8 * trace_bits, u8 * virgin_bits, u8 * ignore_bytes, u32 * last_shm_hash, char * dump_map_dir) { u8 hnb; u32 hash, temp; #ifdef __x86_64__ classify_counts((u64*)trace_bits); #else classify_counts((u32*)trace_bits); #endif // ^__x86_64__ //A quick check of the last hash we saw to see if this output took the same path //Used to speed up the memory compare if (ignore_bytes) { hash = hash32_with_ignore(trace_bits, ignore_bytes, MAP_SIZE, HASH_CONST); temp = hash32(trace_bits, MAP_SIZE, HASH_CONST); //TODO REMOVE } else temp = hash = hash32(trace_bits, MAP_SIZE, HASH_CONST); DEBUG_MSG("Dynamorio Instrumentation got hash %08x temp %08x (last hash %08x)", hash, temp, *last_shm_hash); if (hash == *last_shm_hash) return 0; if (dump_map_dir) { //Write out the hash bitmap for debugging purposes char buffer[MAX_PATH]; snprintf(buffer, sizeof(buffer) - 1, "%s\\%08x", dump_map_dir, hash); write_buffer_to_file(buffer, (char *)trace_bits, MAP_SIZE); } //We had a new path (or hash collision), record it to the virgin bits *last_shm_hash = hash; if (ignore_bytes) hnb = has_new_bits_with_ignore(trace_bits, virgin_bits, ignore_bytes); else hnb = has_new_bits(trace_bits, virgin_bits); DEBUG_MSG("has_new_bits = %hhu", hnb); return hnb != 0; } /** * This function determines if which of the target_modules being tracked have new edges hit from the most recent run. * This function should only be called when per_module_coverage is enabled. 
* @param state - a dynamorio_state_t object previously created by the dynamorio_create function * @return - returns 1 if new edge was found or an edge's hit count changed, or 0 otherwise */ static int has_new_coverage_per_module(dynamorio_state_t * state) { target_module_t * target_module; int isnew, ret = 0; u32 last_hash; FOREACH_MODULE(target_module, state) { last_hash = target_module->last_shm_hash; isnew = has_new_coverage(target_module->trace_bits, target_module->virgin_bits, target_module->ignore_bytes, &target_module->last_shm_hash, state->dump_map_dir); ret |= isnew; target_module->last_path_was_new = isnew; if (isnew) DEBUG_MSG("Module %s has new bits (hash %08x, last hash %08x)", state->module_names[target_module->index], target_module->last_shm_hash, last_hash); } return ret != 0; } /** * This function determines whether the process being instrumented has taken a new path. It should be * called after the process has finished processing the tested input. * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @return - 1 if the previously setup process (via the enable function) took a new path, 0 if it did not, or -1 on failure. */ int dynamorio_is_new_path(void * instrumentation_state) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; if (!state->enable_called) return -1; return finish_fuzz_round(state); } /** * This function will return the result of the fuzz job. It should be called * after the process has finished processing the tested input. * @param instrumentation_state - an instrumentation specific structure previously created by the create() function * @return - either FUZZ_NONE, FUZZ_HANG, or FUZZ_CRASH, or -1 on error. 
*/
int dynamorio_get_fuzz_result(void * instrumentation_state)
{
	dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state;

	if (!state->enable_called)
		return -1;
	//Only -1 is an error. finish_fuzz_round returns -2 whenever edge recording is
	//enabled, and the process status is just as valid in that mode.
	if (finish_fuzz_round(state) == -1)
		return -1;
	return state->last_process_status;
}

/**
 * This function returns information about each of the modules that the instrumentation is tracing.
 * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function
 * @param index - an index into the module list for the module that information should be retrieved about. The return value
 * will indicate if a module exists for this index. Indices start at 0 and increase from there.
 * @param is_new - This parameter returns whether or not the last run of the instrumentation returned a new path for the module
 * with the specified index. In order for the information returned in this parameter to be accurate, the is_new_path method should
 * be called first. This parameter is optional and can be set to NULL.
 * @param module_name - This parameter returns the filename of the module at the specified index. This parameter is optional and can
 * be set to NULL. The value returned in this parameter should not be freed by the caller.
 * @param info - This parameter returns the an AFL style bitmap of the edges associated with the module at the specified index. This
 * parameter is optional and can be set to NULL. The value returned in this parameter should not be freed by the caller
 * @param size - This parameter returns the size of the AFL style bitmap of edges returned in the info parameter. This parameter is
 * optional and can be set to NULL.
* @return - 0 if the module with the specified index is found, non-zero on error or if the module is not found */ int dynamorio_get_module_info(void * instrumentation_state, int index, int * is_new, char ** module_name, char ** info, int * size) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; target_module_t * target_module; if (info || is_new) { if (!state->enable_called) return -1; if (finish_fuzz_round(state) < 0) return -1; } FOREACH_MODULE(target_module, state) { if (target_module->index == index) { if(is_new) *is_new = target_module->last_path_was_new; if(module_name) *module_name = state->module_names[index]; if (info) { if (state->edges) //If they asked for edges, we don't have the trace info *info = NULL; else *info = (char *)target_module->trace_bits; } if(size) *size = MAP_SIZE; return 0; } } return 1; } /* * This function gets a list of the edges hit during the instrumented programs most recent run * @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function * @param index - The index of the module to retrieve the edges for. This parameter is only needed if the per_module_coverage * option is enabled. * @return - a list of the edges hit during the instrumented programs most recent run, or NULL on error */ instrumentation_edges_t * dynamorio_get_edges(void * instrumentation_state, int index) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; target_module_t * target_module; if (!state->enable_called) return -1; if (!state->edges) //If they didn't ask for edges ahead of time, we don't have them return NULL; if (finish_fuzz_round(state) == -1) return NULL; if (!state->per_module_coverage) return state->edges_memory; FOREACH_MODULE(target_module, state) { if (target_module->index == index) return target_module->edges_memory; } return NULL; } /** * This function returns help text for this instrumentation. 
This help text will describe the instrumentation and any options
 * that can be passed to dynamorio_create.
 * @param help_str - A pointer that will be updated to point to the new help string. The string is
 * allocated with strdup.
 * @return 0 on success and -1 on failure
 */
int dynamorio_help(char ** help_str)
{
	//The path placeholder in the ignore_bytes_dir entry is spelled after the option it refers to
	//(ignore_bytes_dir); it previously read $ignore_bits_dir, which is not an option name.
	*help_str = strdup(
		"dynamorio - DynamoRIO instrumentation (based heavily on winafl)\n"
		"Options:\n"
		" dynamorio_dir Set the directory with DynamoRIO binaries in it\n"
		" winafl_dir Set the directory with winafl.dll in it\n"
		" target_path The path to the target program to fuzz\n"
		" dump_map_dir Set the directory to dump the instrumentation bitmap\n"
		" to, for debugging purposes\n"
		" ignore_bytes_dir Set the directory to load ignore bit files from when\n"
		" per_module_coverage is set (of the form\n"
		" $ignore_bytes_dir\\$dll_name.dll.dat).\n"
		" ignore_bytes_file Set the file to load ignore bit files from when\n"
		" per_module_coverage is not set.\n"
		" timeout The number of milliseconds to wait when communicating\n"
		" with the instrumentation in the target process\n"
		" client_params Parameters to pass to the winafl.dll DynamoRIO tool\n"
		" (Do not specify per_module_coverage,\n"
		" fuzz_iterations, or coverage_modules here)\n"
		" fuzz_iterations Maximum number of iterations for the target function\n"
		" to run before restarting the target process\n"
		" coverage_modules An array of modules that should be instrumented to\n"
		" record coverage information\n"
		" per_module_coverage Whether coverage should be tracked in one bitmap (0),\n"
		" or in a separate bitmap for each module (1)\n"
		"\n"
	);
	if (*help_str == NULL)
		return -1;
	return 0;
}

/**
 * This function will log information about the given instrumentation state to the logger. It's mostly useful for debugging.
* @param instrumentation_state - an instrumentation specific state object previously created by the dynamorio_create function */ void dynamorio_print_state(void * instrumentation_state) { dynamorio_state_t * state = (dynamorio_state_t *)instrumentation_state; target_module_t * target_module; if (!state->per_module_coverage) INFO_MSG("DynamoRIO State: last_hash %08x (hash %08x) last_path_as_new %d", state->last_shm_hash, hash32(state->virgin_bits, MAP_SIZE, HASH_CONST), state->last_path_was_new); else { INFO_MSG("DynamoRIO State:"); FOREACH_MODULE(target_module, state) { INFO_MSG("module %s: last_hash %08x (hash %08x) last_path_as_new %d", state->module_names[target_module->index], target_module->last_shm_hash, hash32(target_module->virgin_bits, MAP_SIZE, HASH_CONST), target_module->last_path_was_new); } } } ================================================ FILE: instrumentation/dynamorio_instrumentation.h ================================================ #pragma once #include #include "winafl_types.h" #include "winafl_config.h" void * dynamorio_create(char * options, char * state); void dynamorio_cleanup(void * instrumentation_state); void * dynamorio_merge(void * instrumentation_state, void * other_instrumentation_state); char * dynamorio_get_state(void * instrumentation_state); void dynamorio_free_state(char * state); int dynamorio_set_state(void * instrumentation_state, char * state); int dynamorio_enable(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length); int dynamorio_is_new_path(void * instrumentation_state); int dynamorio_get_module_info(void * instrumentation_state, int index, int * is_new, char ** module_name, char ** info, int * size); instrumentation_edges_t * dynamorio_get_edges(void * instrumentation_state, int index); int dynamorio_is_process_done(void * instrumentation_state); int dynamorio_get_fuzz_result(void * instrumentation_state); int dynamorio_help(char ** help_str); void 
dynamorio_print_state(void * instrumentation_state); #define FOREACH_MODULE(x, state) for(x = state->modules; x; x = x->next) struct target_module { int index; HANDLE shm_handle; /* Handle of the SHM region */ u8 * trace_bits; /* SHM with instrumentation bitmap */ u8 virgin_bits[MAP_SIZE]; /* Regions yet untouched by fuzzing */ u32 last_shm_hash; /* The most recent hash of the SHM region */ int last_path_was_new; u8 * ignore_bytes; instrumentation_edges_t * edges_memory; /* SHM with list of edges */ struct target_module * next; }; typedef struct target_module target_module_t; struct dynamorio_state { //Options char * default_dynamorio_dir; char * dynamorio_dir; char * default_winafl_dir; char * winafl_dir; char * target_path; char * dump_map_dir; char * ignore_bytes_dir; char * ignore_bytes_file; int per_module_coverage; int fuzz_iterations_max; char * client_params; int timeout; int edges; HANDLE child_handle; /* Handle to the child process */ s32 child_pid; /* PID of the fuzzed program */ HANDLE pipe_handle; /* Handle of the comms named pipe */ HANDLE shm_handle; /* Handle of the SHM region */ char ** module_names; size_t num_modules; target_module_t * modules; char * pidfile; /* pid file name */ char * pipe_name; /* name of the pipe to communicate with Dynamorio */ int fuzz_iterations_current; u8 virgin_bits[MAP_SIZE]; /* Regions yet untouched by fuzzing */ char *fuzzer_id; /* The fuzzer ID or a randomized seed allowing multiple instances */ u8 * trace_bits; /* SHM with instrumentation bitmap */ u8 * ignore_bytes; u32 last_shm_hash; /* The most recent hash of the SHM region */ int last_path_was_new; int last_process_status; int analyzed_last_round; int enable_called; instrumentation_edges_t * edges_memory; /* SHM with list of edges */ }; typedef struct dynamorio_state dynamorio_state_t; ================================================ FILE: instrumentation/forkserver.c ================================================ #define _GNU_SOURCE #include #include 
#include #include #include #include #include "forkserver_internal.h" static void forkserver_persistence_init(void); ////////////////////////////////////////////////////////////// //Fork Server //////////////////////////////////////////////// ////////////////////////////////////////////////////////////// //The fork server design was inspired by the LLVM mode of AFL. It however, //has been modified significantly to suit our purposes. The LLVM mode of //AFL is available at: //https://github.com/mirrorer/afl/blob/master/llvm_mode/afl-llvm-rt.o.c#L95 void __forkserver_init(void) { int response = 0x41414141; char command; int child_pid = -1; int target_pipe[2]; // Phone home and tell the parent that we're OK. If parent isn't there, // assume we're not running in forkserver mode and just execute program. if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) return; if(getenv(PERSIST_MAX_VAR)) { forkserver_persistence_init(); return; } if(pipe(target_pipe)) _exit(1); while (1) { // Wait for parent by reading from the pipe. Exit if read fails. if(read(FUZZER_TO_FORKSRV, &command, sizeof(command)) != sizeof(command)) _exit(1); switch(command) { case EXIT: _exit(0); break; case FORK: case FORK_RUN: child_pid = fork(); if(child_pid < 0) _exit(1); //In child process: close fds, resume execution. 
if(!child_pid) { close(FUZZER_TO_FORKSRV); close(FORKSRV_TO_FUZZER); close(target_pipe[1]); //If we're just forking, wait for the forkserver to tell us to go if(command == FORK && read(target_pipe[0], &response, sizeof(int)) != sizeof(int)) _exit(1); close(target_pipe[0]); return; } response = child_pid; break; case RUN: //Make sure the target process has started if(child_pid == -1) { response = FORKSERVER_ERROR; break; } //Tell the target process to go response = 0; if(write(target_pipe[1], &response, sizeof(int)) != sizeof(int)) _exit(1); break; case GET_STATUS: if(waitpid(child_pid, &response, 0) < 0) _exit(1); break; } if(write(FORKSRV_TO_FUZZER, &response, sizeof(int)) != sizeof(int)) _exit(1); } } ////////////////////////////////////////////////////////////// //Persistence Mode /////////////////////////////////////////// ////////////////////////////////////////////////////////////// static int max_cnt = 0; static int cycle_cnt = 0; static int forkserver_cycle_cnt = 0; static void forkserver_persistence_init(void) { int response = 0x41414141; char command; int child_pid = -1; //Get the maximum number of persistent executions max_cnt = atoi(getenv(PERSIST_MAX_VAR)); if(!max_cnt) _exit(1); while (1) { // Wait for parent by reading from the pipe. Exit if read fails. if(read(FUZZER_TO_FORKSRV, &command, sizeof(command)) != sizeof(command)) _exit(1); switch(command) { case EXIT: if(child_pid != -1) kill(child_pid, SIGKILL); _exit(0); break; case FORK: case FORK_RUN: if(child_pid == -1 || forkserver_cycle_cnt == max_cnt) { if(child_pid != -1 && forkserver_cycle_cnt == max_cnt) { //if we've hit the maximum cycle count, continue the child, so it may exit //and clean up. We do this now, rather than in GET_STATUS commands, to ensure that //the exit portion of the target process does not get traced. 
kill(child_pid, SIGCONT); if(waitpid(child_pid, &response, 0) < 0) _exit(1); forkserver_cycle_cnt = 0; } child_pid = fork(); if(child_pid < 0) _exit(1); //In child process: close fds, resume execution. if(!child_pid) { close(FUZZER_TO_FORKSRV); close(FORKSRV_TO_FUZZER); return; } if(waitpid(child_pid, &response, WUNTRACED) < 0 || !WIFSTOPPED(response)) { //Failed to start the child, kill it and report failure kill(child_pid, SIGKILL); child_pid = -1; } } response = child_pid; if(command != FORK_RUN && response != -1) //If the command is FORK_RUN, fall into the RUN case break; case RUN: //Tell the target process to go if(child_pid == -1) { response = FORKSERVER_ERROR; break; } kill(child_pid, SIGCONT); forkserver_cycle_cnt++; if(command != FORK_RUN) //Don't overwrite the FORK case's response response = 0; break; case GET_STATUS: if(waitpid(child_pid, &response, WUNTRACED) < 0) _exit(1); if(WIFEXITED(response) || WIFSIGNALED(response)) { //The process ended, either child_pid = -1; //by hitting the max_cnt count and exiting, or by crashing forkserver_cycle_cnt = 0; } else if(WIFSTOPPED(response)) //If we hit a SIGSTOP, then the child didn't response = 0; //die, just return 0 to the parent break; } if(write(FORKSRV_TO_FUZZER, &response, sizeof(response)) != sizeof(response)) _exit(1); } } int __killerbeez_loop(void) { raise(SIGSTOP); return cycle_cnt++ != max_cnt; } ================================================ FILE: instrumentation/forkserver.h ================================================ #pragma once //Macros and fucntion definitions for use when instrumenting target programs int __killerbeez_loop(void); #define KILLERBEEZ_LOOP() __killerbeez_loop() void __forkserver_init(void); #define KILLERBEEZ_INIT() __forkserver_init() ================================================ FILE: instrumentation/forkserver_config.h ================================================ #pragma once //This header file controls the function that the forkserver hooks in order to 
//startup in the target process. By modifying these marcos, the forkserver can //be made to start much later in the target process, allowing for reduced //startup code of each new process. //Whether to disable function hooking. This should only be set if the target //program has been modified to explicitly call KILLERBEEZ_INIT(). #define DISABLE_HOOKING 0 //Whether we should hook __libc_start_main or not. This is a default option //that should work for most Linux programs. #define USE_LIBC_START_MAIN 1 //If we're not hooking __libc_start_main, this defines the function to hook #define CUSTOM_FUNCTION_NAME custom_function_to_hook //If we're not hooking __libc_start_main, this defines whether we should start //the forkserver before (1) or after (0) the function that we are hooking #define RUN_BEFORE_CUSTOM_FUNCTION 0 ================================================ FILE: instrumentation/forkserver_hooking.c ================================================ #define _GNU_SOURCE #include #include "forkserver.h" #include "forkserver_config.h" #if !DISABLE_HOOKING ////////////////////////////////////////////////////////////// //Types, Function Prototypes, and Globals //////////////////// ////////////////////////////////////////////////////////////// //In order to allow for the hooking of functions, regardless of their arguments, we define the hook //function as having a ton of void * arguments. This allows us to pass these arguments on (regardless //of whether they actually exist or not). 
typedef void * (*orig_function_type)(void *, void *, void *, void *, void *, void *, void *, void *);

//Whether or not we've already started the forkserver
static int init_done = 0;
//A pointer to the original function that we hooked
static orig_function_type orig_func = 0;

//////////////////////////////////////////////////////////////
//Function Hooking ///////////////////////////////////////////
//////////////////////////////////////////////////////////////

#ifdef __APPLE__

//Define a fake prototype here, otherwise it will complain when it's used.
void CUSTOM_FUNCTION_NAME(void);
#define FUNCTION CUSTOM_FUNCTION_NAME
#define NEW_FUNCTION new_##FUNCTION

//Emits a (replacement, replacee) pair of function addresses into the
//__DATA,__interpose section
#define DYLD_INTERPOSE(_replacment,_replacee) \
  __attribute__((used)) static struct{ const void* replacment; const void* replacee; } _interpose_##_replacee \
  __attribute__ ((section ("__DATA,__interpose"))) = { (const void*)(unsigned long)&_replacment, (const void*)(unsigned long)&_replacee };

#else //LINUX

//Our hook is defined under the same name as the function being hooked
#if USE_LIBC_START_MAIN
#define FUNCTION __libc_start_main
#else
#define FUNCTION CUSTOM_FUNCTION_NAME
#endif
#define NEW_FUNCTION FUNCTION

#endif

//Convert FUNCTION into "FUNCTION" so we can use it to call dlsym
#define STRINGIFY_INNER(s) (#s)
#define STRINGIFY(name) STRINGIFY_INNER(name)
#define FUNCTION_NAME STRINGIFY(FUNCTION)

#if USE_LIBC_START_MAIN
//The target's real main function, saved by the hook below
static orig_function_type orig_main = 0;

//Passed to the original __libc_start_main in place of the target's main, so
//that the forkserver is started right before the target's main is called
void * fake_main(void * a0, void * a1, void * a2, void * a3, void * a4, void * a5, void * a6, void * a7)
{
  __forkserver_init();
  return orig_main(a0, a1, a2, a3, a4, a5, a6, a7);
}
#endif

//The hook itself. It looks up the next definition of the hooked function with
//dlsym(RTLD_NEXT, ...) on first use and forwards all eight arguments to it.
//NOTE(review): the dlsym result is not checked; if the lookup fails, orig_func
//stays NULL and the call through it below will crash.
void * NEW_FUNCTION(void * a0, void * a1, void * a2, void * a3, void * a4, void * a5, void * a6, void * a7)
{
  void * ret;

  if(orig_func == 0)
    orig_func = (orig_function_type)dlsym(RTLD_NEXT, FUNCTION_NAME);

#if USE_LIBC_START_MAIN //we're hooking __libc_start_main
  //The first argument is treated as the target's main; swap in fake_main for it
  orig_main = a0;
  ret = orig_func((void *)fake_main, a1, a2, a3, a4, a5, a6, a7);
#else //We're hooking a custom function

#if RUN_BEFORE_CUSTOM_FUNCTION //If we want to run before the hooked function
  if(!init_done) {
    __forkserver_init();
    init_done = 1;
  }
#endif

  ret = orig_func(a0, a1, a2, a3, a4, a5, a6, a7);

#if !RUN_BEFORE_CUSTOM_FUNCTION //If we want to run after the hooked function
  if(!init_done) {
    __forkserver_init();
    init_done = 1;
  }
#endif

#endif
  return ret;
}

#ifdef __APPLE__
DYLD_INTERPOSE(NEW_FUNCTION, FUNCTION)
#endif

#endif //!DISABLE_HOOKING

================================================
FILE: instrumentation/forkserver_internal.h
================================================
#pragma once

//Names of the environment variables used with the forkserver. PERSIST_MAX_VAR
//carries the maximum number of persistence mode iterations to the target.
#define PERSIST_MAX_VAR "PERSISTENCE_MAX_CNT"
#define DEFER_ENV_VAR "DEFER_ENV_VAR"

//Designated file descriptors for read/write to the forkserver
//and target process
#define FUZZER_TO_FORKSRV 198
#define FORKSRV_TO_FUZZER 199
#define QEMU_TSL_FD 200
#define MAX_FORKSRV_FD 201

//Commands that the fuzzer can send to the forkserver
#define EXIT 0
#define FORK 1
#define RUN 2
#define FORK_RUN 3
#define GET_STATUS 4

//Possible response codes returned from the forkserver
#define FORKSERVER_ERROR -1
#define FORKSERVER_NO_RESULTS_READY -2

//The fuzzer side state for one forkserver
struct forkserver {
  int fuzzer_to_forksrv; //write end of the control pipe, used to send commands
  int forksrv_to_fuzzer; //read end of the status pipe, used to read responses
  int target_stdin;      //fd of the temp file used as the target's stdin, or -1 if not needed
  int sent_get_status;   //cleared by fork_server_init and whenever a fork command is sent
  int last_status;       //initialized to -1 by fork_server_init
  int pid;               //pid of the process that the forkserver lives in
};
typedef struct forkserver forkserver_t;

//These functions control all interactions with the forkserver, sending the
//commands listed above
void fork_server_init(forkserver_t * fs, char * target_path, char ** argv, int use_forkserver_library, int persistence_max_cnt, int needs_stdin_fd);
int fork_server_exit(forkserver_t * fs);
int fork_server_fork(forkserver_t * fs);
int fork_server_fork_run(forkserver_t * fs);
int fork_server_run(forkserver_t * fs);
int fork_server_get_status(forkserver_t * fs, int wait);
int fork_server_get_pending_status(forkserver_t * fs, int wait);

================================================
FILE: instrumentation/instrumentation.c
================================================
#ifndef _WIN32
//Headers necessary for the forkserver
#include
#include
#include #include #include #include #include #include #include #include #include #include #endif #include "instrumentation.h" #include #ifndef _WIN32 //The forkserver is not supported on Windows #include "forkserver_internal.h" #define STRINGIFY_INTERNAL(x) #x #define STRINGIFY(x) STRINGIFY_INTERNAL(x) #define MSAN_ERROR 86 //The amount of time to wait before considering the fork server initialization failed #define FORK_SERVER_STARTUP_TIME 10 //Save a fd to the /dev/null, so we don't have to keep opening/closing it static int dev_null_fd = -1; //TODO implement memory limiting static int mem_limit = 0; //TODO asan detection static int uses_asan = 0; /** * This function locates the fork server library * @param buffer - A buffer to return the path to the fork server library * @param buffer_len - The length of the buffer parameter */ static void find_fork_server_library(char * buffer, size_t buffer_len) { #ifdef __APPLE__ char * library_name = "libforkserver.dylib"; #else char * library_name = "libforkserver.so"; #endif char * directory = filename_relative_to_binary_dir("."); snprintf(buffer, buffer_len, "%s/%s", directory, library_name); if (!file_exists(buffer)) FATAL_MSG("Failed to find the %s in %s.", library_name, directory); } ////////////////////////////////////////////////////////////// // Fork Server Initialization //////////////////////////////// ////////////////////////////////////////////////////////////// /** * * @param needs_stdin_fd - whether we should open a library for the stdin of * the newly created process * @param target_path - The path to the program to start * @param argv - Arguments to pass to the program * @param fs - A forkserver_t structure to hold the fork server state, or NULL * if not using a fork server * @param use_forkserver_library - Whether or not to use * LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject * the fork server * @param st_pipe - pointer to an array of two status pipes for the fork server * @param ctl_pipe - pointer to an array 
of two control pipes for the fork server * @param persistence_max_cnt - if fork server is in use, and perssistent mode * is in use, this is the number of inputs which * will be handled by each execution of the target * @return the process ID of spawned process */ static pid_t run_target(int needs_stdin_fd, char *target_path, char **argv, forkserver_t * fs, int use_forkserver_library, int *st_pipe, int *ctl_pipe, int persistence_max_cnt) { /* This function is based on the AFL run_target function present in afl-fuzz.c, available at this URL: https://github.com/mirrorer/afl/blob/master/afl-fuzz.c#L1968. AFL's license is as shown below: american fuzzy lop - fuzzer code -------------------------------- Written and maintained by Michal Zalewski Forkserver design by Jann Horn Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ int child_pid; DEBUG_MSG("Forking child process for target executable..."); child_pid = fork(); if(child_pid < 0) FATAL_MSG("fork() failed"); DEBUG_MSG("Fork succeeded, child pid: %d", child_pid); if(!child_pid) { struct rlimit r; // Umpf. On OpenBSD, the default fd limit for root users is set to // soft 128. Let's try to fix that... if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < MAX_FORKSRV_FD) { r.rlim_cur = MAX_FORKSRV_FD; setrlimit(RLIMIT_NOFILE, &r); // Ignore errors } if (mem_limit) { r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20; #ifdef RLIMIT_AS setrlimit(RLIMIT_AS, &r); // Ignore errors #else // This takes care of OpenBSD, which doesn't have RLIMIT_AS, but // according to reliable sources, RLIMIT_DATA covers anonymous // maps - so we should be getting good protection against OOM bugs. 
setrlimit(RLIMIT_DATA, &r); // Ignore errors #endif // ^RLIMIT_AS DEBUG_MSG("Set memory limits"); } // Dumping cores is slow and can lead to anomalies if SIGKILL is delivered // before the dump is complete. r.rlim_max = r.rlim_cur = 0; setrlimit(RLIMIT_CORE, &r); // Ignore errors /* Isolate the process and configure standard descriptors. If out_file is specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */ setsid(); if(dev_null_fd < 0) dev_null_fd = open("/dev/null", O_RDWR); if(needs_stdin_fd) { dup2(fs->target_stdin, 0); close(fs->target_stdin); } else { dup2(dev_null_fd, 0); } DEBUG_MSG("About to send stdout to /dev/null"); if(dup2(dev_null_fd, 1) < 0) WARNING_MSG("Sending stdout to /dev/null failed! errno=%d", errno); DEBUG_MSG("About to send stderr to /dev/null"); if(dup2(dev_null_fd, 2) < 0) WARNING_MSG("Sending stderr to /dev/null failed! errno=%d", errno); DEBUG_MSG("Child is still alive and well!"); // The forkserver requires setting up some control pipes for interaction // between the fuzzer and forkserver (which lives in the target process) if(fs) { DEBUG_MSG("Setting up pipes for forkserver..."); // Set up control and status pipes, close the unneeded original fds. if(dup2(ctl_pipe[0], FUZZER_TO_FORKSRV) < 0) FATAL_MSG("dup2() failed"); if(dup2(st_pipe[1], FORKSRV_TO_FUZZER) < 0) FATAL_MSG("dup2() failed"); DEBUG_MSG("Forkserver pipes set up"); close(ctl_pipe[0]); close(ctl_pipe[1]); close(st_pipe[0]); close(st_pipe[1]); DEBUG_MSG("Extra pipes closed"); } /* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */ close(dev_null_fd); DEBUG_MSG("Setting up pipes is complete..."); // If we are using a forksrv, we might need to inject it dynamically if it // is not already in the executable. 
We also want to make sure we set the // environment variable which is used for persistence mode, and finally we // add the optimization to load all the libraries once so this is only // done on the execv, as opposed to each time the target process calls fork if(fs) { // Preload the forkserver library if(use_forkserver_library) { char fork_server_library_path[MAX_PATH]; find_fork_server_library(fork_server_library_path, sizeof(fork_server_library_path)); #ifdef __APPLE__ setenv("DYLD_INSERT_LIBRARIES", fork_server_library_path, 1); #else setenv("LD_PRELOAD", fork_server_library_path, 1); #endif } if(persistence_max_cnt) { char buffer[16]; snprintf(buffer, sizeof(buffer),"%d",persistence_max_cnt); setenv(PERSIST_MAX_VAR, buffer, 1); } // This should improve performance a bit, since it stops the linker from // doing extra work post-fork(). if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0); } // Set sane defaults for ASAN if nothing else specified. setenv("ASAN_OPTIONS", "abort_on_error=1:" "detect_leaks=0:" "symbolize=0:" "allocator_may_return_null=1", 0); // MSAN uses slightly different arguments when using the forkserver if(fs) { // MSAN is tricky, because it doesn't support abort_on_error=1 at this // point. So, we do this in a very hacky way. 
setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":" "symbolize=0:" "abort_on_error=1:" "allocator_may_return_null=1:" "msan_track_origins=0", 0); } else { setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":" "symbolize=0:" "msan_track_origins=0", 0); } DEBUG_MSG("Setup done, about to execv: %s", target_path); execv(target_path, argv); // The only time execv() returns is if it failed FATAL_MSG("Target executable failed to execute (execv())"); exit(1); } return child_pid; } /** * This function starts a program with the fork server embedded in it * @param fs - A forkserver_t structure to hold the fork server state * @param target_path - The path to the program to start * @param argv - Arguments to pass to the program * @param use_forkserver_library - Whether or not to use LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject the fork server * library or not * @param persistence_max_cnt - the maximum number of fuzz iterations a persistence mode process should run * @param needs_stdin_fd - whether we should open a library for the stdin of the newly created process */ void fork_server_init(forkserver_t * fs, char * target_path, char ** argv, int use_forkserver_library, int persistence_max_cnt, int needs_stdin_fd) { static struct itimerval it; int st_pipe[2], ctl_pipe[2]; int err, status, forksrv_pid; int rlen = -1, timed_out = 1; char stdin_filename[100]; time_t start_time; if(dev_null_fd < 0) { dev_null_fd = open("/dev/null", O_RDWR); if (dev_null_fd < 0) FATAL_MSG("Unable to open /dev/null"); } fs->sent_get_status = 0; fs->last_status = -1; if(needs_stdin_fd) { strncpy(stdin_filename, "/tmp/fuzzfileXXXXXX", sizeof(stdin_filename)); fs->target_stdin = mkstemp(stdin_filename); if(fs->target_stdin < 0) FATAL_MSG("Couldn't make temp file\n"); } else fs->target_stdin = -1; DEBUG_MSG("Spinning up the fork server..."); /* The code in the rest of this function is based on the AFL startup fork server present in afl-fuzz.c, available at this URL: 
https://github.com/mirrorer/afl/blob/master/afl-fuzz.c#L1968. AFL's license is as shown below: american fuzzy lop - fuzzer code -------------------------------- Written and maintained by Michal Zalewski Forkserver design by Jann Horn Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 */ if(pipe(st_pipe) || pipe(ctl_pipe)) FATAL_MSG("pipe() failed"); forksrv_pid = run_target(needs_stdin_fd, target_path, argv, fs, use_forkserver_library, st_pipe, ctl_pipe, persistence_max_cnt); // Close the unneeded endpoints. close(ctl_pipe[0]); close(st_pipe[1]); fs->fuzzer_to_forksrv = ctl_pipe[1]; fs->forksrv_to_fuzzer = st_pipe[0]; fs->pid = forksrv_pid; // Wait for the fork server to come up, but don't wait too long. // Note, we do this looping, rather than blocking on read and using // a SIGALRM to breakout on time out, because we want to avoid globals // so the code can be used without worrying about any existing signal handlers start_time = time(NULL); while(time(NULL) - start_time < FORK_SERVER_STARTUP_TIME) { err = ioctl(fs->forksrv_to_fuzzer, FIONREAD, &rlen); if(!err && rlen == sizeof(int)) { rlen = read(fs->forksrv_to_fuzzer, &status, sizeof(status)); timed_out = 0; break; } usleep(5); } // If we have a four-byte "hello" message from the server, we're all set. // Otherwise, try to figure out what went wrong. 
if (rlen == 4) { DEBUG_MSG("All right - fork server (PID %d) is up.", forksrv_pid); return; } kill(forksrv_pid, SIGKILL); if(timed_out) FATAL_MSG("Timeout while initializing fork server\n"); if (waitpid(forksrv_pid, &status, 0) <= 0) FATAL_MSG("waitpid() failed"); if (WIFSIGNALED(status)) { if (mem_limit && mem_limit < 500 && uses_asan) { ERROR_MSG( "Whoops, the target binary crashed suddenly, before receiving any input\n" " from the fuzzer! Since it seems to be built with ASAN and you have a\n" " restrictive memory limit configured, this is expected"); } else if (!mem_limit) { ERROR_MSG( "Whoops, the target binary crashed suddenly, before receiving any input\n" " from the fuzzer! There are several probable explanations:\n\n" " - The binary is just buggy and explodes entirely on its own. If so, you\n" " need to fix the underlying problem or find a better replacement.\n\n" #ifdef __APPLE__ " - On MacOS X, the semantics of fork() syscalls are non-standard and may\n" " break afl-fuzz performance optimizations when running platform-specific\n" " targets. To fix this, try running without the forkserver.\n\n" #endif // __APPLE__ " - Less likely, there is a horrible bug in the fuzzer."); } else { ERROR_MSG( "Whoops, the target binary crashed suddenly, before receiving any input\n" " from the fuzzer! There are several probable explanations:\n\n" " - The current memory limit (%s) is too restrictive, causing the\n" " target to hit an OOM condition in the dynamic linker. Try bumping up\n" " the limit with the -m setting in the command line. A simple way confirm\n" " this diagnosis would be:\n\n" #ifdef RLIMIT_AS " ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )\n\n" #else " ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )\n\n" #endif // ^RLIMIT_AS " Tip: you can use http://jwilk.net/software/recidivm to quickly\n" " estimate the required amount of virtual memory for the binary.\n\n" " - The binary is just buggy and explodes entirely on its own. 
If so, you\n" " need to fix the underlying problem or find a better replacement.\n\n" #ifdef __APPLE__ " - On MacOS X, the semantics of fork() syscalls are non-standard and may\n" " break afl-fuzz performance optimizations when running platform-specific\n" " targets. To fix this, try running without the forkserver.\n\n" #endif // __APPLE__ " - Less likely, there is a horrible bug in the fuzzer.", mem_limit << 20, mem_limit - 1); } FATAL_MSG("Fork server crashed with signal %d", WTERMSIG(status)); } if (mem_limit && mem_limit < 500 && uses_asan) { ERROR_MSG( "Hmm, looks like the target binary terminated before we could complete a\n" " handshake with the injected code. Since it seems to be built with ASAN and\n" " you have a restrictive memory limit configured, this is expected."); } else if (!mem_limit) { ERROR_MSG( "Hmm, looks like the target binary terminated before we could complete a\n" " handshake with the injected code. Perhaps there is a horrible bug in the\n" " fuzzer."); } else { ERROR_MSG( "Hmm, looks like the target binary terminated before we could complete a\n" " handshake with the injected code. There are a few probable explanations:\n\n" " - The current memory limit (%s) is too restrictive, causing an OOM\n" " fault in the dynamic linker. This can be fixed with the -m option. A\n" " simple way to confirm the diagnosis may be:\n\n" #ifdef RLIMIT_AS " ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )\n\n" #else " ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )\n\n" #endif // ^RLIMIT_AS " Tip: you can use http://jwilk.net/software/recidivm to quickly\n" " estimate the required amount of virtual memory for the binary.\n\n" " - Less likely, there is a horrible bug in the fuzzer. 
If other options\n" " fail.", mem_limit << 20, mem_limit - 1); } FATAL_MSG("Fork server handshake failed"); } ////////////////////////////////////////////////////////////// // Fork Server Communication Functions /////////////////////// ////////////////////////////////////////////////////////////// /** * This function sends a command to the fork server * @param fs - A forkserver_t structure to hold the fork server state * @param command - the command to send * @return - 0 on success, FORKSERVER_ERROR on failure */ static int send_command(forkserver_t * fs, char command) { if (write(fs->fuzzer_to_forksrv, &command, sizeof(command)) != sizeof(command)) return FORKSERVER_ERROR; return 0; } /** * This function reads a response from the fork server * @param fs - A forkserver_t structure to hold the fork server state * @return - the response value on success, FORKSERVER_ERROR on failure */ static int read_response(forkserver_t * fs) { int response; if (read(fs->forksrv_to_fuzzer, &response, sizeof(response)) != sizeof(response)) return FORKSERVER_ERROR; return response; } /** * This function tells the forkserver to exit, and closes any open file descriptors to it * @param fs - A forkserver_t structure to hold the fork server state * @return - the 0 on success, FORKSERVER_ERROR on failure */ int fork_server_exit(forkserver_t * fs) { int ret = send_command(fs, EXIT); if(!ret) { close(fs->fuzzer_to_forksrv); close(fs->forksrv_to_fuzzer); close(fs->target_stdin); } return ret; } /** * This function tells the forkserver to fork or fork and run, and returns the newly created process's pid * @param fs - A forkserver_t structure to hold the fork server state * @param command - Either the FORK or FORK_RUN command * @return - the newly created process's pid on success, FORKSERVER_ERROR on failure */ static int send_fork(forkserver_t * fs, char command) { if(send_command(fs, command)) return FORKSERVER_ERROR; fs->sent_get_status = 0; return read_response(fs); //Wait for the target 
pid } /** * This function tells the forkserver to fork, and returns the newly created process's pid * @param fs - A forkserver_t structure to hold the fork server state * @return - the newly created process's pid on success, FORKSERVER_ERROR on failure */ int fork_server_fork(forkserver_t * fs) { return send_fork(fs, FORK); } /** * This function tells the forkserver to fork and run, and returns the newly created process's pid * @param fs - A forkserver_t structure to hold the fork server state * @return - the newly created process's pid on success, FORKSERVER_ERROR on failure */ int fork_server_fork_run(forkserver_t * fs) { return send_fork(fs, FORK_RUN); } /** * This function tells the forkserver to run * @param fs - A forkserver_t structure to hold the fork server state * @return - 0 on success, FORKSERVER_ERROR on failure */ int fork_server_run(forkserver_t * fs) { if(send_command(fs, RUN)) return FORKSERVER_ERROR; if(read_response(fs) != 0) return FORKSERVER_ERROR; return 0; } /** * This function gets the response of prevously sent GET_STATUS command from the fork server (i.e. 
the process's exit status) * @param fs - A forkserver_t structure to hold the fork server state * @param wait - whether this function should block or not * @return - the finished process's exit status (see waitpid) on success, FORKSERVER_ERROR on failure, or * FORKSERVER_NO_RESULTS_READY when not blocking and the forkserver has not responded yet */ int fork_server_get_pending_status(forkserver_t * fs, int wait) { unsigned long bytes_available = 0; int err; if(fs->sent_get_status && fs->last_status != -1) return fs->last_status; if(wait) return read_response(fs); //Wait for the target's exit status else { err = ioctl(fs->forksrv_to_fuzzer, FIONREAD, &bytes_available); if(!err && bytes_available == sizeof(int)) { fs->last_status = read_response(fs); //Wait for the target's exit status return fs->last_status; } } return FORKSERVER_NO_RESULTS_READY; } /** * This function sends a GET_STATUS command to the fork server and gets the response (i.e. the process's exit status) * @param fs - A forkserver_t structure to hold the fork server state * @param wait - whether this function should block or not * @return - the finished process's exit status (see waitpid) on success, FORKSERVER_ERROR on failure, or * FORKSERVER_NO_RESULTS_READY when not blocking and the forkserver has not responded yet */ int fork_server_get_status(forkserver_t * fs, int wait) { if(!fs->sent_get_status) { if(send_command(fs, GET_STATUS)) return FORKSERVER_ERROR; fs->sent_get_status = 1; fs->last_status = -1; } return fork_server_get_pending_status(fs, wait); } #endif //!_WIN32 ================================================ FILE: instrumentation/instrumentation.h ================================================ #pragma once #include #include #ifdef _WIN32 #include // HANDLE #endif #ifdef INSTRUMENTATION_EXPORTS #define INSTRUMENTATION_API __declspec(dllexport) #elif defined(INSTRUMENTATION_NO_IMPORT) #define INSTRUMENTATION_API #else #define INSTRUMENTATION_API __declspec(dllimport) #endif struct 
instrumentation_edge { #ifdef _M_X64 uint64_t from; uint64_t to; #else uint32_t from; uint32_t to; #endif }; typedef struct instrumentation_edge instrumentation_edge_t; struct instrumentation_edges { #ifdef _M_X64 uint64_t num_edges; #else uint32_t num_edges; #endif instrumentation_edge_t edges[1]; }; typedef struct instrumentation_edges instrumentation_edges_t; struct instrumentation { void *(*create)(char * options, char * state); void(*cleanup)(void * instrumentation_state); void *(*merge)(void * instrumentation_state, void * other_instrumentation_state); char * (*get_state)(void * instrumentation_state); void(*free_state)(char * state); int(*set_state)(void * instrumentation_state, char * state); #ifdef _WIN32 int(*enable)(void * instrumentation_state, HANDLE * process, char * cmd_line, char * input, size_t input_length); #else int(*enable)(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length); #endif int(*is_new_path)(void * instrumentation_state); int(*get_fuzz_result)(void * instrumentation_state); //Optional int (*get_module_info)(void * instrumentation_state, int index, int * is_new, char ** module_name, char ** info, int * size); instrumentation_edges_t * (*get_edges)(void * instrumentation_state, int index); int(*is_process_done)(void * instrumentation_state); }; typedef struct instrumentation instrumentation_t; ================================================ FILE: instrumentation/instrumentation_factory.c ================================================ #include "instrumentation_factory.h" #ifdef _WIN32 #include "debug_instrumentation.h" #include "dynamorio_instrumentation.h" #else #include "return_code_instrumentation.h" #include "afl_instrumentation.h" #if !__APPLE__ // Linux #include "linux_ipt_instrumentation.h" #endif #endif #include #include #include #define FACTORY_ERROR() { free(ret); return NULL; } /** * This function obtains a instrumentation_t object by calling the instrumentation specified by 
instrumentation_type's create method. * @param instrumentation_type - the name of the instrumentation that should be created. Currently known instrumentation types are: dynamorio. * @param options - a JSON string that contains the instrumentation specific string of options * @return - a instrumentation_t object of the specified type on success or NULL on failure */ instrumentation_t * instrumentation_factory(char * instrumentation_type) { instrumentation_t * ret = (instrumentation_t *)malloc(sizeof(instrumentation_t)); memset(ret, 0, sizeof(instrumentation_t)); #ifdef _WIN32 if (!strcmp(instrumentation_type, "debug")) { ret->create = debug_create; ret->cleanup = debug_cleanup; ret->merge = debug_merge; ret->get_state = debug_get_state; ret->free_state = debug_free_state; ret->set_state = debug_set_state; ret->enable = debug_enable; ret->is_new_path = debug_is_new_path; ret->get_fuzz_result = debug_get_fuzz_result; ret->is_process_done = debug_is_process_done; } else if (!strcmp(instrumentation_type, "dynamorio")) { ret->create = dynamorio_create; ret->cleanup = dynamorio_cleanup; ret->merge = dynamorio_merge; ret->get_state = dynamorio_get_state; ret->free_state = dynamorio_free_state; ret->set_state = dynamorio_set_state; ret->enable = dynamorio_enable; ret->is_new_path = dynamorio_is_new_path; ret->get_module_info = dynamorio_get_module_info; ret->get_edges = dynamorio_get_edges; ret->is_process_done = dynamorio_is_process_done; ret->get_fuzz_result = dynamorio_get_fuzz_result; } #else if (!strcmp(instrumentation_type, "return_code")) { ret->create = return_code_create; ret->cleanup = return_code_cleanup; ret->merge = return_code_merge; ret->get_state = return_code_get_state; ret->free_state = return_code_free_state; ret->set_state = return_code_set_state; ret->enable = return_code_enable; ret->is_new_path = return_code_is_new_path; ret->get_fuzz_result = return_code_get_fuzz_result; ret->is_process_done = return_code_is_process_done; } else if 
(!strcmp(instrumentation_type, "afl")) { ret->create = afl_create; ret->cleanup = afl_cleanup; ret->merge = afl_merge; ret->get_state = afl_get_state; ret->free_state = afl_free_state; ret->set_state = afl_set_state; ret->enable = afl_enable; ret->is_new_path = afl_is_new_path; ret->get_fuzz_result = afl_get_fuzz_result; ret->is_process_done = afl_is_process_done; } #if !__APPLE__ // Linux else if (!strcmp(instrumentation_type, "ipt")) { ret->create = linux_ipt_create; ret->cleanup = linux_ipt_cleanup; ret->merge = linux_ipt_merge; ret->get_state = linux_ipt_get_state; ret->free_state = linux_ipt_free_state; ret->set_state = linux_ipt_set_state; ret->enable = linux_ipt_enable; ret->is_new_path = linux_ipt_is_new_path; ret->get_fuzz_result = linux_ipt_get_fuzz_result; ret->is_process_done = linux_ipt_is_process_done; } #endif #endif else FACTORY_ERROR(); return ret; } #define APPEND_HELP(text, new_text, func) \ if(!func(&new_text)) { \ text = (char *)realloc(text, strlen(text) + strlen(new_text) + 1); \ strcat(text, new_text); \ free(new_text); \ } /** * This function returns help text for all available instrumentations. This help text will describe the instrumentations and any options * that can be passed to their create functions. * @return - a newly allocated string containing the help text. 
*/ char * instrumentation_help(void) { char * text, *new_text; text = strdup("Instrumentation Options:\n\n"); #ifdef _WIN32 APPEND_HELP(text, new_text, debug_help); APPEND_HELP(text, new_text, dynamorio_help); #else APPEND_HELP(text, new_text, return_code_help); APPEND_HELP(text, new_text, afl_help); #if !__APPLE__ // Linux APPEND_HELP(text, new_text, linux_ipt_help); #endif #endif return text; } ================================================ FILE: instrumentation/instrumentation_factory.h ================================================ #pragma once #include "instrumentation.h" INSTRUMENTATION_API instrumentation_t * instrumentation_factory(char * instrumentation_type); INSTRUMENTATION_API char * instrumentation_help(void); ================================================ FILE: instrumentation/linux_ipt_instrumentation.c ================================================ // Linux-only Intel PT instrumentation. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "instrumentation.h" #include "linux_ipt_instrumentation.h" #include "forkserver_internal.h" #include "uthash.h" #include "xxhash.h" #include #include //////////////////////////////////////////////////////////////// // IPT Packet Analyzer ///////////////////////////////////////// //////////////////////////////////////////////////////////////// //Uncomment this #define to make the IPT parser print each packet and parser details //#define IPT_DEBUG #ifdef IPT_DEBUG //Prints each IPT packet and the packet bytes #define IPT_DEBUG_MSG_PACKET(...) DEBUG_MSG(__VA_ARGS__) //Prints status messages about the parser #define IPT_DEBUG_MSG(...) DEBUG_MSG(__VA_ARGS__) #else #define IPT_DEBUG_MSG_PACKET(...) #define IPT_DEBUG_MSG(...) 
#endif

//True when at least num bytes remain between the local parse cursor p and the local buffer end
#define BYTES_LEFT(num) ((end - p) >= (num))
//Non-zero (the masked bit itself, not necessarily 1) when the given bit of num is set
#define BIT_TEST(num, bit) ((num) & (1 << (bit)))

/**
 * Sign extends a value: the bit just below position sign_bit is treated as the
 * sign, and every bit from sign_bit upwards is set to match it.
 * @param num - the number to sign extend
 * @param sign_bit - the width, in bits, of the value held in num
 * @return - the sign extended number
 */
static uint64_t sign_extend(uint64_t num, uint8_t sign_bit)
{
  const uint64_t upper_bits = ~0ULL << sign_bit;
  const uint64_t sign_flag = 1ULL << (sign_bit - 1);

  if(num & sign_flag)
    return num | upper_bits;
  return num & ~upper_bits;
}

/**
 * This function parses an IPT TIP/FUP packet and obtains the IP address that it refers to
 * @param outp - The position of the TIP/FUP packet bytes in the IPT packet buffer. This pointer will
 * be updated to point after the parsed TIP/FUP packet.
 * @param end - The end of the IPT packet buffer. Used to ensure we don't read past the end
 * @param last_ip - The IP address that was in the most recent TIP/FUP packet. This value will be
 * updated with the IP address from the parsed packet.
 * @return - the IP address from the TIP/FUP packet.
*/ static uint64_t handle_ip_packet(unsigned char ** outp, unsigned char *end, uint64_t *last_ip) { unsigned char *p = *outp; uint64_t new_ip; int num_bytes; uint64_t new_bytes; int ip_bytes = p[0] >> 5; if (ip_bytes == 0) //IP is out of context return 0; else if(ip_bytes == 1) { //Bottom 32 bits, last_ip top 48 num_bytes = 2; new_bytes = *((uint64_t *)(p+1)) & 0xFFFFULL; new_ip = (*last_ip & (0xFFFFFFFFFFFFULL << 16)) | new_bytes; } else if(ip_bytes == 2) { //Bottom 32 bits, last_ip top 32 num_bytes = 4; new_bytes = *((uint64_t *)(p+1)) & 0xFFFFFFFFULL; new_ip = (*last_ip & (0xFFFFFFFFULL << 32)) | new_bytes; } else if(ip_bytes == 3) { //Bottom 48 bits, sign extended num_bytes = 6; new_bytes = *((uint64_t *)(p+1)) & 0xFFFFFFFFFFFFULL; new_ip = sign_extend(new_bytes, 48); } else if(ip_bytes == 4) { //Bottom 48 bits, last_ip top 16 num_bytes = 6; new_bytes = *((uint64_t *)(p+1)) & 0xFFFFFFFFFFFFULL; new_ip = (*last_ip & (0xFFFFULL << 48)) | new_bytes; } else if(ip_bytes == 6) { //All 64 bits num_bytes = 8; new_ip = *((uint64_t *)(p+1)); } else { WARNING_MSG("Got unknown IP packet (IPBytes=%d)", ip_bytes); return 0; } if (!BYTES_LEFT(num_bytes)) { WARNING_MSG("Got error in handle_ip_packet: Not enough bytes for decoding IP (have %lu, need %lu)", end-p, ip_bytes); return 0; } *outp = p + num_bytes; *last_ip = new_ip; return new_ip; } /** * This function adds any remaining TNT packet bits to the TNT hash being recorded * @param ipt_hashes - A pointer to the hash structure with the TNT hash to update */ static void finish_tnt_hash(struct ipt_hash_state * ipt_hashes) { if(ipt_hashes->num_bits != 0) { if(XXH64_update(ipt_hashes->tnt, &ipt_hashes->tnt_bits, sizeof(uint64_t)) == XXH_ERROR) WARNING_MSG("Updating the TNT hash failed!"); //Should never happen } //Add in the total number of bits, so we can differentiate between a packet with TNN and a packet with TN if(XXH64_update(ipt_hashes->tnt, &ipt_hashes->total_num_bits, sizeof(uint64_t)) == XXH_ERROR) 
WARNING_MSG("Updating the TNT hash failed!"); //Should never happen } /** * This function adds TNT packet bits to the TNT hash being recorded * @param ipt_hashes - A pointer to the hash structure with the TNT hash to update * @param tnt_bits - the TNT bits to add to the hash * @param num_bits - the number of bits in the tnt_bits parameter */ static void add_tnt_to_hash(struct ipt_hash_state * ipt_hashes, unsigned char * tnt_bits, int num_bits) { uint64_t i; #ifdef IPT_DEBUG char bit_string[64]; for(i = 0; i < num_bits; i++) bit_string[i] = BIT_TEST(tnt_bits[i / 8], i % 8) ? 'T' : 'N'; bit_string[num_bits] = 0; IPT_DEBUG_MSG("TNT bits %d: %s", num_bits, bit_string); #endif for(i = 0; i < num_bits; i++) { ipt_hashes->tnt_bits |= (BIT_TEST(tnt_bits[i / 8], i % 8) << ipt_hashes->num_bits); ipt_hashes->num_bits++; if(ipt_hashes->num_bits == sizeof(ipt_hashes->tnt_bits)) { if(XXH64_update(ipt_hashes->tnt, &ipt_hashes->tnt_bits, sizeof(uint64_t)) == XXH_ERROR) WARNING_MSG("Updating the TNT hash failed!"); //Should never happen ipt_hashes->tnt_bits = 0; ipt_hashes->num_bits = 0; } } ipt_hashes->total_num_bits += num_bits; } /** * This function adds a TIP packet's IP address to the TIP hash being recorded * @param state - The linux_ipt_state_t object containing this instrumentation's state * @param tip - the IP address to add to the TIP hash */ static void add_tip_to_hash(linux_ipt_state_t * state, uint64_t tip) { uint64_t adjusted_address = tip; int i; long index = -1; IPT_DEBUG_MSG("TIP %lx", tip); //Adjust the reported address to remove ASLR if(state->num_coverage_libraries) { for(i = 0; i < state->num_coverage_libraries; i++) { if(state->library_starts[i] <= tip && tip < state->library_ends[i]) { index = i; break; } } //Normalize the address, then mix in the hash of the library to ensure there are not collisions //when two separate libraries report a TIP at the same offset if(index != -1) adjusted_address = (tip - state->library_starts[i]) | 
(((uint64_t)state->library_hashes[i]) << 32); } else if(state->target_start <= tip && tip < state->target_end) //if the address is in the target executable adjusted_address = tip - state->target_start; //normalize the address with the target's start address if(XXH64_update(state->ipt_hashes.tip, &adjusted_address, sizeof(uint64_t)) == XXH_ERROR) WARNING_MSG("Updating the TIP hash failed!"); //Should never happen } /** * This function determines how many bits are in a TNT packet * @param packet - A pointer to the IPT packet buffer * @param max - the maximum possible bits that could be in a packet */ static int get_tnt_num_bits(unsigned char * packet, int max_bits) { int num_bits; for(num_bits = max_bits; num_bits >= 0; num_bits--) { //Find the stop bit if(BIT_TEST(packet[num_bits / 8], num_bits % 8)) break; } return num_bits; } /** * This function parses the IPT packet buffer to determine if the execution trace was new or not. If it was, the * execution trace's hash is added to the hashtable to ensure we do not mark it as new again. 
* @param state - The linux_ipt_state_t object containing this instrumentation's state * @param return - -1 on error, 0 if the IPT packets in the IPT packet buffer don't describe a unique run, or 1 if they do */ static int analyze_ipt(linux_ipt_state_t * state) { unsigned char * p, * start, * end, * psb_pos; struct ipt_hashtable_entry * hashes, * match = NULL; uint64_t ip_address; size_t num_bytes_at_end; int unknown_packet_hit = 0; const unsigned char psb[0x10] = { 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82 }; //Disable IPT while we're analyzing it ioctl(state->perf_fd, PERF_EVENT_IOC_DISABLE, 0); //Perform a quick sanity check to ensure the IPT trace data is sane if (state->pem->aux_head == state->pem->aux_tail) { WARNING_MSG("No IPT trace data was recorded, something is likely wrong."); return -1; } else if (state->persistence_max_cnt == 0 && state->pem->aux_head < state->pem->aux_tail) { WARNING_MSG("The IPT trace data has overflown. 
Use the ipt_mmap_size option to increase the size."); return -1; } //Reset the IPT hashes struct state->ipt_hashes.tnt_bits = 0; state->ipt_hashes.num_bits = 0; state->ipt_hashes.total_num_bits = 0; if(XXH64_reset(state->ipt_hashes.tnt, 0) == XXH_ERROR || XXH64_reset(state->ipt_hashes.tip, 0) == XXH_ERROR) return -1; hashes = malloc(sizeof(struct ipt_hashtable_entry)); if(!hashes) return -1; //Reorder the buffer if it wrapped around to make parsing easier if(state->pem->aux_head < state->pem->aux_tail) { state->reorder_buffer = malloc(state->ipt_mmap_size); num_bytes_at_end = state->pem->aux_size - state->pem->aux_tail; memcpy(state->reorder_buffer, state->perf_aux_buf + state->pem->aux_tail, num_bytes_at_end); memcpy(state->reorder_buffer + num_bytes_at_end, state->perf_aux_buf, state->pem->aux_head); start = state->reorder_buffer; end = state->reorder_buffer + num_bytes_at_end + state->pem->aux_head; } else { start = (char *)state->perf_aux_buf + state->pem->aux_tail; end = (char *)state->perf_aux_buf + state->pem->aux_head; } p = start; #ifdef IPT_DEBUG write_buffer_to_file("/tmp/ipt_dump", start, end-start); #endif //Rather than use Intel's libipt, we instead parse the buffer ourselves to ensure we can do so //quickly. As we only need the TIP/TNT packets, this parser attempts to parse as little else //as possible. Further, we only record hashes of the TIP/TNT packets, as full decoding of the //IPT packets to match them to the basic blocks transitions is far too slow. 
while(p < end) { if(unknown_packet_hit) { psb_pos = memmem(p, end - p, psb, sizeof(psb)); if(!psb_pos) { DEBUG_MSG("Couldn't find PSB packet"); break; } if(psb_pos - p != 0) IPT_DEBUG_MSG("Skipping %d bytes", psb_pos - p); p = psb_pos + sizeof(psb); state->last_ip = 0; unknown_packet_hit = 0; } while(p < end) { IPT_DEBUG_MSG_PACKET("%04x: %02x %02x %02x %02x %02x %02x %02x %02x", p - start, (unsigned char)p[0], (unsigned char)p[1], (unsigned char)p[2], (unsigned char)p[3], (unsigned char)p[4], (unsigned char)p[5], (unsigned char)p[6], (unsigned char)p[7]); if (p[0] == 2 && BYTES_LEFT(2)) { if (p[1] == 0xa3 && BYTES_LEFT(8)) { // Long TNT IPT_DEBUG_MSG_PACKET("Long TNT"); add_tnt_to_hash(&state->ipt_hashes, p+2, get_tnt_num_bits(p+2, 47)); p += 8; continue; } if (p[1] == 0x43 && BYTES_LEFT(8)) { // PIP IPT_DEBUG_MSG_PACKET("PIP"); p += 8; continue; } if (p[1] == 3 && BYTES_LEFT(4)) { // CBR IPT_DEBUG_MSG_PACKET("CBR"); p += 4; continue; } if (p[1] == 0x83) { //TRACESTOP IPT_DEBUG_MSG_PACKET("TRACESTOP"); p += 2; continue; } if (p[1] == 0xf3 && BYTES_LEFT(8)) { // OVF p += 8; WARNING_MSG("IPT received overflow packet"); continue; } if (p[1] == 0x82 && BYTES_LEFT(16) && !memcmp(p, psb, 16)) { // PSB IPT_DEBUG_MSG_PACKET("PSB"); p += 16; state->last_ip = 0; continue; } if (p[1] == 0x23) { // PSBEND IPT_DEBUG_MSG_PACKET("PSBEND"); p += 2; continue; } if (p[1] == 0xc3 && BYTES_LEFT(11) && p[2] == 0x88) { //MNT IPT_DEBUG_MSG_PACKET("MNT"); p += 10; continue; } if (p[1] == 0x73 && BYTES_LEFT(7)) { //TMA IPT_DEBUG_MSG_PACKET("TMA"); p += 7; continue; } if (p[1] == 0xc8 && BYTES_LEFT(7)) { //VMCS IPT_DEBUG_MSG_PACKET("VMCS"); p += 7; continue; } } if(!(p[0] & 1)) { if (p[0] == 0) { // PAD IPT_DEBUG_MSG_PACKET("PAD"); p++; continue; } // Short TNT char tnt_bits = p[0] >> 1; add_tnt_to_hash(&state->ipt_hashes, &tnt_bits, get_tnt_num_bits(&tnt_bits, 6)); IPT_DEBUG_MSG_PACKET("SHORT TNT"); p++; continue; } #define TIP_TYPE_TIP 0xd #define TIP_TYPE_TIP_PGE 0x11 #define 
TIP_TYPE_TIP_PGD 0x1 #define TIP_TYPE_FUP 0x1d char tip_type = p[0] & 0x1f; if(tip_type == TIP_TYPE_TIP || tip_type == TIP_TYPE_TIP_PGE || tip_type == TIP_TYPE_TIP_PGD || tip_type == TIP_TYPE_FUP) { ip_address = handle_ip_packet(&p, end, &state->last_ip); IPT_DEBUG_MSG_PACKET("TIP/PGE/PGD/FUP"); if(tip_type == TIP_TYPE_TIP) add_tip_to_hash(state, ip_address); p++; continue; } if (p[0] == 0x99 && BYTES_LEFT(2)) { // MODE IPT_DEBUG_MSG_PACKET("MODE"); p += 2; continue; } if (p[0] == 0x19 && BYTES_LEFT(8)) { // TSC IPT_DEBUG_MSG_PACKET("TSC"); p+=8; continue; } if (p[0] == 0x59 && BYTES_LEFT(2)) { // MTC IPT_DEBUG_MSG_PACKET("MTC"); p += 2; continue; } if ((p[0] & 3) == 3) { // CYC IPT_DEBUG_MSG_PACKET("CYC"); if ((p[0] & 4) && BYTES_LEFT(1)) { do { p++; } while ((p[0] & 1) && BYTES_LEFT(1)); } p++; continue; } WARNING_MSG("Hit unknown packet type at offset 0x%lx", p - start); unknown_packet_hit = 1; break; } } //Create a hashtable entry to lookup/add finish_tnt_hash(&state->ipt_hashes); memset(hashes, 0, sizeof(struct ipt_hashtable_entry)); hashes->id.tip = XXH64_digest(state->ipt_hashes.tip); hashes->id.tnt = XXH64_digest(state->ipt_hashes.tnt); DEBUG_MSG("Got TIP hash 0x%llx and TNT hash 0x%llx", hashes->id.tip, hashes->id.tnt); //Look for our hashes in the hashtable, and add them if they're not already in it HASH_FIND(hh, state->head, &hashes->id, sizeof(struct ipt_hashtable_key), match); if(!match) HASH_ADD(hh, state->head, id, sizeof(struct ipt_hashtable_key), hashes); else free(hashes); return match == NULL; } //////////////////////////////////////////////////////////////// // Private methods ///////////////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function wraps the perf_event_open syscall, which does not have one in libc */ static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, hw_event, 
(uintptr_t)pid, (uintptr_t)cpu, (uintptr_t)group_fd, (uintptr_t)flags); } /** * This function cleans up the IPT related file descriptor and memory mappings * @param state - The linux_ipt_state_t object containing this instrumentation's state */ static void cleanup_ipt(linux_ipt_state_t * state) { if(state->perf_aux_buf && state->perf_aux_buf != MAP_FAILED && state->pem && state->pem != MAP_FAILED) { munmap(state->perf_aux_buf, state->pem->aux_size); state->perf_aux_buf = NULL; munmap(state->pem, state->ipt_mmap_size + getpagesize()); state->pem = NULL; } if(state->perf_fd >= 0) close(state->perf_fd); state->perf_fd = -1; } /** * This function determines the size used in an IPT filter for the specified filename. * @param filename - the filename determine the IPT filter size for * @return - the size that should be specified in an IPT filter for the given filename */ static size_t get_file_filter_size(char * filename) { struct stat statbuf; size_t pagesize = getpagesize(); size_t ret; if(stat(filename, &statbuf)) FATAL_MSG("Couldn't get size of \"%s\"", filename); ret = statbuf.st_size; if(ret % pagesize != 0) ret = (((ret + pagesize) / pagesize) * pagesize); return ret; } /** * This function creates an IPT filter for the the coverage libraries specified in a linux_ipt_state * @param state - The linux_ipt_state_t object containing this instrumentation's state * @return - an IPT filter that can be passed to the Linux perf subsystem, which will instruction IPT to only * generate IPT packets for the regions defined in the linux_ipt_state. 
*/ static char * create_ipt_filter(linux_ipt_state_t * state) { char item_filter[1000], filter[4096]; size_t i; //See https://elixir.bootlin.com/linux/v4.17.8/source/kernel/events/core.c#L8806 for the filter format //start is autodetected by the kernel memset(filter, 0, sizeof(filter)); if(state->num_coverage_libraries) { for(i = 0; i < state->num_coverage_libraries; i++) { snprintf(item_filter, sizeof(item_filter), "filter 0/%ld@%s%s", get_file_filter_size(state->coverage_libraries[i]), state->coverage_libraries[i], i != state->num_coverage_libraries - 1 ? "\n" : ""); strncat(filter, item_filter, sizeof(filter) - (strlen(filter) + 1)); } } else snprintf(filter, sizeof(filter), "filter 0/%ld@%s", get_file_filter_size(state->target_path), state->target_path); IPT_DEBUG_MSG("Using filter: %s", filter); return strdup(filter); } /** * This function sets up IPT tracing for the specified process * @param state - The linux_ipt_state_t object containing this instrumentation's state * @param pid - The process ID of the process to trace * @return - 0 on success, non-zero on failure */ static int setup_ipt(linux_ipt_state_t * state, pid_t pid) { struct perf_event_attr pe; state->last_ip = 0; memset(&pe, 0, sizeof(struct perf_event_attr)); pe.size = sizeof(struct perf_event_attr); pe.config = (1U << 11); // Disable RET compression, makes parsing easier pe.disabled = 0; pe.enable_on_exec = 0; pe.exclude_hv = 1; pe.exclude_kernel = 1; pe.type = state->intel_pt_type; state->perf_fd = perf_event_open(&pe, pid, -1, -1, PERF_FLAG_FD_CLOEXEC); if(state->perf_fd < 0) { ERROR_MSG("Could not open the perf event file system (perf_event_open failed with errno %d (%s))", errno, strerror(errno)); ERROR_MSG("Try adjusting the perf system permissions with: echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoid"); return 1; } if(!state->filter) state->filter = create_ipt_filter(state); if(ioctl(state->perf_fd, PERF_EVENT_IOC_SET_FILTER, state->filter)) { ERROR_MSG("perf filter failed! 
(errno %d: %s)", errno, strerror(errno)); return 1; } state->pem = mmap(NULL, state->ipt_mmap_size + getpagesize(), PROT_READ|PROT_WRITE, MAP_SHARED, state->perf_fd, 0); if(state->pem == MAP_FAILED) { ERROR_MSG("Perf mmap failed (ipt_mmap_size=%d)\n", state->ipt_mmap_size); return 1; } state->pem->aux_offset = state->pem->data_offset + state->pem->data_size; state->pem->aux_size = state->ipt_mmap_size; state->perf_aux_buf = mmap(NULL, state->pem->aux_size, PROT_READ, MAP_SHARED, state->perf_fd, state->pem->aux_offset); if(state->perf_aux_buf == MAP_FAILED) { ERROR_MSG("Perf AUX mmap failed (ipt_mmap_size=%d)\n", state->ipt_mmap_size); return 1; } return 0; } /** * This function records the address information for the traced libraries or executable * inside of the fork server (which will have the same addresses as all target processes). * @param state - The linux_ipt_state_t object containing this instrumentation's state */ static void record_fork_server_address_info(linux_ipt_state_t * state) { char filename[64], line[1024+MAX_PATH], map_filename[MAX_PATH], last_filename[MAX_PATH]; FILE * fp; uint64_t start, end; int count, index, file_len; size_t i; char * file_buffer; XXH32_state_t * hash; //Allocate the library start/end arrays if(state->num_coverage_libraries) { state->library_starts = calloc(state->num_coverage_libraries, sizeof(uint64_t)); state->library_ends = calloc(state->num_coverage_libraries, sizeof(uint64_t)); state->library_hashes = calloc(state->num_coverage_libraries, sizeof(uint32_t)); if(!state->library_starts || !state->library_ends || !state->library_hashes) FATAL_MSG("Failed allocating memory for library address ranges and hashes"); } //Open /proc/$pid/maps snprintf(filename, sizeof(filename), "/proc/%d/maps", state->fs.pid); fp = fopen(filename, "r"); if(!fp) FATAL_MSG("Failed to open the fork server's maps file (%s)", filename); //Parse the maps file line by line, looking for the libraries or main executable while (fgets(line, sizeof(line), 
fp) != NULL) { memset(map_filename, 0, sizeof(map_filename)); count = sscanf(line, "%16lx-%16lx %*4s %*8s %*s %*d %1024s\n", &start, &end, map_filename); if(count != 3) continue; if(state->num_coverage_libraries) { for(i = 0; i < state->num_coverage_libraries; i++) { if(strcmp(map_filename, state->coverage_libraries[i]) == 0) { if(state->library_starts[i] == 0) state->library_starts[i] = start; state->library_ends[i] = end; } } } else if(strcmp(map_filename, state->target_path) == 0) { if(state->target_start == 0) state->target_start = start; state->target_end = end; } } fclose(fp); //Give a warning if we weren't able to find a library's or the main executable's start/end address if(state->num_coverage_libraries) { for(i = 0; i < state->num_coverage_libraries; i++) { if(!state->library_starts[i] || !state->library_ends[i]) { WARNING_MSG("Could not determine the address of the %s library in memory. The generated hashes will be specific to " "this run if ASLR is enabled.", state->coverage_libraries[i]); state->library_starts[i] = state->library_ends[i] = 0; } else { //Read the library file_len = read_file(state->coverage_libraries[i], &file_buffer); if(file_len < 0) FATAL_MSG("Couldn't open the library %s to calculate its hash", state->coverage_libraries[i]); //Calculate the library's hash hash = XXH32_createState(); if(XXH32_reset(hash, 0) == XXH_ERROR || XXH32_update(hash, file_buffer, file_len) == XXH_ERROR) FATAL_MSG("Failed calculating hash of library %s", state->coverage_libraries[i]); //Should never happen state->library_hashes[i] = XXH32_digest(hash); //Deallocate the hash and file contents XXH32_freeState(hash); free(file_buffer); } } } else if(!state->target_start || !state->target_end) { WARNING_MSG("Could not determine the address of the target executable in memory. 
The generated hashes will be specific to " "this run if ASLR is enabled and the executable is PIE."); state->target_start = state->target_end = 0; } } /** * This function terminates the fuzzed process. * @param state - The linux_ipt_state_t object containing this instrumentation's state */ static void destroy_target_process(linux_ipt_state_t * state, int force) { if(state->child_pid && state->child_pid != -1) { if(!state->persistence_max_cnt || force) { kill(state->child_pid, SIGKILL); state->child_pid = 0; } state->last_status = fork_server_get_status(&state->fs, 1); } } /** * This function starts the fuzzed process * @param state - The linux_ipt_state_t object containing this instrumentation's state * @param cmd_line - the command line of the fuzzed process to start * @param stdin_input - the input to pass to the fuzzed process's stdin * @param stdin_length - the length of the stdin_input parameter * @return - zero on success, non-zero on failure. */ static int create_target_process(linux_ipt_state_t * state, char* cmd_line, char * stdin_input, size_t stdin_length) { char ** argv; char * temp_path; int i, pid; if(!state->fork_server_setup) { if(split_command_line(cmd_line, &temp_path, &argv)) return -1; //Get the absolute path for the target state->target_path = realpath(temp_path, NULL); if(state->target_path) { fork_server_init(&state->fs, state->target_path, argv, 1, state->persistence_max_cnt, stdin_length != 0); record_fork_server_address_info(state); state->fork_server_setup = 1; } //Free the split up command line for(i = 0; argv[i]; i++) free(argv[i]); free(argv); free(temp_path); //if realpath failed, return failure if(!state->target_path) return -1; } pid = fork_server_fork(&state->fs); if(pid < 0) return -1; if(pid != state->child_pid) { //New target process, cleanup the old IPT state and set it up for the new target state->child_pid = pid; cleanup_ipt(state); if(setup_ipt(state, state->child_pid)) return -1; } else { //Persistence mode with the same 
target process being used, adjust the ring buffers and reenable IPT __sync_synchronize(); //smp_mb() __atomic_store_n(&state->pem->aux_tail, state->pem->aux_head, __ATOMIC_SEQ_CST); ioctl(state->perf_fd, PERF_EVENT_IOC_ENABLE, 0); } if(state->fs.target_stdin != -1) { //Take care of the stdin input, write over the file, then truncate it accordingly lseek(state->fs.target_stdin, 0, SEEK_SET); if(stdin_input != NULL && stdin_length != 0) { if(write(state->fs.target_stdin, stdin_input, stdin_length) != stdin_length) FATAL_MSG("Short write to target's stdin file"); } if(ftruncate(state->fs.target_stdin, stdin_length)) FATAL_MSG("ftruncate() failed"); lseek(state->fs.target_stdin, 0, SEEK_SET); } return 0; } /** * This function reads a number from the given file * @param filename - the path to the file to read a number from. * @return - The number that was in the specified file, or -1 on error */ static int get_file_int(char * filename) { int ret, fd; char buffer[16]; fd = open(filename, O_RDONLY); if(fd < 0) return -1; memset(buffer, 0, sizeof(buffer)); ret = read(fd, buffer, sizeof(buffer)-1); if(ret > 0) ret = atoi(buffer); else ret = -1; close(fd); return ret; } /** * This function reads the Intel PT state of the current processor from the sys filesystem * @param state - The linux_ipt_state_t object containing this instrumentation's state * @return - 0 on success, non-zero if IPT or IP address filtering are not supported */ static int get_ipt_system_info(linux_ipt_state_t * state) { int ret; if(access("/sys/devices/intel_pt/", F_OK)) { INFO_MSG("Intel PT not supported (/sys/devices/intel_pt/ does not exist)"); return -1; } ret = get_file_int("/sys/devices/intel_pt/type"); if(ret <= 0) { INFO_MSG("Intel PT not supported"); return -1; } state->intel_pt_type = ret; //For the moment, we'll only support Intel PT with address filtering ret = get_file_int("/sys/devices/intel_pt/caps/ip_filtering"); if(ret <= 0) { INFO_MSG("Intel PT address filtering not supported"); return 
-1; } ret = get_file_int("/sys/devices/intel_pt/caps/num_address_ranges"); if(ret <= 0) { INFO_MSG("Intel PT address filtering not supported"); return -1; } if(ret < state->num_coverage_libraries) { INFO_MSG("Too many coverage libraries specified. Intel PT address filtering on " "this system only supports %d, but %d were specified.", ret, state->num_coverage_libraries); return -1; } state->num_address_ranges = ret; return 0; } //////////////////////////////////////////////////////////////// // Instrumentation methods ///////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function creates a linux_ipt_state_t object based on the given options. * @param options - A JSON string of the options to set in the new linux_ipt_state_t. See the * help function for more information on the specific options available. * @return the linux_ipt_state_t generated from the options in the JSON options string, or NULL on failure */ static linux_ipt_state_t * setup_options(char * options) { linux_ipt_state_t * state; char * temp_path; size_t i; size_t pagesize = getpagesize(); state = malloc(sizeof(linux_ipt_state_t)); if(!state) return NULL; memset(state, 0, sizeof(linux_ipt_state_t)); //Setup defaults state->ipt_mmap_size = 1024*1024; //1MB //Parse the options if(options) { PARSE_OPTION_INT(state, options, persistence_max_cnt, "persistence_max_cnt", linux_ipt_cleanup); PARSE_OPTION_INT(state, options, ipt_mmap_size, "ipt_mmap_size", linux_ipt_cleanup); PARSE_OPTION_ARRAY(state, options, coverage_libraries, num_coverage_libraries, "coverage_libraries", linux_ipt_cleanup); } for(i = 0; i < state->num_coverage_libraries; i++) { if(!file_exists(state->coverage_libraries[i])) { ERROR_MSG("Could not access the specified coverage library \"%s\" does not exist", state->coverage_libraries[i]); linux_ipt_cleanup(state); return NULL; } //Get the absolute path for the library temp_path = realpath(state->coverage_libraries[i], NULL); 
if(!temp_path) { ERROR_MSG("Could not determine the absolute address of the specified coverage library \"%s\"", state->coverage_libraries[i]); linux_ipt_cleanup(state); return NULL; } free(state->coverage_libraries[i]); state->coverage_libraries[i] = temp_path; } //Fix up the IPT mmap size if it's not page aligned if(state->ipt_mmap_size % pagesize != 0) state->ipt_mmap_size = (((state->ipt_mmap_size + pagesize) / pagesize) * pagesize); //If we're in persistence mode, allocate the reorder buffer if(state->persistence_max_cnt) { state->reorder_buffer = malloc(state->ipt_mmap_size); if(!state->reorder_buffer) { linux_ipt_cleanup(state); return NULL; } } return state; } /** * This function allocates and initializes a new instrumentation specific state object based on the given options. * @param options - a JSON string that contains the instrumentation specific string of options * @param state - an instrumentation specific JSON string previously returned from linux_ipt_get_state that should be loaded * @return - An instrumentation specific state object on success or NULL on failure */ void * linux_ipt_create(char * options, char * state) { linux_ipt_state_t * linux_ipt_state = setup_options(options); if(!linux_ipt_state) return NULL; if(get_ipt_system_info(linux_ipt_state)) { linux_ipt_cleanup(linux_ipt_state); return NULL; } linux_ipt_state->ipt_hashes.tip = XXH64_createState(); linux_ipt_state->ipt_hashes.tnt = XXH64_createState(); if(!linux_ipt_state->ipt_hashes.tip || !linux_ipt_state->ipt_hashes.tnt) { linux_ipt_cleanup(linux_ipt_state); return NULL; } if(state && linux_ipt_set_state(linux_ipt_state, state)) { linux_ipt_cleanup(linux_ipt_state); return NULL; } return linux_ipt_state; } /** * This function cleans up all resources with the passed in instrumentation state. 
* @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function * This state object should not be referenced after this function returns. */ void linux_ipt_cleanup(void * instrumentation_state) { struct ipt_hashtable_entry * hash, * tmp; size_t i; linux_ipt_state_t * state = (linux_ipt_state_t *)instrumentation_state; //Kill any remaining target processes destroy_target_process(state, 1); //Cleanup the fork server if(state->fork_server_setup) { fork_server_exit(&state->fs); state->fork_server_setup = 0; } //Cleanup our xxhashes if(state->ipt_hashes.tnt != NULL) XXH64_freeState(state->ipt_hashes.tip); if(state->ipt_hashes.tnt != NULL) XXH64_freeState(state->ipt_hashes.tnt); //Cleanup the perf IPT fd and mmaps cleanup_ipt(state); //Cleanup the hashtable entries HASH_ITER(hh, state->head, hash, tmp) { HASH_DEL(state->head, hash); free(hash); } for(i = 0; i < state->num_coverage_libraries; i++) free(state->coverage_libraries[i]); free(state->library_starts); free(state->library_ends); free(state->library_hashes); free(state->coverage_libraries); free(state->reorder_buffer); free(state->filter); free(state->target_path); free(state); } /** * This function merges the coverage information from two instrumentation states. 
* @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function * @param other_instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function * @return - An instrumentation specific state object that contains the combination of both of the passed in instrumentation states * on success, or NULL on failure */ void * linux_ipt_merge(void * instrumentation_state, void * other_instrumentation_state) { linux_ipt_state_t * merged; struct ipt_hashtable_entry * entry = NULL, * hash = NULL, * tmp = NULL, * match = NULL; linux_ipt_state_t * first = (linux_ipt_state_t *)instrumentation_state; linux_ipt_state_t * second = (linux_ipt_state_t *)other_instrumentation_state; merged = linux_ipt_create(NULL, NULL); if (!merged) return NULL; //Add the first state's entries HASH_ITER(hh, first->head, hash, tmp) { entry = malloc(sizeof(struct ipt_hashtable_entry)); memset(entry, 0, sizeof(entry)); entry->id.tip = hash->id.tip; entry->id.tnt = hash->id.tnt; HASH_ADD(hh, merged->head, id, sizeof(struct ipt_hashtable_key), entry); } //Add the second state's entries HASH_ITER(hh, second->head, hash, tmp) { entry = malloc(sizeof(struct ipt_hashtable_entry)); memset(entry, 0, sizeof(entry)); entry->id.tip = hash->id.tip; entry->id.tnt = hash->id.tnt; HASH_FIND(hh, merged->head, &entry->id, sizeof(struct ipt_hashtable_key), match); if(!match) HASH_ADD(hh, merged->head, id, sizeof(struct ipt_hashtable_key), entry); } return merged; } /** * This function returns the state information holding the previous execution path info. The returned value can later be passed to * linux_ipt_create or linux_ipt_set_state to load the state. 
* @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function
 * @return - A JSON string that holds the instrumentation specific state object information on success, or NULL on failure
 */
char * linux_ipt_get_state(void * instrumentation_state)
{
	linux_ipt_state_t * state = (linux_ipt_state_t *)instrumentation_state;
	json_t *state_obj, *hash_obj, *hash_list, *temp;
	struct ipt_hashtable_entry * hash = NULL, * tmp = NULL;
	char * ret;

	state_obj = json_object();
	if (!state_obj)
		return NULL;

	ADD_INT(temp, state->last_status, state_obj, "last_status");
	ADD_INT(temp, state->process_finished, state_obj, "process_finished");
	ADD_INT(temp, state->last_fuzz_result, state_obj, "last_fuzz_result");
	ADD_INT(temp, state->fuzz_results_set, state_obj, "fuzz_results_set");
	ADD_INT(temp, state->last_is_new_path, state_obj, "last_is_new_path");

	hash_list = json_array();
	if (!hash_list) {
		json_decref(state_obj);
		return NULL;
	}

	//Serialize the key of every recorded path
	HASH_ITER(hh, state->head, hash, tmp) {
		hash_obj = json_mem((const char *)&hash->id, sizeof(struct ipt_hashtable_key));
		if (!hash_obj) {
			//hash_list has not been attached to state_obj yet, so both need releasing
			json_decref(hash_list);
			json_decref(state_obj);
			return NULL;
		}
		json_array_append_new(hash_list, hash_obj);
	}
	json_object_set_new(state_obj, "hash_list", hash_list);

	ret = json_dumps(state_obj, 0);
	json_decref(state_obj);
	return ret;
}

/**
 * This function frees an instrumentation state previously obtained via linux_ipt_get_state.
 * @param state - the instrumentation state to free
 */
void linux_ipt_free_state(char * state)
{
	free(state);
}

/**
 * This function sets the instrumentation state to the passed in state previously obtained via linux_ipt_get_state.
 * @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function
 * @param state - an instrumentation state previously obtained via linux_ipt_get_state
 * @return - 0 on success, non-zero on failure.
*/ int linux_ipt_set_state(void * instrumentation_state, char * state) { linux_ipt_state_t * current_state = (linux_ipt_state_t *)instrumentation_state; struct ipt_hashtable_entry * entry = NULL, * hash = NULL, * tmp = NULL, * match = NULL; json_t * hash_obj; int result, temp_int; size_t length; if(!state) return 1; //If a child process is running when the state is being set destroy_target_process(current_state, 0); //kill it so we don't orphan it //Free any existing hashes already in the hashtable HASH_ITER(hh, current_state->head, hash, tmp) { HASH_DEL(current_state->head, hash); free(hash); } GET_INT(temp_int, state, current_state->last_status, "last_status", result); GET_INT(temp_int, state, current_state->process_finished, "process_finished", result); GET_INT(temp_int, state, current_state->last_fuzz_result, "last_fuzz_result", result); GET_INT(temp_int, state, current_state->fuzz_results_set, "fuzz_results_set", result); GET_INT(temp_int, state, current_state->last_is_new_path, "last_is_new_path", result); FOREACH_OBJECT_JSON_ARRAY_ITEM_BEGIN(state, hash_list, "hash_list", hash_obj, result) length = json_mem_length(hash_obj); if(length != sizeof(struct ipt_hashtable_key)) return 1; entry = malloc(sizeof(struct ipt_hashtable_entry)); if(!entry) return 1; memset(entry, 0, sizeof(entry)); memcpy(&entry->id, json_mem_value(hash_obj), sizeof(struct ipt_hashtable_key)); HASH_FIND(hh, current_state->head, &entry->id, sizeof(struct ipt_hashtable_key), match); if(!match) HASH_ADD(hh, current_state->head, id, sizeof(struct ipt_hashtable_key), entry); FOREACH_OBJECT_JSON_ARRAY_ITEM_END(hash_list) return 0; //No state to set, so just return success } /** * This function enables the instrumentation and runs the fuzzed process. 
* @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function * @process - a pointer to return a handle to the process that the instrumentation was enabled on * @cmd_line - the command line of the fuzzed process to enable instrumentation on * @input - a buffer to the input that should be sent to the fuzzed process * @input_length - the length of the input parameter * returns 0 on success, -1 on failure */ int linux_ipt_enable(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length) { linux_ipt_state_t * state = (linux_ipt_state_t *)instrumentation_state; if(state->child_pid) destroy_target_process(state, 0); if(create_target_process(state, cmd_line, input, input_length)) return -1; state->process_finished = 0; state->fuzz_results_set = 0; if(fork_server_run(&state->fs)) return -1; *process = state->child_pid; return 0; } static int finish_fuzz_round(linux_ipt_state_t * state) { if(!state->fuzz_results_set) { //if it's still alive, it's a hang if(!linux_ipt_is_process_done(state)) { destroy_target_process(state, 1); state->last_fuzz_result = FUZZ_HANG; } //If it died from a signal (and it wasn't SIGKILL, that we send), it's a crash else if(WIFSIGNALED(state->last_status) && WTERMSIG(state->last_status) != SIGKILL) state->last_fuzz_result = FUZZ_CRASH; //Otherwise, just set FUZZ_NONE else state->last_fuzz_result = FUZZ_NONE; state->fuzz_results_set = 1; } return state->last_fuzz_result; } /** * This function determines whether the process being instrumented has taken a new path. Calling this function will stop the * process if it is not yet finished. * @param instrumentation_state - an instrumentation specific state object previously created by the linux_ipt_create function * @return - 1 if the previously setup process (via the enable function) took a new path, 0 if it did not, or -1 on failure. 
*/ int linux_ipt_is_new_path(void * instrumentation_state) { linux_ipt_state_t * state = (linux_ipt_state_t *)instrumentation_state; //Ensure that the process has finished parsing the input (or stop it if it's not) finish_fuzz_round(state); //If we haven't cleaned up the IPT state, then it must not have been if(state->perf_fd >= 0) //analyzed. Analyze it now and cleanup the IPT state state->last_is_new_path = analyze_ipt(state); return state->last_is_new_path; } /** * This function will return the result of the fuzz job. It should be called * after the process has finished processing the tested input. * @param instrumentation_state - an instrumentation specific structure previously created by the linux_ipt_create function * @return - either FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH, or -1 on error. */ int linux_ipt_get_fuzz_result(void * instrumentation_state) { return finish_fuzz_round((linux_ipt_state_t *)instrumentation_state); } /** * Checks if the target process is done fuzzing the inputs yet. * @param state - The linux_ipt_state_t object containing this instrumentation's state * @return - 0 if the process is not done testing the fuzzed input, non-zero if the process is done. */ int linux_ipt_is_process_done(void * instrumentation_state) { int status; linux_ipt_state_t * state = (linux_ipt_state_t *)instrumentation_state; if(state->process_finished) return 1; status = fork_server_get_status(&state->fs, 0); //it's still alive or an error occurred and we can't tell if(status < 0 || status == FORKSERVER_NO_RESULTS_READY) return 0; state->last_status = status; state->process_finished = 1; return 1; } /** * This function returns help text for the Linux IPT instrumentation. * @param help_str - A pointer that will be updated to point to the new help string. 
* @return 0 on success and -1 on failure
 */
int linux_ipt_help(char ** help_str)
{
	//The help text is heap allocated via strdup
	*help_str = strdup(
		"ipt - Linux IPT instrumentation\n"
		"Options:\n"
		" persistence_max_cnt The number of executions to run in one process while\n"
		" fuzzing in persistence mode\n"
		" ipt_mmap_size The amount of memory to use for the IPT trace data\n"
		" buffer\n"
		" coverage_libraries An array of library or executable filenames that IPT\n"
		" should record trace information. By default, only\n"
		" the executable is traced.\n"
		"\n"
	);
	if (*help_str == NULL)
		return -1;
	return 0;
}

================================================
FILE: instrumentation/linux_ipt_instrumentation.h
================================================
#pragma once
#include "forkserver_internal.h"
#include "uthash.h"
#include "xxhash.h"

void * linux_ipt_create(char * options, char * state);
void linux_ipt_cleanup(void * instrumentation_state);
void * linux_ipt_merge(void * instrumentation_state, void * other_instrumentation_state);
char * linux_ipt_get_state(void * instrumentation_state);
void linux_ipt_free_state(char * state);
int linux_ipt_set_state(void * instrumentation_state, char * state);
int linux_ipt_enable(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length);
int linux_ipt_is_new_path(void * instrumentation_state);
int linux_ipt_is_process_done(void * instrumentation_state);
int linux_ipt_get_fuzz_result(void * instrumentation_state);
int linux_ipt_help(char ** help_str);

//The key of an entry in the hashtable of recorded paths. The whole struct is
//used as the uthash key and is what gets serialized by linux_ipt_get_state.
struct ipt_hashtable_key
{
	uint64_t tip; //presumably the digest of the ipt_hashes.tip state - TODO confirm
	uint64_t tnt; //presumably the digest of the ipt_hashes.tnt state - TODO confirm
};

//An entry in the uthash hashtable of recorded paths (linux_ipt_state.head)
struct ipt_hashtable_entry
{
	struct ipt_hashtable_key id;
	UT_hash_handle hh;
};

struct ipt_hash_state
{
	uint64_t tnt_bits;
	uint64_t num_bits;
	uint64_t total_num_bits;
	XXH64_state_t * tnt; //Created in linux_ipt_create, freed in linux_ipt_cleanup
	XXH64_state_t * tip; //Created in linux_ipt_create, freed in linux_ipt_cleanup
};

struct linux_ipt_state
{
	//Options
	int persistence_max_cnt; //The "persistence_max_cnt" option, non-zero enables persistence mode
	int ipt_mmap_size; //The "ipt_mmap_size" option (default 1MB), rounded up to a multiple of the page size
	char ** coverage_libraries; //The "coverage_libraries" option, converted to absolute paths
	uint64_t * library_starts; //Per library start address, read from the fork server's maps file
	uint64_t * library_ends; //Per library end address, read from the fork server's maps file
	uint32_t * library_hashes; //Per library XXH32 hash of the library file's contents
	size_t num_coverage_libraries; //Number of items in the four arrays above

	char * target_path; //Absolute path of the target executable (allocated by realpath)
	uint64_t target_start; //Start/end address of the target executable; only looked up
	uint64_t target_end; //when no coverage libraries were specified

	int num_address_ranges; //Read from /sys/devices/intel_pt/caps/num_address_ranges
	int fork_server_setup; //Non-zero once the fork server has been started
	int intel_pt_type; //Read from /sys/devices/intel_pt/type
	int perf_fd; //The fd that the perf buffers are mmap'd from
	struct perf_event_mmap_page * pem; //mmap of perf_fd at offset 0
	void * perf_aux_buf; //Read-only mmap of the perf AUX area
	char * reorder_buffer; //ipt_mmap_size bytes, only allocated in persistence mode
	uint64_t last_ip;
	char * filter; //Freed in linux_ipt_cleanup
	struct ipt_hash_state ipt_hashes;
	struct ipt_hashtable_entry * head; //Head of the uthash hashtable of recorded paths

	pid_t child_pid; //The pid of the current target process, 0 if there is none
	forkserver_t fs;

	int last_status; //The last status obtained from fork_server_get_status
	int process_finished; //Non-zero once the target's status has been collected for this round
	int last_fuzz_result; //FUZZ_NONE, FUZZ_HANG, or FUZZ_CRASH; valid if fuzz_results_set
	int fuzz_results_set; //Non-zero once last_fuzz_result was determined for this round
	int last_is_new_path; //The last result of analyze_ipt
};
typedef struct linux_ipt_state linux_ipt_state_t;

================================================
FILE: instrumentation/return_code_instrumentation.c
================================================
// Linux-only return code instrumentation.
//NOTE(review): the header names of the bare #include lines below are missing in
//this copy of the file - restore them from the repository
#include // kill
#include // memset
#include
#include
#include "instrumentation.h"
#include "return_code_instrumentation.h"
#include "forkserver_internal.h"
#include
#include

////////////////////////////////////////////////////////////////
// Private methods /////////////////////////////////////////////
////////////////////////////////////////////////////////////////

/**
 * This function terminates the fuzzed process and sets the result in the
 * instrumentation state.
 *
 * @param state - The return_code_state_t object containing this
 * instrumentation's state
 */
static void destroy_target_process(return_code_state_t * state)
{
	if(state->child_pid && state->child_pid != -1) {
		//Without the fork server, the status is collected before the kill
		if(!state->use_fork_server)
			state->last_status = get_process_status(state->child_pid);
		kill(state->child_pid, SIGKILL);
		state->child_pid = 0;
		//With the fork server, the status is requested from it after the kill
		if(state->use_fork_server)
			state->last_status = fork_server_get_status(&state->fs, 1);
	}
}

/**
 * This function starts the fuzzed process
 * @param state - The return_code_state_t object containing this instrumentation's state
 * @param cmd_line - the command line of the fuzzed process to start
 * @param stdin_input - the input to pass to the fuzzed process's stdin
 * @param stdin_length - the length of the stdin_input parameter
 * @return - zero on success, non-zero on failure.
*/ static int create_target_process(return_code_state_t * state, char* cmd_line, char * stdin_input, size_t stdin_length) { int i; char ** argv; char * target_path; state->last_status = FUZZ_RUNNING; state->process_reaped = 0; if(state->use_fork_server) { if(!state->fork_server_setup) { if(split_command_line(cmd_line, &target_path, &argv)) return -1; //Start the fork server fork_server_init(&state->fs, target_path, argv, 1, 0, stdin_length != 0); state->fork_server_setup = 1; //Free the split up command line for(i = 0; argv[i]; i++) free(argv[i]); free(argv); free(target_path); } if(state->fs.target_stdin != -1) { //Take care of the stdin input, write over the file, then truncate it accordingly lseek(state->fs.target_stdin, 0, SEEK_SET); if(stdin_input != NULL && stdin_length != 0) { if(write(state->fs.target_stdin, stdin_input, stdin_length) != stdin_length) FATAL_MSG("Short write to target's stdin file"); } if(ftruncate(state->fs.target_stdin, stdin_length)) FATAL_MSG("ftruncate() failed"); lseek(state->fs.target_stdin, 0, SEEK_SET); } //Start the new child and tell it to go state->child_pid = fork_server_fork_run(&state->fs); if(state->child_pid < 0) { ERROR_MSG("Fork server failed to fork a new child\n"); return -1; } } else { if (start_process_and_write_to_stdin(cmd_line, stdin_input, stdin_length, &state->child_pid)) { state->child_pid = 0; ERROR_MSG("Failed to create process with command line: %s\n", cmd_line); return -1; } } return 0; } /** * This function creates a return_code_state_t object based on the given options. * @param options - A JSON string of the options to set in the new * return_code_state_t. See the help function for more information on * the specific options available. 
* @return the return_code_state_t generated from the options in the JSON options * string, or NULL on failure */ static return_code_state_t * setup_options(char *options) { return_code_state_t * state; state = malloc(sizeof(return_code_state_t)); if(!state) return NULL; memset(state, 0, sizeof(return_code_state_t)); state->use_fork_server = 1; // default to use the fork server if(options) { PARSE_OPTION_INT(state, options, use_fork_server, "use_fork_server", return_code_cleanup); } return state; } //////////////////////////////////////////////////////////////// // Instrumentation methods ///////////////////////////////////// //////////////////////////////////////////////////////////////// /** * This function allocates and initializes a new instrumentation specific state object based on the given options. * @param options - a JSON string that contains the instrumentation specific string of options * @param state - an instrumentation specific JSON string previously returned from return_code_get_state that should be loaded * @return - An instrumentation specific state object on success or NULL on failure */ void * return_code_create(char * options, char * state) { return_code_state_t * return_code_state = setup_options(options); if (!return_code_state) return NULL; if (state && return_code_set_state(return_code_state, state)) { return_code_cleanup(return_code_state); return NULL; } return return_code_state; } /** * This function cleans up all resources with the passed in instrumentation state. * @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * This state object should not be referenced after this function returns. */ void return_code_cleanup(void * instrumentation_state) { return_code_state_t * state = (return_code_state_t *)instrumentation_state; destroy_target_process(state); free(state); } /** * This function merges the coverage information from two instrumentation states. 
This will always fail for the * return_code instrumentation, since it does not record instrumentation data. * @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @param other_instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @return - An instrumentation specific state object that contains the combination of both of the passed in instrumentation states * on success, or NULL on failure */ void * return_code_merge(void * instrumentation_state, void * other_instrumentation_state) { return NULL; // No instrumentation data, so we can't ever merge } /** * This function returns the state information holding the previous execution path info. The returned value can later be passed to * return_code_create or return_code_set_state to load the state. * @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @return - A JSON string that holds the instrumentation specific state object information on success, or NULL on failure */ char * return_code_get_state(void * instrumentation_state) { return_code_state_t * state = (return_code_state_t *)instrumentation_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->last_status, state_obj, "last_status"); ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function frees an instrumentation state previously obtained via return_code_get_state. * @param state - the instrumentation state to free */ void return_code_free_state(char * state) { free(state); } /** * This function sets the instrumentation state to the passed in state previously obtained via return_code_get_state. 
* @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @param state - an instrumentation state previously obtained via return_code_get_state * @return - 0 on success, non-zero on failure. */ int return_code_set_state(void * instrumentation_state, char * state) { return_code_state_t * current_state = (return_code_state_t *)instrumentation_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->last_status, "last_status", result); return 0; } /** * This function enables the instrumentation and runs the fuzzed process. If the process needs to be restarted, it will be. * @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @process - a pointer to return a handle to the process that instrumentation was enabled on * @cmd_line - the command line of the fuzzed process to enable instrumentation on * @input - a buffer to the input that should be sent to the fuzzed process on stdin * @input_length - the length of the input parameter * returns 0 on success, -1 on failure */ int return_code_enable(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length) { return_code_state_t * state = (return_code_state_t *)instrumentation_state; if(state->child_pid) destroy_target_process(state); if (create_target_process(state, cmd_line, input, input_length)) return -1; state->enable_called = 1; *process = state->child_pid; return 0; } /** * This function determines whether the process being instrumented has taken a new path. The return_code instrumentation does * not track the fuzzed process's path, so it is unable to determine if the process took a new path. 
* @param instrumentation_state - an instrumentation specific state object previously created by the return_code_create function * @return - 0 when a new path wasn't detected (as it always won't be with the return_code instrumentation), or -1 on failure. */ int return_code_is_new_path(void * instrumentation_state) { return_code_state_t * state = (return_code_state_t *)instrumentation_state; if(!state->enable_called) return -1; return 0; //We don't gather instrumentation data, so we can't ever tell if we hit a new path. } /** * This function will return the result of the fuzz job. It should be called * after the process has finished processing the tested input, which should always be the case * with the return_code instrumentation, since test_next_input should always wait for the process to finish. * @param instrumentation_state - an instrumentation specific structure previously created by the create() function * @return - either FUZZ_NONE, FUZZ_HANG, FUZZ_CRASH, or -1 on error. */ int return_code_get_fuzz_result(void * instrumentation_state) { return_code_state_t * state = (return_code_state_t *)instrumentation_state; if(!state->enable_called) return -1; return state->last_status; } /** * Checks if the target process is done fuzzing the inputs yet. If it has finished, it will have * written last_status, the result of the fuzz job. 
* * @param state - The return_code_state_t object containing this instrumentation's state * @return - 0 if the process has not done testing the fuzzed input, 1 if the process is done, -1 on error */ int return_code_is_process_done(void * instrumentation_state) { int status; return_code_state_t * state = (return_code_state_t *)instrumentation_state; if(!state->enable_called) return -1; if (state->process_reaped == 1) { return state->last_status; } else { if(state->use_fork_server) { status = fork_server_get_status(&state->fs, 0); //it's still alive or an error occurred and we can't tell if(status < 0 || status == FORKSERVER_NO_RESULTS_READY) return 0; if(WIFSIGNALED(status) && WTERMSIG(status) != SIGKILL) state->last_status = FUZZ_CRASH; else state->last_status = FUZZ_NONE; state->process_reaped = 1; return 1; } else { int fuzz_result = get_process_status(state->child_pid); // expects 2, 1, 0, or -1 if (fuzz_result == FUZZ_RUNNING) // it's aliiiiive // don't set last_status here, because hangs are handled by the timeout in the driver. return 0; else if (fuzz_result == FUZZ_CRASH || fuzz_result == FUZZ_NONE) // crash or clean exit { state->last_status = fuzz_result; state->process_reaped = 1; return 1; } else // get_process_status returned an error { state->last_status = fuzz_result; return -1; } } } } /** * This function returns help text for this instrumentation. This help text will describe the instrumentation and any options * that can be passed to return_code_create. * @param help_str - A pointer that will be updated to point to the new help string. 
* @return 0 on success and -1 on failure */ int return_code_help(char ** help_str) { *help_str = strdup( "return_code - Linux/Mac return_code \"instrumentation\"\n" "Options:\n" " use_fork_server Whether to inject the fork server library; 1=yes, 0=no (default=1)\n" "\n" ); if (*help_str == NULL) return -1; return 0; } ================================================ FILE: instrumentation/return_code_instrumentation.h ================================================ #pragma once #include "forkserver_internal.h" void * return_code_create(char * options, char * state); void return_code_cleanup(void * instrumentation_state); void * return_code_merge(void * instrumentation_state, void * other_instrumentation_state); char * return_code_get_state(void * instrumentation_state); void return_code_free_state(char * state); int return_code_set_state(void * instrumentation_state, char * state); int return_code_enable(void * instrumentation_state, pid_t * process, char * cmd_line, char * input, size_t input_length); int return_code_is_new_path(void * instrumentation_state); int return_code_get_fuzz_result(void * instrumentation_state); int return_code_is_process_done(void * instrumentation_state); int return_code_help(char ** help_str); struct return_code_state { int fork_server_setup; int use_fork_server; forkserver_t fs; pid_t child_pid; int enable_called; int last_status; int process_reaped; // used to prevent further calls to get_process_status if the process has been reaped }; typedef struct return_code_state return_code_state_t; ================================================ FILE: instrumentation/uthash.h ================================================ /* Copyright (c) 2003-2018, Troy D. Hanson http://troydhanson.github.com/uthash/ All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef UTHASH_H #define UTHASH_H #define UTHASH_VERSION 2.0.2 #include /* memcmp, memset, strlen */ #include /* ptrdiff_t */ #include /* exit */ /* These macros use decltype or the earlier __typeof GNU extension. As decltype is only available in newer compilers (VS2010 or gcc 4.3+ when compiling c++ source) this code uses whatever method is needed or, for VS2008 where neither is available, uses casting workarounds. 
*/ #if !defined(DECLTYPE) && !defined(NO_DECLTYPE) #if defined(_MSC_VER) /* MS compiler */ #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ #define DECLTYPE(x) (decltype(x)) #else /* VS2008 or older (or VS2010 in C mode) */ #define NO_DECLTYPE #endif #elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) #define NO_DECLTYPE #else /* GNU, Sun and other compilers */ #define DECLTYPE(x) (__typeof(x)) #endif #endif #ifdef NO_DECLTYPE #define DECLTYPE(x) #define DECLTYPE_ASSIGN(dst,src) \ do { \ char **_da_dst = (char**)(&(dst)); \ *_da_dst = (char*)(src); \ } while (0) #else #define DECLTYPE_ASSIGN(dst,src) \ do { \ (dst) = DECLTYPE(dst)(src); \ } while (0) #endif /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ #if defined(_WIN32) #if defined(_MSC_VER) && _MSC_VER >= 1600 #include #elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) #include #else typedef unsigned int uint32_t; typedef unsigned char uint8_t; #endif #elif defined(__GNUC__) && !defined(__VXWORKS__) #include #else typedef unsigned int uint32_t; typedef unsigned char uint8_t; #endif #ifndef uthash_malloc #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ #endif #ifndef uthash_free #define uthash_free(ptr,sz) free(ptr) /* free fcn */ #endif #ifndef uthash_bzero #define uthash_bzero(a,n) memset(a,'\0',n) #endif #ifndef uthash_memcmp #define uthash_memcmp(a,b,n) memcmp(a,b,n) #endif #ifndef uthash_strlen #define uthash_strlen(s) strlen(s) #endif #ifndef uthash_noexpand_fyi #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ #endif #ifndef uthash_expand_fyi #define uthash_expand_fyi(tbl) /* can be defined to log expands */ #endif #ifndef HASH_NONFATAL_OOM #define HASH_NONFATAL_OOM 0 #endif #if HASH_NONFATAL_OOM /* malloc failures can be recovered from */ #ifndef uthash_nonfatal_oom #define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ #endif 
#define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) #define IF_HASH_NONFATAL_OOM(x) x #else /* malloc failures result in lost memory, hash tables are unusable */ #ifndef uthash_fatal #define uthash_fatal(msg) exit(-1) /* fatal OOM error */ #endif #define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") #define IF_HASH_NONFATAL_OOM(x) #endif /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ /* calculate the element whose hash handle address is hhp */ #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) /* calculate the hash handle from element address elp */ #define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) #define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ do { \ struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ unsigned _hd_bkt; \ HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ (head)->hh.tbl->buckets[_hd_bkt].count++; \ _hd_hh_item->hh_next = NULL; \ _hd_hh_item->hh_prev = NULL; \ } while (0) #define HASH_VALUE(keyptr,keylen,hashv) \ do { \ HASH_FCN(keyptr, keylen, hashv); \ } while (0) #define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ do { \ (out) = NULL; \ if (head) { \ unsigned _hf_bkt; \ HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ } \ } \ } while (0) #define HASH_FIND(hh,head,keyptr,keylen,out) \ do { \ unsigned _hf_hashv; \ HASH_VALUE(keyptr, keylen, _hf_hashv); \ HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ } while (0) #ifdef HASH_BLOOM #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + 
(((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) #define HASH_BLOOM_MAKE(tbl,oomed) \ do { \ (tbl)->bloom_nbits = HASH_BLOOM; \ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ if (!(tbl)->bloom_bv) { \ HASH_RECORD_OOM(oomed); \ } else { \ uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ } \ } while (0) #define HASH_BLOOM_FREE(tbl) \ do { \ uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ } while (0) #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) #define HASH_BLOOM_ADD(tbl,hashv) \ HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) #define HASH_BLOOM_TEST(tbl,hashv) \ HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) #else #define HASH_BLOOM_MAKE(tbl,oomed) #define HASH_BLOOM_FREE(tbl) #define HASH_BLOOM_ADD(tbl,hashv) #define HASH_BLOOM_TEST(tbl,hashv) (1) #define HASH_BLOOM_BYTELEN 0U #endif #define HASH_MAKE_TABLE(hh,head,oomed) \ do { \ (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ if (!(head)->hh.tbl) { \ HASH_RECORD_OOM(oomed); \ } else { \ uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ (head)->hh.tbl->tail = &((head)->hh); \ (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ (head)->hh.tbl->signature = HASH_SIGNATURE; \ if (!(head)->hh.tbl->buckets) { \ HASH_RECORD_OOM(oomed); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ } else { \ uthash_bzero((head)->hh.tbl->buckets, \ HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ IF_HASH_NONFATAL_OOM( \ if (oomed) { \ 
uthash_free((head)->hh.tbl->buckets, \ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ } \ ) \ } \ } \ } while (0) #define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ do { \ (replaced) = NULL; \ HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ if (replaced) { \ HASH_DELETE(hh, head, replaced); \ } \ HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ } while (0) #define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ do { \ (replaced) = NULL; \ HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ if (replaced) { \ HASH_DELETE(hh, head, replaced); \ } \ HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ } while (0) #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ do { \ unsigned _hr_hashv; \ HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ } while (0) #define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ do { \ unsigned _hr_hashv; \ HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ } while (0) #define HASH_APPEND_LIST(hh, head, add) \ do { \ (add)->hh.next = NULL; \ (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ (head)->hh.tbl->tail->next = (add); \ (head)->hh.tbl->tail = &((add)->hh); \ } while (0) #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ do { \ do { \ if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ break; \ } \ } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ } while (0) #ifdef NO_DECLTYPE #undef HASH_AKBI_INNER_LOOP #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ do { 
\ char *_hs_saved_head = (char*)(head); \ do { \ DECLTYPE_ASSIGN(head, _hs_iter); \ if (cmpfcn(head, add) > 0) { \ DECLTYPE_ASSIGN(head, _hs_saved_head); \ break; \ } \ DECLTYPE_ASSIGN(head, _hs_saved_head); \ } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ } while (0) #endif #if HASH_NONFATAL_OOM #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ do { \ if (!(oomed)) { \ unsigned _ha_bkt; \ (head)->hh.tbl->num_items++; \ HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ if (oomed) { \ HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ HASH_DELETE_HH(hh, head, &(add)->hh); \ (add)->hh.tbl = NULL; \ uthash_nonfatal_oom(add); \ } else { \ HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ } \ } else { \ (add)->hh.tbl = NULL; \ uthash_nonfatal_oom(add); \ } \ } while (0) #else #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ do { \ unsigned _ha_bkt; \ (head)->hh.tbl->num_items++; \ HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ } while (0) #endif #define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ do { \ IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ (add)->hh.hashv = (hashval); \ (add)->hh.key = (char*) (keyptr); \ (add)->hh.keylen = (unsigned) (keylen_in); \ if (!(head)) { \ (add)->hh.next = NULL; \ (add)->hh.prev = NULL; \ HASH_MAKE_TABLE(hh, add, _ha_oomed); \ IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ (head) = (add); \ IF_HASH_NONFATAL_OOM( } ) \ } else { \ void *_hs_iter = (head); \ (add)->hh.tbl = (head)->hh.tbl; \ HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ if (_hs_iter) { \ (add)->hh.next = _hs_iter; \ if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, 
_hs_iter)->prev)) { \ HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \ } else { \ (head) = (add); \ } \ HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ } else { \ HASH_APPEND_LIST(hh, head, add); \ } \ } \ HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ } while (0) #define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ do { \ unsigned _hs_hashv; \ HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ } while (0) #define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) #define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) #define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ do { \ IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ (add)->hh.hashv = (hashval); \ (add)->hh.key = (char*) (keyptr); \ (add)->hh.keylen = (unsigned) (keylen_in); \ if (!(head)) { \ (add)->hh.next = NULL; \ (add)->hh.prev = NULL; \ HASH_MAKE_TABLE(hh, add, _ha_oomed); \ IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ (head) = (add); \ IF_HASH_NONFATAL_OOM( } ) \ } else { \ (add)->hh.tbl = (head)->hh.tbl; \ HASH_APPEND_LIST(hh, head, add); \ } \ HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ } while (0) #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ do { \ unsigned _ha_hashv; \ HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ } while (0) #define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) #define 
HASH_ADD(hh,head,fieldname,keylen_in,add) \ HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) #define HASH_TO_BKT(hashv,num_bkts,bkt) \ do { \ bkt = ((hashv) & ((num_bkts) - 1U)); \ } while (0) /* delete "delptr" from the hash table. * "the usual" patch-up process for the app-order doubly-linked-list. * The use of _hd_hh_del below deserves special explanation. * These used to be expressed using (delptr) but that led to a bug * if someone used the same symbol for the head and deletee, like * HASH_DELETE(hh,users,users); * We want that to work, but by changing the head (users) below * we were forfeiting our ability to further refer to the deletee (users) * in the patch-up process. Solution: use scratch space to * copy the deletee pointer, then the latter references are via that * scratch pointer rather than through the repointed (users) symbol. */ #define HASH_DELETE(hh,head,delptr) \ HASH_DELETE_HH(hh, head, &(delptr)->hh) #define HASH_DELETE_HH(hh,head,delptrhh) \ do { \ struct UT_hash_handle *_hd_hh_del = (delptrhh); \ if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ (head) = NULL; \ } else { \ unsigned _hd_bkt; \ if (_hd_hh_del == (head)->hh.tbl->tail) { \ (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ } \ if (_hd_hh_del->prev != NULL) { \ HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ } else { \ DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ } \ if (_hd_hh_del->next != NULL) { \ HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ } \ HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ (head)->hh.tbl->num_items--; \ } \ HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ } while (0) /* convenience 
forms of HASH_FIND/HASH_ADD/HASH_DEL */ #define HASH_FIND_STR(head,findstr,out) \ do { \ unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ } while (0) #define HASH_ADD_STR(head,strfield,add) \ do { \ unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ } while (0) #define HASH_REPLACE_STR(head,strfield,add,replaced) \ do { \ unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ } while (0) #define HASH_FIND_INT(head,findint,out) \ HASH_FIND(hh,head,findint,sizeof(int),out) #define HASH_ADD_INT(head,intfield,add) \ HASH_ADD(hh,head,intfield,sizeof(int),add) #define HASH_REPLACE_INT(head,intfield,add,replaced) \ HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) #define HASH_FIND_PTR(head,findptr,out) \ HASH_FIND(hh,head,findptr,sizeof(void *),out) #define HASH_ADD_PTR(head,ptrfield,add) \ HASH_ADD(hh,head,ptrfield,sizeof(void *),add) #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) #define HASH_DEL(head,delptr) \ HASH_DELETE(hh,head,delptr) /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. */ #ifdef HASH_DEBUG #define HASH_OOPS(...) 
do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) #define HASH_FSCK(hh,head,where) \ do { \ struct UT_hash_handle *_thh; \ if (head) { \ unsigned _bkt_i; \ unsigned _count = 0; \ char *_prev; \ for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ unsigned _bkt_count = 0; \ _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ _prev = NULL; \ while (_thh) { \ if (_prev != (char*)(_thh->hh_prev)) { \ HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ (where), (void*)_thh->hh_prev, (void*)_prev); \ } \ _bkt_count++; \ _prev = (char*)(_thh); \ _thh = _thh->hh_next; \ } \ _count += _bkt_count; \ if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ } \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ (where), (head)->hh.tbl->num_items, _count); \ } \ _count = 0; \ _prev = NULL; \ _thh = &(head)->hh; \ while (_thh) { \ _count++; \ if (_prev != (char*)_thh->prev) { \ HASH_OOPS("%s: invalid prev %p, actual %p\n", \ (where), (void*)_thh->prev, (void*)_prev); \ } \ _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ (where), (head)->hh.tbl->num_items, _count); \ } \ } \ } while (0) #else #define HASH_FSCK(hh,head,where) #endif /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to * the descriptor to which this macro is defined for tuning the hash function. * The app can #include <unistd.h> to get the prototype for write(2).
*/ #ifdef HASH_EMIT_KEYS #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ do { \ unsigned _klen = fieldlen; \ write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ } while (0) #else #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) #endif /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ #ifdef HASH_FUNCTION #define HASH_FCN HASH_FUNCTION #else #define HASH_FCN HASH_JEN #endif /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ #define HASH_BER(key,keylen,hashv) \ do { \ unsigned _hb_keylen = (unsigned)keylen; \ const unsigned char *_hb_key = (const unsigned char*)(key); \ (hashv) = 0; \ while (_hb_keylen-- != 0U) { \ (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ } \ } while (0) /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ #define HASH_SAX(key,keylen,hashv) \ do { \ unsigned _sx_i; \ const unsigned char *_hs_key = (const unsigned char*)(key); \ hashv = 0; \ for (_sx_i=0; _sx_i < keylen; _sx_i++) { \ hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ } \ } while (0) /* FNV-1a variation */ #define HASH_FNV(key,keylen,hashv) \ do { \ unsigned _fn_i; \ const unsigned char *_hf_key = (const unsigned char*)(key); \ (hashv) = 2166136261U; \ for (_fn_i=0; _fn_i < keylen; _fn_i++) { \ hashv = hashv ^ _hf_key[_fn_i]; \ hashv = hashv * 16777619U; \ } \ } while (0) #define HASH_OAT(key,keylen,hashv) \ do { \ unsigned _ho_i; \ const unsigned char *_ho_key=(const unsigned char*)(key); \ hashv = 0; \ for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ hashv += _ho_key[_ho_i]; \ hashv += (hashv << 10); \ hashv ^= (hashv >> 6); \ } \ hashv += (hashv << 3); \ hashv ^= (hashv >> 11); \ hashv += (hashv << 15); \ } while (0) #define HASH_JEN_MIX(a,b,c) \ do { \ a -= b; a -= c; a ^= ( c >> 13 ); \ b -= c; b -= a; b ^= ( a << 8 ); \ c -= a; c -= b; c ^= ( b >> 13 ); \ a -= b; a -= 
c; a ^= ( c >> 12 ); \ b -= c; b -= a; b ^= ( a << 16 ); \ c -= a; c -= b; c ^= ( b >> 5 ); \ a -= b; a -= c; a ^= ( c >> 3 ); \ b -= c; b -= a; b ^= ( a << 10 ); \ c -= a; c -= b; c ^= ( b >> 15 ); \ } while (0) #define HASH_JEN(key,keylen,hashv) \ do { \ unsigned _hj_i,_hj_j,_hj_k; \ unsigned const char *_hj_key=(unsigned const char*)(key); \ hashv = 0xfeedbeefu; \ _hj_i = _hj_j = 0x9e3779b9u; \ _hj_k = (unsigned)(keylen); \ while (_hj_k >= 12U) { \ _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + ( (unsigned)_hj_key[2] << 16 ) \ + ( (unsigned)_hj_key[3] << 24 ) ); \ _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + ( (unsigned)_hj_key[6] << 16 ) \ + ( (unsigned)_hj_key[7] << 24 ) ); \ hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + ( (unsigned)_hj_key[10] << 16 ) \ + ( (unsigned)_hj_key[11] << 24 ) ); \ \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ \ _hj_key += 12; \ _hj_k -= 12U; \ } \ hashv += (unsigned)(keylen); \ switch ( _hj_k ) { \ case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ case 1: _hj_i += _hj_key[0]; \ } \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ } while (0) /* The Paul Hsieh hash function */ #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get16bits(d) (*((const uint16_t *) (d))) #endif #if !defined 
(get16bits) #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ +(uint32_t)(((const uint8_t *)(d))[0]) ) #endif #define HASH_SFH(key,keylen,hashv) \ do { \ unsigned const char *_sfh_key=(unsigned const char*)(key); \ uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ \ unsigned _sfh_rem = _sfh_len & 3U; \ _sfh_len >>= 2; \ hashv = 0xcafebabeu; \ \ /* Main loop */ \ for (;_sfh_len > 0U; _sfh_len--) { \ hashv += get16bits (_sfh_key); \ _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ hashv = (hashv << 16) ^ _sfh_tmp; \ _sfh_key += 2U*sizeof (uint16_t); \ hashv += hashv >> 11; \ } \ \ /* Handle end cases */ \ switch (_sfh_rem) { \ case 3: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 16; \ hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ hashv += hashv >> 11; \ break; \ case 2: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 11; \ hashv += hashv >> 17; \ break; \ case 1: hashv += *_sfh_key; \ hashv ^= hashv << 10; \ hashv += hashv >> 1; \ } \ \ /* Force "avalanching" of final 127 bits */ \ hashv ^= hashv << 3; \ hashv += hashv >> 5; \ hashv ^= hashv << 4; \ hashv += hashv >> 17; \ hashv ^= hashv << 25; \ hashv += hashv >> 6; \ } while (0) #ifdef HASH_USING_NO_STRICT_ALIASING /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
* * Note the preprocessor built-in defines can be emitted using: * * gcc -m64 -dM -E - < /dev/null (on gcc) * cc -## a.c (where a.c is a simple test file) (Sun Studio) */ #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) #define MUR_GETBLOCK(p,i) p[i] #else /* non intel */ #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) #else /* assume little endian non-intel */ #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) #endif #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ MUR_ONE_THREE(p)))) #endif #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) #define MUR_FMIX(_h) \ do { \ _h ^= _h >> 16; \ _h *= 0x85ebca6bu; \ _h ^= _h >> 13; \ _h *= 0xc2b2ae35u; \ _h ^= _h >> 16; \ } while (0) #define HASH_MUR(key,keylen,hashv) \ do { \ const uint8_t *_mur_data = (const uint8_t*)(key); \ const int _mur_nblocks = (int)(keylen) / 4; \ uint32_t _mur_h1 = 0xf88D5353u; \ uint32_t _mur_c1 = 0xcc9e2d51u; \ uint32_t _mur_c2 = 0x1b873593u; \ uint32_t _mur_k1 = 0; \ const uint8_t *_mur_tail; \ const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ int _mur_i; \ for (_mur_i = -_mur_nblocks; _mur_i != 0; _mur_i++) { \ _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ _mur_k1 *= _mur_c1; \ _mur_k1 = MUR_ROTL32(_mur_k1,15); \ _mur_k1 *= _mur_c2; \ \ _mur_h1 ^= _mur_k1; \ _mur_h1 = MUR_ROTL32(_mur_h1,13); \ _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ } \ _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ _mur_k1=0; \ switch ((keylen) & 3U) { \ case 0: break; \ case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ _mur_k1 *= _mur_c1; \ _mur_k1 = MUR_ROTL32(_mur_k1,15); \ _mur_k1 *= _mur_c2; \ _mur_h1 ^= _mur_k1; \ } \ _mur_h1 ^= (uint32_t)(keylen); \ MUR_FMIX(_mur_h1); \ hashv = _mur_h1; \ } while (0) #endif /* HASH_USING_NO_STRICT_ALIASING */ /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ do { \ if ((head).hh_head != NULL) { \ DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ } else { \ (out) = NULL; \ } \ while ((out) != NULL) { \ if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \ break; \ } \ } \ if ((out)->hh.hh_next != NULL) { \ DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ } else 
{ \ (out) = NULL; \ } \ } \ } while (0) /* add an item to a bucket */ #define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \ do { \ UT_hash_bucket *_ha_head = &(head); \ _ha_head->count++; \ (addhh)->hh_next = _ha_head->hh_head; \ (addhh)->hh_prev = NULL; \ if (_ha_head->hh_head != NULL) { \ _ha_head->hh_head->hh_prev = (addhh); \ } \ _ha_head->hh_head = (addhh); \ if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \ && !(addhh)->tbl->noexpand) { \ HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \ IF_HASH_NONFATAL_OOM( \ if (oomed) { \ HASH_DEL_IN_BKT(head,addhh); \ } \ ) \ } \ } while (0) /* remove an item from a given bucket */ #define HASH_DEL_IN_BKT(head,delhh) \ do { \ UT_hash_bucket *_hd_head = &(head); \ _hd_head->count--; \ if (_hd_head->hh_head == (delhh)) { \ _hd_head->hh_head = (delhh)->hh_next; \ } \ if ((delhh)->hh_prev) { \ (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ } \ if ((delhh)->hh_next) { \ (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ } \ } while (0) /* Bucket expansion has the effect of doubling the number of buckets * and redistributing the items into the new buckets. Ideally the * items will distribute more or less evenly into the new buckets * (the extent to which this is true is a measure of the quality of * the hash function as it applies to the key domain). * * With the items distributed into more buckets, the chain length * (item count) in each bucket is reduced. Thus by expanding buckets * the hash keeps a bound on the chain length. This bounded chain * length is the essence of how a hash provides constant time lookup. * * The calculation of tbl->ideal_chain_maxlen below deserves some * explanation. First, keep in mind that we're calculating the ideal * maximum chain length based on the *new* (doubled) bucket count. * In fractions this is just n/b (n=number of items,b=new num buckets). * Since the ideal chain length is an integer, we want to calculate * ceil(n/b). 
We don't depend on floating point arithmetic in this * hash, so to calculate ceil(n/b) with integers we could write * * ceil(n/b) = (n/b) + ((n%b)?1:0) * * and in fact a previous version of this hash did just that. * But now we have improved things a bit by recognizing that b is * always a power of two. We keep its base 2 log handy (call it lb), * so now we can write this with a bit shift and logical AND: * * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) * */ #define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \ do { \ unsigned _he_bkt; \ unsigned _he_bkt_i; \ struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 2UL * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ if (!_he_new_buckets) { \ HASH_RECORD_OOM(oomed); \ } else { \ uthash_bzero(_he_new_buckets, \ 2UL * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ (tbl)->ideal_chain_maxlen = \ ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \ ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 
1U : 0U); \ (tbl)->nonideal_items = 0; \ for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \ while (_he_thh != NULL) { \ _he_hh_nxt = _he_thh->hh_next; \ HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ _he_newbkt = &(_he_new_buckets[_he_bkt]); \ if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ (tbl)->nonideal_items++; \ _he_newbkt->expand_mult = _he_newbkt->count / (tbl)->ideal_chain_maxlen; \ } \ _he_thh->hh_prev = NULL; \ _he_thh->hh_next = _he_newbkt->hh_head; \ if (_he_newbkt->hh_head != NULL) { \ _he_newbkt->hh_head->hh_prev = _he_thh; \ } \ _he_newbkt->hh_head = _he_thh; \ _he_thh = _he_hh_nxt; \ } \ } \ uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ (tbl)->num_buckets *= 2U; \ (tbl)->log2_num_buckets++; \ (tbl)->buckets = _he_new_buckets; \ (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \ ((tbl)->ineff_expands+1U) : 0U; \ if ((tbl)->ineff_expands > 1U) { \ (tbl)->noexpand = 1; \ uthash_noexpand_fyi(tbl); \ } \ uthash_expand_fyi(tbl); \ } \ } while (0) /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ /* Note that HASH_SORT assumes the hash handle name to be hh. * HASH_SRT was added to allow the hash handle name to be passed in. */ #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) #define HASH_SRT(hh,head,cmpfcn) \ do { \ unsigned _hs_i; \ unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ if (head != NULL) { \ _hs_insize = 1; \ _hs_looping = 1; \ _hs_list = &((head)->hh); \ while (_hs_looping != 0U) { \ _hs_p = _hs_list; \ _hs_list = NULL; \ _hs_tail = NULL; \ _hs_nmerges = 0; \ while (_hs_p != NULL) { \ _hs_nmerges++; \ _hs_q = _hs_p; \ _hs_psize = 0; \ for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ _hs_psize++; \ _hs_q = ((_hs_q->next != NULL) ? 
\ HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ if (_hs_q == NULL) { \ break; \ } \ } \ _hs_qsize = _hs_insize; \ while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ if (_hs_psize == 0U) { \ _hs_e = _hs_q; \ _hs_q = ((_hs_q->next != NULL) ? \ HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ _hs_qsize--; \ } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ _hs_e = _hs_p; \ if (_hs_p != NULL) { \ _hs_p = ((_hs_p->next != NULL) ? \ HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ } \ _hs_psize--; \ } else if ((cmpfcn( \ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \ )) <= 0) { \ _hs_e = _hs_p; \ if (_hs_p != NULL) { \ _hs_p = ((_hs_p->next != NULL) ? \ HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ } \ _hs_psize--; \ } else { \ _hs_e = _hs_q; \ _hs_q = ((_hs_q->next != NULL) ? \ HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ _hs_qsize--; \ } \ if ( _hs_tail != NULL ) { \ _hs_tail->next = ((_hs_e != NULL) ? \ ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ } else { \ _hs_list = _hs_e; \ } \ if (_hs_e != NULL) { \ _hs_e->prev = ((_hs_tail != NULL) ? \ ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ } \ _hs_tail = _hs_e; \ } \ _hs_p = _hs_q; \ } \ if (_hs_tail != NULL) { \ _hs_tail->next = NULL; \ } \ if (_hs_nmerges <= 1U) { \ _hs_looping = 0; \ (head)->hh.tbl->tail = _hs_tail; \ DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ } \ _hs_insize *= 2U; \ } \ HASH_FSCK(hh, head, "HASH_SRT"); \ } \ } while (0) /* This function selects items from one hash into another hash. * The end result is that the selected items have dual presence * in both hashes. There is no copy of the items made; rather * they are added into the new hash through a secondary hash * hash handle that must be present in the structure. 
*/ #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ do { \ unsigned _src_bkt, _dst_bkt; \ void *_last_elt = NULL, *_elt; \ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ if ((src) != NULL) { \ for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ _src_hh != NULL; \ _src_hh = _src_hh->hh_next) { \ _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ if (cond(_elt)) { \ IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \ _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ _dst_hh->key = _src_hh->key; \ _dst_hh->keylen = _src_hh->keylen; \ _dst_hh->hashv = _src_hh->hashv; \ _dst_hh->prev = _last_elt; \ _dst_hh->next = NULL; \ if (_last_elt_hh != NULL) { \ _last_elt_hh->next = _elt; \ } \ if ((dst) == NULL) { \ DECLTYPE_ASSIGN(dst, _elt); \ HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ IF_HASH_NONFATAL_OOM( \ if (_hs_oomed) { \ uthash_nonfatal_oom(_elt); \ (dst) = NULL; \ continue; \ } \ ) \ } else { \ _dst_hh->tbl = (dst)->hh_dst.tbl; \ } \ HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ (dst)->hh_dst.tbl->num_items++; \ IF_HASH_NONFATAL_OOM( \ if (_hs_oomed) { \ HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ _dst_hh->tbl = NULL; \ uthash_nonfatal_oom(_elt); \ continue; \ } \ ) \ HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ _last_elt = _elt; \ _last_elt_hh = _dst_hh; \ } \ } \ } \ } \ HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ } while (0) #define HASH_CLEAR(hh,head) \ do { \ if ((head) != NULL) { \ HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ (head) = NULL; \ } \ } while (0) #define HASH_OVERHEAD(hh,head) \ (((head) != NULL) ? 
( \ (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ sizeof(UT_hash_table) + \ (HASH_BLOOM_BYTELEN))) : 0U) #ifdef NO_DECLTYPE #define HASH_ITER(hh,head,el,tmp) \ for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) #else #define HASH_ITER(hh,head,el,tmp) \ for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) #endif /* obtain a count of items in the hash */ #define HASH_COUNT(head) HASH_CNT(hh,head) #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) typedef struct UT_hash_bucket { struct UT_hash_handle *hh_head; unsigned count; /* expand_mult is normally set to 0. In this situation, the max chain length * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If * the bucket's chain exceeds this length, bucket expansion is triggered). * However, setting expand_mult to a non-zero value delays bucket expansion * (that would be triggered by additions to this particular bucket) * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. * (The multiplier is simply expand_mult+1). The whole idea of this * multiplier is to reduce bucket expansions, since they are expensive, in * situations where we know that a particular bucket tends to be overused. * It is better to let its chain length grow to a longer yet-still-bounded * value, than to do an O(n) bucket expansion too often. 
*/ unsigned expand_mult; } UT_hash_bucket; /* random signature used only to find hash tables in external analysis */ #define HASH_SIGNATURE 0xa0111fe1u #define HASH_BLOOM_SIGNATURE 0xb12220f2u typedef struct UT_hash_table { UT_hash_bucket *buckets; unsigned num_buckets, log2_num_buckets; unsigned num_items; struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ /* in an ideal situation (all buckets used equally), no bucket would have * more than ceil(#items/#buckets) items. that's the ideal chain length. */ unsigned ideal_chain_maxlen; /* nonideal_items is the number of items in the hash whose chain position * exceeds the ideal chain maxlen. these items pay the penalty for an uneven * hash distribution; reaching them in a chain traversal takes >ideal steps */ unsigned nonideal_items; /* ineffective expands occur when a bucket doubling was performed, but * afterward, more than half the items in the hash had nonideal chain * positions. If this happens on two consecutive expansions we inhibit any * further expansion, as it's not helping; this happens when the hash * function isn't a good fit for the key domain. When expansion is inhibited * the hash will still work, albeit no longer in constant time. 
*/ unsigned ineff_expands, noexpand; uint32_t signature; /* used only to find hash tables in external analysis */ #ifdef HASH_BLOOM uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ uint8_t *bloom_bv; uint8_t bloom_nbits; #endif } UT_hash_table; typedef struct UT_hash_handle { struct UT_hash_table *tbl; void *prev; /* prev element in app order */ void *next; /* next element in app order */ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ struct UT_hash_handle *hh_next; /* next hh in bucket order */ void *key; /* ptr to enclosing struct's key */ unsigned keylen; /* enclosing struct's key len */ unsigned hashv; /* result of hash-fcn(key) */ } UT_hash_handle; #endif /* UTHASH_H */ ================================================ FILE: instrumentation/winafl_alloc_inl.h ================================================ /* american fuzzy lop - error-checking, memory-zeroing alloc routines ------------------------------------------------------------------ Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_ALLOC_INL_H #define _HAVE_ALLOC_INL_H #include #include #include #include "winafl_config.h" #include "winafl_types.h" #include "winafl_debug.h" /* User-facing macro to sprintf() to a dynamically allocated buffer. 
*/

/* NOTE: alloc_printf() is commented out in this header; the text below is
   kept for reference only and is not compiled.

#define alloc_printf(_str, ...) ({ \
    u8* _tmp; \
    s32 _len = snprintf(NULL, 0, _str); \
    if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \
    _tmp = ck_alloc(_len + 1); \
    snprintf((char*)_tmp, _len + 1, _str); \
    _tmp; \
  })
*/

/* Macro to enforce allocation limits as a last-resort defense against
   integer overflows. Calls ABORT() when _s is greater than MAX_ALLOC. The
   value is printed with %u, and _s is evaluated twice on the failure path,
   so pass a plain unsigned 32-bit expression without side effects. */

#define ALLOC_CHECK_SIZE(_s) do { \
    if ((_s) > MAX_ALLOC) \
      ABORT("Bad alloc request: %u bytes", (_s)); \
  } while (0)

/* Macro to check malloc() failures and the like. Calls ABORT() when _r is
   NULL / zero; _s is only used for the diagnostic message (printed with
   %u). */

#define ALLOC_CHECK_RESULT(_r, _s) do { \
    if (!(_r)) \
      ABORT("Out of memory: can't allocate %u bytes", (_s)); \
  } while (0)

/* Magic tokens used to mark used / freed chunks. */

#define ALLOC_MAGIC_C1  0xFF00FF00   /* Used head (dword)  */
#define ALLOC_MAGIC_F   0xFE00FE00   /* Freed head (dword) */
#define ALLOC_MAGIC_C2  0xF0         /* Used tail (byte)   */

/* Positions of guard tokens in relation to the user-visible pointer.

   As implied by the accessors below (and assuming u32 is 4 bytes wide, which
   is what ALLOC_OFF_HEAD == 8 relies on), every chunk is laid out as:

     [ C1: u32 head canary ][ S: u32 size ][ S bytes of user data ][ C2: u8 ]
                                           ^-- user-visible pointer

   ALLOC_C1()  - head canary, two u32 slots before the user pointer.
   ALLOC_S()   - recorded user-data size, one u32 slot before the pointer.
   ALLOC_C2()  - tail canary, the byte located ALLOC_S() bytes past the
                 pointer (so it depends on ALLOC_S() being set first).

   ALLOC_OFF_HEAD is the size of the header that precedes the user pointer;
   ALLOC_OFF_TOTAL additionally accounts for the one-byte tail canary. */

#define ALLOC_C1(_ptr)  (((u32*)(_ptr))[-2])
#define ALLOC_S(_ptr)   (((u32*)(_ptr))[-1])
#define ALLOC_C2(_ptr)  (((u8*)(_ptr))[ALLOC_S(_ptr)])

#define ALLOC_OFF_HEAD  8
#define ALLOC_OFF_TOTAL (ALLOC_OFF_HEAD + 1)

/* Allocator increments for ck_realloc_block(). */

#define ALLOC_BLK_INC    256

/* Sanity-checking macros for pointers.

   CHECK_PTR() accepts NULL silently. For a non-NULL pointer it verifies the
   head canary (reporting "Use after free." when the head carries the
   freed-chunk marker and "Corrupted head alloc canary." otherwise) and then
   the tail canary; any mismatch ends in ABORT(). */

#define CHECK_PTR(_p) do { \
    if (_p) { \
      if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
        if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
          ABORT("Use after free."); \
        else ABORT("Corrupted head alloc canary."); \
      } \
      if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \
        ABORT("Corrupted tail alloc canary."); \
    } \
  } while (0)

/* Expression form of CHECK_PTR(): evaluates _p once, checks it, and yields
   the same pointer. NOTE(review): this relies on the GNU statement-expression
   and typeof extensions, so it only builds with compilers that provide
   them. */

#define CHECK_PTR_EXPR(_p) ({ \
    typeof (_p) _tmp = (_p); \
    CHECK_PTR(_tmp); \
    _tmp; \
  })

/* Allocate a buffer, explicitly not zeroing it. Returns NULL for zero-sized
   requests. 
*/ static inline void* DFL_ck_alloc_nozero(u32 size) { char* ret; if (!size) return NULL; ALLOC_CHECK_SIZE(size); ret = (char *)malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; return ret; } /* Allocate a buffer, returning zeroed memory. */ static inline void* DFL_ck_alloc(u32 size) { void* mem; if (!size) return NULL; mem = DFL_ck_alloc_nozero(size); return memset(mem, 0, size); } /* Free memory, checking for double free and corrupted heap. When DEBUG_BUILD is set, the old memory will be also clobbered with 0xFF. */ static inline void DFL_ck_free(char* mem) { if (!mem) return; CHECK_PTR(mem); #ifdef DEBUG_BUILD /* Catch pointer issues sooner. */ memset(mem, 0xFF, ALLOC_S(mem)); #endif /* DEBUG_BUILD */ ALLOC_C1(mem) = ALLOC_MAGIC_F; free(mem - ALLOC_OFF_HEAD); } /* Re-allocate a buffer, checking for issues and zeroing any newly-added tail. With DEBUG_BUILD, the buffer is always reallocated to a new addresses and the old memory is clobbered with 0xFF. */ static inline void* DFL_ck_realloc(char* orig, u32 size) { char* ret; u32 old_size = 0; if (!size) { DFL_ck_free(orig); return NULL; } if (orig) { CHECK_PTR(orig); #ifndef DEBUG_BUILD ALLOC_C1(orig) = ALLOC_MAGIC_F; #endif /* !DEBUG_BUILD */ old_size = ALLOC_S(orig); orig -= ALLOC_OFF_HEAD; ALLOC_CHECK_SIZE(old_size); } ALLOC_CHECK_SIZE(size); #ifndef DEBUG_BUILD ret = (char *)realloc(orig, size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); #else /* Catch pointer issues sooner: force relocation and make sure that the original buffer is wiped. 
*/ ret = malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); if (orig) { memcpy(ret + ALLOC_OFF_HEAD, orig + ALLOC_OFF_HEAD, MIN(size, old_size)); memset(orig + ALLOC_OFF_HEAD, 0xFF, old_size); ALLOC_C1(orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F; free(orig); } #endif /* ^!DEBUG_BUILD */ ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; if (size > old_size) memset(ret + old_size, 0, size - old_size); return ret; } /* Re-allocate a buffer with ALLOC_BLK_INC increments (used to speed up repeated small reallocs without complicating the user code). */ static inline void* DFL_ck_realloc_block(void* orig, u32 size) { #ifndef DEBUG_BUILD if (orig) { CHECK_PTR(orig); if (ALLOC_S(orig) >= size) return orig; size += ALLOC_BLK_INC; } #endif /* !DEBUG_BUILD */ return DFL_ck_realloc((char *)orig, size); } /* Create a buffer with a copy of a string. Returns NULL for NULL inputs. */ static inline u8* DFL_ck_strdup(u8* str) { char* ret; u32 size; if (!str) return NULL; size = strlen((char*)str) + 1; ALLOC_CHECK_SIZE(size); ret = (char *)malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; return (u8 *)memcpy(ret, str, size); } /* Create a buffer with a copy of a memory block. Returns NULL for zero-sized or NULL inputs. */ static inline void* DFL_ck_memdup(void* mem, u32 size) { char* ret; if (!mem || !size) return NULL; ALLOC_CHECK_SIZE(size); ret = (char *)malloc(size + ALLOC_OFF_TOTAL); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; return memcpy(ret, mem, size); } /* Create a buffer with a block of text, appending a NUL terminator at the end. Returns NULL for zero-sized or NULL inputs. 
*/ static inline u8* DFL_ck_memdup_str(u8* mem, u32 size) { u8* ret; if (!mem || !size) return NULL; ALLOC_CHECK_SIZE(size); ret = (u8*)malloc(size + ALLOC_OFF_TOTAL + 1); ALLOC_CHECK_RESULT(ret, size); ret += ALLOC_OFF_HEAD; ALLOC_C1(ret) = ALLOC_MAGIC_C1; ALLOC_S(ret) = size; ALLOC_C2(ret) = ALLOC_MAGIC_C2; memcpy(ret, mem, size); ret[size] = 0; return ret; } #ifndef DEBUG_BUILD /* In non-debug mode, we just do straightforward aliasing of the above functions to user-visible names such as ck_alloc(). */ #define ck_alloc DFL_ck_alloc #define ck_alloc_nozero DFL_ck_alloc_nozero #define ck_realloc DFL_ck_realloc #define ck_realloc_block DFL_ck_realloc_block #define ck_strdup DFL_ck_strdup #define ck_memdup DFL_ck_memdup #define ck_memdup_str DFL_ck_memdup_str #define ck_free DFL_ck_free #define alloc_report() #else /* In debugging mode, we also track allocations to detect memory leaks, and the flow goes through one more layer of indirection. */ /* Alloc tracking data structures: */ #define ALLOC_BUCKETS 4096 struct TRK_obj { void *ptr; char *file, *func; u32 line; }; #ifdef AFL_MAIN struct TRK_obj* TRK[ALLOC_BUCKETS]; u32 TRK_cnt[ALLOC_BUCKETS]; # define alloc_report() TRK_report() #else extern struct TRK_obj* TRK[ALLOC_BUCKETS]; extern u32 TRK_cnt[ALLOC_BUCKETS]; # define alloc_report() #endif /* ^AFL_MAIN */ /* Bucket-assigning function for a given pointer: */ #define TRKH(_ptr) (((((u32)(_ptr)) >> 16) ^ ((u32)(_ptr))) % ALLOC_BUCKETS) /* Add a new entry to the list of allocated objects. */ static inline void TRK_alloc_buf(void* ptr, const char* file, const char* func, u32 line) { u32 i, bucket; if (!ptr) return; bucket = TRKH(ptr); /* Find a free slot in the list of entries for that bucket. */ for (i = 0; i < TRK_cnt[bucket]; i++) if (!TRK[bucket][i].ptr) { TRK[bucket][i].ptr = ptr; TRK[bucket][i].file = (char*)file; TRK[bucket][i].func = (char*)func; TRK[bucket][i].line = line; return; } /* No space available - allocate more. 
*/ TRK[bucket] = DFL_ck_realloc_block(TRK[bucket], (TRK_cnt[bucket] + 1) * sizeof(struct TRK_obj)); TRK[bucket][i].ptr = ptr; TRK[bucket][i].file = (char*)file; TRK[bucket][i].func = (char*)func; TRK[bucket][i].line = line; TRK_cnt[bucket]++; } /* Remove entry from the list of allocated objects. */ static inline void TRK_free_buf(void* ptr, const char* file, const char* func, u32 line) { u32 i, bucket; if (!ptr) return; bucket = TRKH(ptr); /* Find the element on the list... */ for (i = 0; i < TRK_cnt[bucket]; i++) if (TRK[bucket][i].ptr == ptr) { TRK[bucket][i].ptr = 0; return; } WARNF("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)", func, file, line); } /* Do a final report on all non-deallocated objects. */ static inline void TRK_report(void) { u32 i, bucket; fflush(0); for (bucket = 0; bucket < ALLOC_BUCKETS; bucket++) for (i = 0; i < TRK_cnt[bucket]; i++) if (TRK[bucket][i].ptr) WARNF("ALLOC: Memory never freed, created in %s (%s:%u)", TRK[bucket][i].func, TRK[bucket][i].file, TRK[bucket][i].line); } /* Simple wrappers for non-debugging functions: */ static inline void* TRK_ck_alloc(u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_alloc(size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_realloc(void* orig, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_realloc(orig, size); TRK_free_buf(orig, file, func, line); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_realloc_block(void* orig, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_realloc_block(orig, size); TRK_free_buf(orig, file, func, line); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_strdup(u8* str, const char* file, const char* func, u32 line) { void* ret = DFL_ck_strdup(str); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_memdup(void* mem, u32 size, const char* file, const char* 
func, u32 line) { void* ret = DFL_ck_memdup(mem, size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void* TRK_ck_memdup_str(void* mem, u32 size, const char* file, const char* func, u32 line) { void* ret = DFL_ck_memdup_str(mem, size); TRK_alloc_buf(ret, file, func, line); return ret; } static inline void TRK_ck_free(void* ptr, const char* file, const char* func, u32 line) { TRK_free_buf(ptr, file, func, line); DFL_ck_free(ptr); } /* Aliasing user-facing names to tracking functions: */ #define ck_alloc(_p1) \ TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_alloc_nozero(_p1) \ TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_realloc(_p1, _p2) \ TRK_ck_realloc(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_realloc_block(_p1, _p2) \ TRK_ck_realloc_block(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_strdup(_p1) \ TRK_ck_strdup(_p1, __FILE__, __FUNCTION__, __LINE__) #define ck_memdup(_p1, _p2) \ TRK_ck_memdup(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_memdup_str(_p1, _p2) \ TRK_ck_memdup_str(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) #define ck_free(_p1) \ TRK_ck_free(_p1, __FILE__, __FUNCTION__, __LINE__) #endif /* ^!DEBUG_BUILD */ #endif /* ! _HAVE_ALLOC_INL_H */ ================================================ FILE: instrumentation/winafl_config.h ================================================ /* american fuzzy lop - vaguely configurable bits ---------------------------------------------- Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_CONFIG_H #define _HAVE_CONFIG_H #define inline __inline #include "winafl_types.h" /****************************************************** * * * Settings that may be of interest to power users: * * * ******************************************************/ /* Comment out to disable terminal colors: */ // #define USE_COLOR /* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */ //#define FANCY_BOXES /* Default timeout for fuzzed code (milliseconds): */ #define EXEC_TIMEOUT 1000 /* Timeout rounding factor when auto-scaling (milliseconds): */ #define EXEC_TM_ROUND 20 /* Default memory limit for child process (MB): */ #ifndef __x86_64__ # define MEM_LIMIT 25 #else # define MEM_LIMIT 50 #endif /* ^!__x86_64__ */ /* Default memory limit when running in QEMU mode (MB): */ #define MEM_LIMIT_QEMU 200 /* Number of calibration cycles per every new test case (and for test cases that show variable behavior): */ #define CAL_CYCLES 10 #define CAL_CYCLES_LONG 40 /* The same, but when AFL_NO_VAR_CHECK is set in the environment: */ #define CAL_CYCLES_NO_VAR 4 /* Number of subsequent hangs before abandoning an input file: */ #define HANG_LIMIT 250 /* Maximum number of unique hangs or crashes to record: */ #define KEEP_UNIQUE_HANG 500 #define KEEP_UNIQUE_CRASH 5000 /* Baseline number of random tweaks during a single 'havoc' stage: */ #define HAVOC_CYCLES 5000 /* Maximum multiplier for the above (should be a power of two, beware of 32-bit int overflows): */ #define HAVOC_MAX_MULT 16 /* 
Absolute minimum number of havoc cycles (after all adjustments): */ #define HAVOC_MIN 10 /* Maximum stacking for havoc-stage tweaks. The actual value is calculated like this: n = random between 1 and HAVOC_STACK_POW2 stacking = 2^n In other words, the default (n = 7) produces 2, 4, 8, 16, 32, 64, or 128 stacked tweaks: */ #define HAVOC_STACK_POW2 7 /* Caps on block sizes for cloning and deletion operations. Each of these ranges has a 33% probability of getting picked, except for the first two cycles where smaller blocks are favored: */ #define HAVOC_BLK_SMALL 32 #define HAVOC_BLK_MEDIUM 128 #define HAVOC_BLK_LARGE 1500 /* Probabilities of skipping non-favored entries in the queue, expressed as percentages: */ #define SKIP_TO_NEW_PROB 99 /* ...when there are new, pending favorites */ #define SKIP_NFAV_OLD_PROB 95 /* ...no new favs, cur entry already fuzzed */ #define SKIP_NFAV_NEW_PROB 75 /* ...no new favs, cur entry not fuzzed yet */ /* Splicing cycle count: */ #define SPLICE_CYCLES 20 /* Nominal per-splice havoc cycle length: */ #define SPLICE_HAVOC 500 /* Maximum offset for integer addition / subtraction stages: */ #define ARITH_MAX 35 /* Limits for the test case trimmer. 
The absolute minimum chunk size; and the starting and ending divisors for chopping up the input file: */ #define TRIM_MIN_BYTES 4 #define TRIM_START_STEPS 16 #define TRIM_END_STEPS 1024 /* Maximum size of input file, in bytes (keep under 100MB): */ #define MAX_FILE (1 * 1024 * 1024) /* The same, for the test case minimizer: */ #define TMIN_MAX_FILE (10 * 1024 * 1024) /* Block normalization steps for afl-tmin: */ #define TMIN_SET_MIN_SIZE 4 #define TMIN_SET_STEPS 128 /* Maximum dictionary token size (-x), in bytes: */ #define MAX_DICT_FILE 128 /* Length limits for auto-detected dictionary tokens: */ #define MIN_AUTO_EXTRA 3 #define MAX_AUTO_EXTRA 32 /* Maximum number of user-specified dictionary tokens to use in deterministic steps; past this point, the "extras/user" step will be still carried out, but with proportionally lower odds: */ #define MAX_DET_EXTRAS 200 /* Maximum number of auto-extracted dictionary tokens to actually use in fuzzing (first value), and to keep in memory as candidates. The latter should be much higher than the former. */ #define USE_AUTO_EXTRAS 50 #define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 10) /* Scaling factor for the effector map used to skip some of the more expensive deterministic steps. The actual divisor is set to 2^EFF_MAP_SCALE2 bytes: */ #define EFF_MAP_SCALE2 3 /* Minimum input file length at which the effector logic kicks in: */ #define EFF_MIN_LEN 128 /* Maximum effector density past which everything is just fuzzed unconditionally (%): */ #define EFF_MAX_PERC 90 /* UI refresh frequency (Hz): */ #define UI_TARGET_HZ 1 /* Fuzzer stats file and plot update intervals (sec): */ #define STATS_UPDATE_SEC 60 #define PLOT_UPDATE_SEC 5 /* Smoothing divisor for CPU load and exec speed stats (1 - no smoothing). 
*/ #define AVG_SMOOTHING 16 /* Sync interval (every n havoc cycles): */ #define SYNC_INTERVAL 5 /* Output directory reuse grace period (minutes): */ #define OUTPUT_GRACE 25 /* Uncomment to use simple file names (id_NNNNNN): */ #define SIMPLE_FILES /* List of interesting values to use in fuzzing. */ #define INTERESTING_8 \ -128, /* Overflow signed 8-bit when decremented */ \ -1, /* */ \ 0, /* */ \ 1, /* */ \ 16, /* One-off with common buffer size */ \ 32, /* One-off with common buffer size */ \ 64, /* One-off with common buffer size */ \ 100, /* One-off with common buffer size */ \ 127 /* Overflow signed 8-bit when incremented */ #define INTERESTING_16 \ -32768, /* Overflow signed 16-bit when decremented */ \ -129, /* Overflow signed 8-bit */ \ 128, /* Overflow signed 8-bit */ \ 255, /* Overflow unsig 8-bit when incremented */ \ 256, /* Overflow unsig 8-bit */ \ 512, /* One-off with common buffer size */ \ 1000, /* One-off with common buffer size */ \ 1024, /* One-off with common buffer size */ \ 4096, /* One-off with common buffer size */ \ 32767 /* Overflow signed 16-bit when incremented */ #define INTERESTING_32 \ -2147483648LL, /* Overflow signed 32-bit when decremented */ \ -100663046, /* Large negative number (endian-agnostic) */ \ -32769, /* Overflow signed 16-bit */ \ 32768, /* Overflow signed 16-bit */ \ 65535, /* Overflow unsig 16-bit when incremented */ \ 65536, /* Overflow unsig 16 bit */ \ 100663045, /* Large positive number (endian-agnostic) */ \ 2147483647 /* Overflow signed 32-bit when incremented */ /*********************************************************** * * * Really exotic stuff you probably don't want to touch: * * * ***********************************************************/ /* Call count interval between reseeding the libc PRNG from /dev/urandom: */ #define RESEED_RNG 10000 /* Maximum line length passed from GCC to 'as' and used for parsing configuration files: */ #define MAX_LINE 8192 /* Environment variable used to pass SHM ID to the 
called program. */ #define SHM_ENV_VAR "__AFL_SHM_ID" /* Other less interesting, internal-only variables. */ #define CLANG_ENV_VAR "__AFL_CLANG_MODE" #define AS_LOOP_ENV_VAR "__AFL_AS_LOOPCHECK" #define PERSIST_ENV_VAR "__AFL_PERSISTENT" #define DEFER_ENV_VAR "__AFL_DEFER_FORKSRV" /* In-code signatures for deferred and persistent mode. */ #define PERSIST_SIG "##SIG_AFL_PERSISTENT##" #define DEFER_SIG "##SIG_AFL_DEFER_FORKSRV##" /* Distinctive bitmap signature used to indicate failed execution: */ #define EXEC_FAIL_SIG 0xfee1dead /* Distinctive exit code used to indicate MSAN trip condition: */ #define MSAN_ERROR 86 /* Designated file descriptors for forkserver commands (the application will use FORKSRV_FD and FORKSRV_FD + 1): */ #define FORKSRV_FD 198 /* Fork server init timeout multiplier: we'll wait the user-selected timeout plus this much for the fork server to spin up. */ #define FORK_WAIT_MULT 10 /* Calibration timeout adjustments, to be a bit more generous when resuming fuzzing sessions or trying to calibrate already-added internal finds. The first value is a percentage, the other is in milliseconds: */ #define CAL_TMOUT_PERC 125 #define CAL_TMOUT_ADD 50 /* Number of chances to calibrate a case before giving up: */ #define CAL_CHANCES 3 /* Map size for the traced binary (2^MAP_SIZE_POW2). Must be greater than 2; you probably want to keep it under 18 or so for performance reasons (adjusting AFL_INST_RATIO when compiling is probably a better way to solve problems with complex programs). You need to recompile the target binary after changing this - otherwise, SEGVs may ensue. 
*/ #define MAP_SIZE_POW2 16 #define MAP_SIZE (1 << MAP_SIZE_POW2) /* Maximum allocator request size (keep well under INT_MAX): */ #define MAX_ALLOC 0x40000000 /* A made-up hashing seed: */ #define HASH_CONST 0xa5b35705 /* Constants for afl-gotcpu to control busy loop timing: */ #define CTEST_TARGET_MS 5000 #define CTEST_BUSY_CYCLES (10 * 1000 * 1000) /* Uncomment this to use inferior block-coverage-based instrumentation. Note that you need to recompile the target binary for this to have any effect: */ // #define COVERAGE_ONLY /* Uncomment this to ignore hit counts and output just one bit per tuple. As with the previous setting, you will need to recompile the target binary: */ // #define SKIP_COUNTS /* Uncomment this to use instrumentation data to record newly discovered paths, but do not use them as seeds for fuzzing. This is useful for conveniently measuring coverage that could be attained by a "dumb" fuzzing algorithm: */ // #define IGNORE_FINDS #define EDGES_SHM_SIZE (100 * 1024 * 1024) //100MB #endif /* ! _HAVE_CONFIG_H */ ================================================ FILE: instrumentation/winafl_debug.h ================================================ /* american fuzzy lop - debug / error handling macros -------------------------------------------------- Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_DEBUG_H #define _HAVE_DEBUG_H #include #include "winafl_types.h" #include "winafl_config.h" /******************* * Terminal colors * *******************/ #ifdef USE_COLOR # define cBLK "\x1b[0;30m" # define cRED "\x1b[0;31m" # define cGRN "\x1b[0;32m" # define cBRN "\x1b[0;33m" # define cBLU "\x1b[0;34m" # define cMGN "\x1b[0;35m" # define cCYA "\x1b[0;36m" # define cNOR "\x1b[0;37m" # define cGRA "\x1b[1;30m" # define cLRD "\x1b[1;31m" # define cLGN "\x1b[1;32m" # define cYEL "\x1b[1;33m" # define cLBL "\x1b[1;34m" # define cPIN "\x1b[1;35m" # define cLCY "\x1b[1;36m" # define cBRI "\x1b[1;37m" # define cRST "\x1b[0m" #else # define cBLK "" # define cRED "" # define cGRN "" # define cBRN "" # define cBLU "" # define cMGN "" # define cCYA "" # define cNOR "" # define cGRA "" # define cLRD "" # define cLGN "" # define cYEL "" # define cLBL "" # define cPIN "" # define cLCY "" # define cBRI "" # define cRST "" #endif /* ^USE_COLOR */ /************************* * Box drawing sequences * *************************/ #ifdef FANCY_BOXES # define SET_G1 "\x1b)0" /* Set G1 for box drawing */ # define RESET_G1 "\x1b)B" /* Reset G1 to ASCII */ # define bSTART "\x0e" /* Enter G1 drawing mode */ # define bSTOP "\x0f" /* Leave G1 drawing mode */ # define bH "q" /* Horizontal line */ # define bV "x" /* Vertical line */ # define bLT "l" /* Left top corner */ # define bRT "k" /* Right top corner */ # define bLB "m" /* Left bottom corner */ # define bRB "j" /* Right bottom corner */ # define bX "n" /* Cross */ # define bVR "t" /* Vertical, branch right */ # define bVL "u" /* Vertical, branch left */ # define bHT "v" /* Horizontal, branch top */ # define bHB "w" /* Horizontal, branch bottom */ #else # define SET_G1 "" # define RESET_G1 "" # define bSTART "" # define bSTOP "" # define bH "-" # define bV "|" # define bLT "+" # define bRT "+" # define bLB "+" # define bRB "+" # define bX 
"+" # define bVR "+" # define bVL "+" # define bHT "+" # define bHB "+" #endif /* ^FANCY_BOXES */ /*********************** * Misc terminal codes * ***********************/ //#define TERM_HOME "\x1b[H" //#define TERM_CLEAR TERM_HOME "\x1b[2J" //#define cEOL "\x1b[0K" //#define CURSOR_HIDE "\x1b[?25l" //#define CURSOR_SHOW "\x1b[?25h" #define TERM_HOME "" #define TERM_CLEAR "" #define cEOL "" #define CURSOR_HIDE "" #define CURSOR_SHOW "" /************************ * Debug & error macros * ************************/ /* Just print stuff to the appropriate stream. */ #ifdef MESSAGES_TO_STDOUT # define SAYF(...) printf(__VA_ARGS__) #else # define SAYF(...) fprintf(stderr, __VA_ARGS__) #endif /* ^MESSAGES_TO_STDOUT */ /* Show a prefixed warning. */ #define WARNF(...) do { \ SAYF(cYEL "[!] " cBRI "WARNING: " cRST __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "doing something" message. */ #define ACTF(...) do { \ SAYF(cLBL "[*] " cRST __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "success" message. */ #define OKF(...) do { \ SAYF(cLGN "[+] " cRST __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed fatal error message (not used in afl). */ #define BADF(...) do { \ SAYF(cLRD "\n[-] " cRST __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Die with a verbose non-OS fatal error message. */ #define FATAL(...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cLRD "\n[-] PROGRAM ABORT : " cBRI __VA_ARGS__); \ SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ exit(1); \ } while (0) /* Die by calling abort() to provide a core dump. */ #define ABORT(...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cLRD "\n[-] PROGRAM ABORT : " cBRI __VA_ARGS__); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ abort(); \ } while (0) /* Die while also including the output of perror(). */ #define PFATAL(...) 
do { \ fflush(stdout); \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cLRD "\n[-] SYSTEM ERROR : " cBRI __VA_ARGS__); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \ __FUNCTION__, __FILE__, __LINE__); \ SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \ exit(1); \ } while (0) /* Die with FAULT() or PFAULT() depending on the value of res (used to interpret different failure modes for read(), write(), etc). */ #define RPFATAL(res, ...) do { \ if (res < 0) PFATAL(__VA_ARGS__); else FATAL(__VA_ARGS__); \ } while (0) /* Error-checking versions of read() and write() that call RPFATAL() as appropriate. */ #define ck_write(fd, buf, len, fn) do { \ u32 _len = (len); \ s32 _res = _write(fd, buf, _len); \ if (_res != _len) RPFATAL(_res, "Short write to %s", fn); \ } while (0) #define ck_read(fd, buf, len, fn) do { \ u32 _len = (len); \ s32 _res = _read(fd, buf, _len); \ if (_res != _len) RPFATAL(_res, "Short read from %s", fn); \ } while (0) #endif /* ! _HAVE_DEBUG_H */ ================================================ FILE: instrumentation/winafl_hash.h ================================================ /* american fuzzy lop - hashing function ------------------------------------- Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This file has been modified from the original to suit the purposes of this project. 
*/ /* The hash32() function is a variant of MurmurHash3, a good non-cryptosafe hashing function developed by Austin Appleby. For simplicity, this variant does *NOT* accept buffer lengths that are not divisible by 8 bytes. The 32-bit version is otherwise similar to the original; the 64-bit one is a custom hack with mostly-unproven properties. Austin's original code is public domain. */ #ifndef _HAVE_HASH_H #define _HAVE_HASH_H #include "winafl_types.h" #if defined(_M_X64) || defined(__x86_64__) #define ROL64(_x, _r) ((((u64)(_x)) << (_r)) | (((u64)(_x)) >> (64 - (_r)))) static inline u32 hash32(const void* key, u32 len, u32 seed) { const u64* data = (u64*)key; u64 h1 = seed ^ len; len >>= 3; while (len--) { u64 k1 = *data++; k1 *= 0x87c37b91114253d5ULL; k1 = ROL64(k1, 31); k1 *= 0x4cf5ad432745937fULL; h1 ^= k1; h1 = ROL64(h1, 27); h1 = h1 * 5 + 0x52dce729; } h1 ^= h1 >> 33; h1 *= 0xff51afd7ed558ccdULL; h1 ^= h1 >> 33; h1 *= 0xc4ceb9fe1a85ec53ULL; h1 ^= h1 >> 33; return h1; } static inline u32 hash32_with_ignore(const void* key, const void * ignore, u32 len, u32 seed) { const u64* data = (u64*)key; const u64* ignore_data = (u64*)ignore; u64 h1 = seed ^ len; len >>= 3; while (len--) { u64 k1 = *data++; u64 i1 = *ignore_data++; k1 = k1 & i1; //mask off any ignore'd bytes k1 *= 0x87c37b91114253d5ULL; k1 = ROL64(k1, 31); k1 *= 0x4cf5ad432745937fULL; h1 ^= k1; h1 = ROL64(h1, 27); h1 = h1 * 5 + 0x52dce729; } h1 ^= h1 >> 33; h1 *= 0xff51afd7ed558ccdULL; h1 ^= h1 >> 33; h1 *= 0xc4ceb9fe1a85ec53ULL; h1 ^= h1 >> 33; return h1; } #else #define ROL32(_x, _r) ((((u32)(_x)) << (_r)) | (((u32)(_x)) >> (32 - (_r)))) static inline u32 hash32(const void* key, u32 len, u32 seed) { const u32* data = (u32*)key; u32 h1 = seed ^ len; len >>= 2; while (len--) { u32 k1 = *data++; k1 *= 0xcc9e2d51; k1 = ROL32(k1, 15); k1 *= 0x1b873593; h1 ^= k1; h1 = ROL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } h1 ^= h1 >> 16; h1 *= 0x85ebca6b; h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; h1 ^= h1 >> 16; return h1; } 
static inline u32 hash32_with_ignore(const void* key, const void * ignore, u32 len, u32 seed) { const u32* data = (u32*)key; const u32* ignore_data = (u32*)ignore; const u8* ignore_byte = (u8*)ignore; u32 h1 = seed ^ len; len >>= 2; while (len--) { u32 k1 = *data++; /* u32 i1 = *ignore_data++; k1 = k1 & i1; //mask off any ignore'd bytes */ u8 i1; i1 = *ignore_byte; ignore_byte++; if (i1) k1 = k1 & 0xffffff00; i1 = *ignore_byte; ignore_byte++; if (i1) k1 = k1 & 0xffff00ff; i1 = *ignore_byte; ignore_byte++; if (i1) k1 = k1 & 0xff00ffff; i1 = *ignore_byte; ignore_byte++; if (i1) k1 = k1 & 0x00ffffff; k1 *= 0xcc9e2d51; k1 = ROL32(k1, 15); k1 *= 0x1b873593; h1 ^= k1; h1 = ROL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } h1 ^= h1 >> 16; h1 *= 0x85ebca6b; h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; h1 ^= h1 >> 16; return h1; } #endif /* ^__x86_64__ */ #endif /* !_HAVE_HASH_H */ ================================================ FILE: instrumentation/winafl_types.h ================================================ /* american fuzzy lop - type definitions and minor macros ------------------------------------------------------ Original AFL code written by Michal Zalewski Windows fork written and maintained by Ivan Fratric Copyright 2016 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_TYPES_H #define _HAVE_TYPES_H #include #include typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; /* Ugh. 
There is an unintended compiler / glibc #include glitch caused by combining the u64 type an %llu in format strings, necessitating a workaround. In essence, the compiler is always looking for 'unsigned long long' for %llu. On 32-bit systems, the u64 type (aliased to uint64_t) is expanded to 'unsigned long long' in , so everything checks out. But on 64-bit systems, it is #ifdef'ed in the same file as 'unsigned long'. Now, it only happens in circumstances where the type happens to have the expected bit width, *but* the compiler does not know that... and complains about 'unsigned long' being unsafe to pass to %llu. */ #ifdef __x86_64__ typedef unsigned long long u64; #else typedef uint64_t u64; #endif /* ^sizeof(...) */ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; #ifndef MIN # define MIN(_a,_b) ((_a) > (_b) ? (_b) : (_a)) # define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b)) #endif /* !MIN */ #define SWAP16(x) (((x) >> 8) | ((x) << 8)) #define SWAP32(x) (((x) >> 24) | (((x) & 0x00FF0000) >> 8) | (((x) & 0x0000FF00) << 8) | ((x) << 24)) #define R(x) (random() % (x)) #define STRINGIFY_INTERNAL(x) #x #define STRINGIFY(x) STRINGIFY_INTERNAL(x) #define MEM_BARRIER() \ asm volatile("" ::: "memory") #endif /* ! _HAVE_TYPES_H */ ================================================ FILE: instrumentation/wingui.c ================================================ #include /* * Given a ProcessId this function will attempt to determine if it is * "stuck" on a modal dialog. These are usualy some sport of error * and flow stoppage without a full crash, forcing us to wait for * timeout otherwise. We don't read the message, only know that it is * there incase the other process hangs. We don't want to get stuck. 
*/ BOOL IsProcessInModalDialog( DWORD dwTargetProcessId ) { HWND hwndFoundDialog = NULL; HWND hwndDialogOwner = NULL; DWORD dwThreadId; DWORD dwProcessId; do { hwndFoundDialog = FindWindowExA( // Find a "Dialog" class window GetDesktopWindow(), hwndFoundDialog, MAKEINTATOM(32770), // "#32770 (Dialog)" NULL ); if ( hwndFoundDialog ) { hwndDialogOwner = GetWindow( hwndFoundDialog, GW_OWNER ); // Fetch it's owner if ( !IsWindowEnabled( hwndDialogOwner ) ) // If owner is disabled, possibly modal dialog { dwThreadId = GetWindowThreadProcessId( hwndDialogOwner, &dwProcessId ); if ( dwProcessId == dwTargetProcessId ) return TRUE; } } } while ( hwndFoundDialog != NULL ); return FALSE; } ================================================ FILE: instrumentation/wingui.h ================================================ #pragma once #include extern "C" BOOL IsProcessInModalDialog( DWORD dwTargetProcessId ); ================================================ FILE: instrumentation/xxhash.c ================================================ /* * xxHash - Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com * - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* ************************************* * Tuning parameters ***************************************/ /*!XXH_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. * It can generate buggy code on targets which do not support unaligned memory accesses. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See http://stackoverflow.com/a/32095106/646947 for details. 
* Prefer these methods in priority order (0 > 1 > 2)
 */
/* Pick a default access method only when the build did not supply one. */
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
/* GCC targeting an ARMv6 variant: method 2 (direct access). */
#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
                        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
                        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define XXH_FORCE_MEMORY_ACCESS 2
/* Intel compiler outside Windows, or GCC targeting an ARMv7 variant:
   method 1 (packed access). */
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
                    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
                    || defined(__ARM_ARCH_7S__) ))
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
/* In every other case the macro is left undefined here. */
#endif

/*!XXH_ACCEPT_NULL_INPUT_POINTER :
 * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
 * When this macro is enabled, xxHash actively checks input for null pointer.
 * If it is, result for null input pointers is the same as a null-length input.
 */
#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
#  define XXH_ACCEPT_NULL_INPUT_POINTER 0   /* default: no null check */
#endif

/*!XXH_FORCE_NATIVE_FORMAT :
 * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
 * Results are therefore identical for little-endian and big-endian CPU.
 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
 * to improve speed for Big-endian CPU.
 * This option has no impact on Little_Endian CPU.
 */
#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
#  define XXH_FORCE_NATIVE_FORMAT 0   /* default: endian-independent results */
#endif

/*!XXH_FORCE_ALIGN_CHECK :
 * This is a minor performance trick, only useful with lots of very small keys.
 * It means : check for aligned/unaligned input.
* The check costs one initial branch per hash; * set it to 0 when the input is guaranteed to be aligned, * or when alignment doesn't matter for performance. */ #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) # define XXH_FORCE_ALIGN_CHECK 0 # else # define XXH_FORCE_ALIGN_CHECK 1 # endif #endif /* ************************************* * Includes & Memory related functions ***************************************/ /*! Modify the local functions below should you wish to use some other memory routines * for malloc(), free() */ #include static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } /*! and for memcpy() */ #include static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } #include /* assert */ #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" /* ************************************* * Compiler Specific Options ***************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # define FORCE_INLINE static __forceinline #else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else # define FORCE_INLINE static inline # endif # else # define FORCE_INLINE static # endif /* __STDC_VERSION__ */ #endif /* ************************************* * Basic Types ***************************************/ #ifndef MEM_MODULE # if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; # else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && 
(XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; } __attribute__((packed)) unalign; static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } #else /* portable and safe solution. Generally efficient. * see : http://stackoverflow.com/a/32095106/646947 */ static U32 XXH_read32(const void* memPtr) { U32 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* **************************************** * Compiler-specific Functions and Macros ******************************************/ #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong #elif XXH_GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 #else static U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } #endif /* ************************************* * Architecture Macros ***************************************/ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN static int XXH_isLittleEndian(void) { const union { U32 u; BYTE c[4]; 
} one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; } # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() #endif /* *************************** * Memory reads *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); else return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } static U32 XXH_readBE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } /* ************************************* * Macros ***************************************/ #define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* * 32-bit hash functions *********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; static const U32 PRIME32_3 = 3266489917U; static const U32 PRIME32_4 = 668265263U; static const U32 PRIME32_5 = 374761393U; static U32 XXH32_round(U32 seed, U32 input) { seed += input * PRIME32_2; seed = XXH_rotl32(seed, 13); seed *= PRIME32_1; return seed; } /* mix all bits */ static U32 XXH32_avalanche(U32 h32) { h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return(h32); } #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) static U32 XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)ptr; #define PROCESS1 \ h32 
+= (*p) * PRIME32_5; \ p++; \ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; #define PROCESS4 \ h32 += XXH_get32bits(p) * PRIME32_3; \ p+=4; \ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; switch(len&15) /* or switch(bEnd - p) */ { case 12: PROCESS4; /* fallthrough */ case 8: PROCESS4; /* fallthrough */ case 4: PROCESS4; return XXH32_avalanche(h32); case 13: PROCESS4; /* fallthrough */ case 9: PROCESS4; /* fallthrough */ case 5: PROCESS4; PROCESS1; return XXH32_avalanche(h32); case 14: PROCESS4; /* fallthrough */ case 10: PROCESS4; /* fallthrough */ case 6: PROCESS4; PROCESS1; PROCESS1; return XXH32_avalanche(h32); case 15: PROCESS4; /* fallthrough */ case 11: PROCESS4; /* fallthrough */ case 7: PROCESS4; /* fallthrough */ case 3: PROCESS1; /* fallthrough */ case 2: PROCESS1; /* fallthrough */ case 1: PROCESS1; /* fallthrough */ case 0: return XXH32_avalanche(h32); } assert(0); return h32; /* reaching this point is deemed impossible */ } FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif if (len>=16) { const BYTE* const limit = bEnd - 15; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; U32 v4 = seed - PRIME32_1; do { v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; } while (p < limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (U32)len; return XXH32_finalize(h32, p, len&15, endian, align); } XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) { #if 0 /* Simple version, good for code maintenance, but 
unfortunately slow for small inputs */ XXH32_state_t state; XXH32_reset(&state, seed); XXH32_update(&state, input, len); return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /*====== Hash streaming ======*/ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && 
(XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif state->total_len_32 += (unsigned)len; state->large_len |= (len>=16) | (state->total_len_32>=16); if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (unsigned)len; return XXH_OK; } if (state->memsize) { /* some data left from previous update */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { const U32* p32 = state->mem32; state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); } p += 16-state->memsize; state->memsize = 0; } if (p <= bEnd-16) { const BYTE* const limit = bEnd - 16; U32 v1 = state->v1; U32 v2 = state->v2; U32 v3 = state->v3; U32 v4 = state->v4; do { v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else return XXH32_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { U32 h32; if (state->large_len) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + 
XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); } XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_digest_endian(state_in, XXH_littleEndian); else return XXH32_digest_endian(state_in, XXH_bigEndian); } /*====== Canonical representation ======*/ /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file or buffer, remaining comparable across different systems. */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) { return XXH_readBE32(src); } #ifndef XXH_NO_LONG_LONG /* ******************************************************************* * 64-bit hash functions *********************************************************************/ /*====== Memory access ======*/ #ifndef MEM_MODULE # define MEM_MODULE # if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint64_t U64; # else /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ typedef unsigned long long U64; # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } #else /* portable and safe solution. Generally efficient. * see : http://stackoverflow.com/a/32095106/646947 */ static U64 XXH_read64(const void* memPtr) { U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap64 _byteswap_uint64 #elif XXH_GCC_VERSION >= 403 # define XXH_swap64 __builtin_bswap64 #else static U64 XXH_swap64 (U64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } #endif FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); else return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } static U64 XXH_readBE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } /*====== xxh64 ======*/ static const U64 PRIME64_1 = 11400714785074694791ULL; static const U64 PRIME64_2 = 14029467366897019727ULL; static const U64 PRIME64_3 = 1609587929392839161ULL; static const U64 PRIME64_4 = 9650029242287828579ULL; static const U64 PRIME64_5 = 2870177450012600261ULL; static U64 XXH64_round(U64 acc, U64 input) { acc += input * PRIME64_2; acc = XXH_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static U64 XXH64_mergeRound(U64 acc, U64 val) { val = XXH64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } static U64 XXH64_avalanche(U64 h64) { h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) static U64 XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)ptr; #define PROCESS1_64 \ h64 ^= (*p) * PRIME64_5; \ p++; \ h64 = XXH_rotl64(h64, 11) * PRIME64_1; #define PROCESS4_64 \ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ p+=4; \ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; #define PROCESS8_64 { \ U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ p+=8; \ h64 ^= k1; \ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ } switch(len&31) { case 24: PROCESS8_64; /* fallthrough */ case 16: PROCESS8_64; /* fallthrough */ case 8: PROCESS8_64; return XXH64_avalanche(h64); case 28: PROCESS8_64; /* fallthrough */ case 20: PROCESS8_64; /* fallthrough */ case 12: PROCESS8_64; /* fallthrough */ case 4: PROCESS4_64; return XXH64_avalanche(h64); case 25: PROCESS8_64; /* fallthrough */ case 17: PROCESS8_64; /* fallthrough */ case 9: PROCESS8_64; PROCESS1_64; return XXH64_avalanche(h64); case 29: PROCESS8_64; /* fallthrough */ case 21: PROCESS8_64; /* fallthrough */ case 13: PROCESS8_64; /* fallthrough */ case 5: PROCESS4_64; PROCESS1_64; return XXH64_avalanche(h64); case 26: PROCESS8_64; /* 
fallthrough */ case 18: PROCESS8_64; /* fallthrough */ case 10: PROCESS8_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 30: PROCESS8_64; /* fallthrough */ case 22: PROCESS8_64; /* fallthrough */ case 14: PROCESS8_64; /* fallthrough */ case 6: PROCESS4_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 27: PROCESS8_64; /* fallthrough */ case 19: PROCESS8_64; /* fallthrough */ case 11: PROCESS8_64; PROCESS1_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 31: PROCESS8_64; /* fallthrough */ case 23: PROCESS8_64; /* fallthrough */ case 15: PROCESS8_64; /* fallthrough */ case 7: PROCESS4_64; /* fallthrough */ case 3: PROCESS1_64; /* fallthrough */ case 2: PROCESS1_64; /* fallthrough */ case 1: PROCESS1_64; /* fallthrough */ case 0: return XXH64_avalanche(h64); } /* impossible to reach */ assert(0); return 0; /* unreachable, but some compilers complain without it */ } FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U64 h64; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } #endif if (len>=32) { const BYTE* const limit = bEnd - 32; U64 v1 = seed + PRIME64_1 + PRIME64_2; U64 v2 = seed + PRIME64_2; U64 v3 = seed + 0; U64 v4 = seed - PRIME64_1; do { v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; } while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (U64) len; return XXH64_finalize(h64, p, len, endian, align); } XXH_PUBLIC_API unsigned long long XXH64 
(const void* input, size_t len, unsigned long long seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH64_state_t state; XXH64_reset(&state, seed); XXH64_update(&state, input, len); return XXH64_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /*====== Hash Streaming ======*/ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const 
BYTE* const bEnd = p + len; if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) { /* tmp buffer is full */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); p += 32-state->memsize; state->memsize = 0; } if (p+32 <= bEnd) { const BYTE* const limit = bEnd - 32; U64 v1 = state->v1; U64 v2 = state->v2; U64 v3 = state->v3; U64 v4 = state->v4; do { v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_update_endian(state_in, input, len, XXH_littleEndian); else return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { U64 h64; if (state->total_len >= 32) { U64 const v1 = state->v1; U64 const v2 = state->v2; U64 const v3 = state->v3; U64 const v4 = 
state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = state->v3 /*seed*/ + PRIME64_5; } h64 += (U64) state->total_len; return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_digest_endian(state_in, XXH_littleEndian); else return XXH64_digest_endian(state_in, XXH_bigEndian); } /*====== Canonical representation ======*/ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { return XXH_readBE64(src); } #endif /* XXH_NO_LONG_LONG */ ================================================ FILE: instrumentation/xxhash.h ================================================ /* xxHash - Extremely Fast Hash algorithm Header File Copyright (C) 2012-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* Notice extracted from xxHash homepage : xxHash is an extremely fast Hash algorithm, running at RAM speed limits. It also successfully passes all tests from the SMHasher suite. Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) Name Speed Q.Score Author xxHash 5.4 GB/s 10 CrapWow 3.2 GB/s 2 Andrew MurmurHash 3a 2.7 GB/s 10 Austin Appleby SpookyHash 2.0 GB/s 10 Bob Jenkins SBox 1.4 GB/s 9 Bret Mulvey Lookup3 1.2 GB/s 9 Bob Jenkins SuperFastHash 1.2 GB/s 1 Paul Hsieh CityHash64 1.05 GB/s 10 Pike & Alakuijala FNV 0.55 GB/s 5 Fowler, Noll, Vo CRC32 0.43 GB/s 9 MD5-32 0.33 GB/s 10 Ronald L. Rivest SHA1-32 0.28 GB/s 10 Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. A 64-bit version, named XXH64, is available since r35. It offers much better speed, but for 64-bit applications only.
Name Speed on 64 bits Speed on 32 bits XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ #ifndef XXHASH_H_5627135585666179 #define XXHASH_H_5627135585666179 1 #if defined (__cplusplus) extern "C" { #endif /* **************************** * Definitions ******************************/ #include /* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /* **************************** * API modifier ******************************/ /** XXH_INLINE_ALL (and XXH_PRIVATE_API) * This is useful to include xxhash functions in `static` mode * in order to inline them, and remove their symbol from the public list. * Inlining can offer dramatic performance improvement on small keys. * Methodology : * #define XXH_INLINE_ALL * #include "xxhash.h" * `xxhash.c` is automatically included. * It's not useful to compile and link it as a separate module. */ #define XXH_INLINE_ALL #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif # if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else /* this version may generate warnings for unused static functions */ # define XXH_PUBLIC_API static # endif #else # define XXH_PUBLIC_API /* do nothing */ #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ /*! XXH_NAMESPACE, aka Namespace Emulation : * * If you want to include _and expose_ xxHash functions from within your own library, * but also want to avoid symbol collisions with other libraries which may also include xxHash, * * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). 
* * Note that no change is required within the calling program as long as it includes `xxhash.h` : * regular symbol name will be automatically translated by this header. */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) #endif /* ************************************* * Version ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 #define XXH_VERSION_RELEASE 5 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); /*-********************************************************************** * 32-bit hash 
************************************************************************/ typedef unsigned int XXH32_hash_t; /*! XXH32() : Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); /*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* * Streaming functions generate the xxHash of an input provided in multiple segments. * Note that, for small input, they are slower than single-call functions, due to state management. * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. * * XXH state must first be allocated, using XXH*_createState() . * * Start a new hash by initializing state with a seed, using XXH*_reset(). * * Then, feed the hash state by calling XXH*_update() as many times as necessary. * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. * * Finally, a hash value can be produced anytime, by using XXH*_digest(). * This function returns the nn-bits hash as an int or long long. * * It's still possible to continue inserting input into the hash state after a digest, * and generate some new hashes later on, by calling again XXH*_digest(). 
* * When done, free XXH state space if it was allocated dynamically. */ /*====== Canonical representation ======*/ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. * The canonical representation uses human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. */ #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bit hash ************************************************************************/ typedef unsigned long long XXH64_hash_t; /*! XXH64() : Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". "seed" can be used to alter the result predictably. This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). 
*/ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); /*====== Streaming ======*/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); /*====== Canonical representation ======*/ typedef struct { unsigned char digest[8]; } XXH64_canonical_t; XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); #endif /* XXH_NO_LONG_LONG */ #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ This section contains declarations which are not guaranteed to remain stable. They may change in future versions, becoming incompatible with a different version of the library. These declarations should only be used with static linking. Never use them in association with dynamic linking ! =================================================================================================== */ /* These definitions are only present to allow * static allocation of XXH state, on stack or in a struct for example. * Never **ever** use members directly. 
*/ #if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include struct XXH32_state_s { uint32_t total_len_32; uint32_t large_len; uint32_t v1; uint32_t v2; uint32_t v3; uint32_t v4; uint32_t mem32[4]; uint32_t memsize; uint32_t reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ struct XXH64_state_s { uint64_t total_len; uint64_t v1; uint64_t v2; uint64_t v3; uint64_t v4; uint64_t mem64[4]; uint32_t memsize; uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ # else struct XXH32_state_s { unsigned total_len_32; unsigned large_len; unsigned v1; unsigned v2; unsigned v3; unsigned v4; unsigned mem32[4]; unsigned memsize; unsigned reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ # ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ struct XXH64_state_s { unsigned long long total_len; unsigned long long v1; unsigned long long v2; unsigned long long v3; unsigned long long v4; unsigned long long mem64[4]; unsigned memsize; unsigned reserved[2]; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ # endif # endif #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ #endif #endif /* XXH_STATIC_LINKING_ONLY */ #if defined (__cplusplus) } #endif #endif /* XXHASH_H_5627135585666179 */ ================================================ FILE: jansson/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (jansson) include_directories (${PROJECT_SOURCE_DIR}/) SET(JANSSON_SRC ${PROJECT_SOURCE_DIR}/dump.c ${PROJECT_SOURCE_DIR}/error.c ${PROJECT_SOURCE_DIR}/hashtable.c ${PROJECT_SOURCE_DIR}/hashtable_seed.c 
${PROJECT_SOURCE_DIR}/jansson_helper.c ${PROJECT_SOURCE_DIR}/load.c ${PROJECT_SOURCE_DIR}/memory.c ${PROJECT_SOURCE_DIR}/pack_unpack.c ${PROJECT_SOURCE_DIR}/strbuffer.c ${PROJECT_SOURCE_DIR}/strconv.c ${PROJECT_SOURCE_DIR}/utf.c ${PROJECT_SOURCE_DIR}/value.c ) source_group("Library Sources" FILES ${JANSSON_SRC}) add_library(jansson SHARED ${JANSSON_SRC}) target_compile_definitions(jansson PUBLIC JANSSON_EXPORTS) add_library(jansson_object OBJECT ${JANSSON_SRC}) if (NOT WIN32) set_target_properties(jansson_object PROPERTIES COMPILE_FLAGS "-fPIC") endif (NOT WIN32) target_compile_definitions(jansson_object PUBLIC JANSSON_NO_IMPORT) add_library(jansson_static STATIC ${JANSSON_SRC}) target_compile_definitions(jansson_static PUBLIC JANSSON_NO_IMPORT) ================================================ FILE: jansson/dump.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. 
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include "jansson_private.h" #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include "jansson.h" #include "strbuffer.h" #include "utf.h" #define MAX_INTEGER_STR_LENGTH 100 #define MAX_REAL_STR_LENGTH 100 #define FLAGS_TO_INDENT(f) ((f) & 0x1F) #define FLAGS_TO_PRECISION(f) (((f) >> 11) & 0x1F) struct buffer { const size_t size; size_t used; char *data; }; static int dump_to_strbuffer(const char *buffer, size_t size, void *data) { return strbuffer_append_bytes((strbuffer_t *)data, buffer, size); } static int dump_to_buffer(const char *buffer, size_t size, void *data) { struct buffer *buf = (struct buffer *)data; if (buf->used + size <= buf->size) memcpy(&buf->data[buf->used], buffer, size); buf->used += size; return 0; } static int dump_to_file(const char *buffer, size_t size, void *data) { FILE *dest = (FILE *)data; if (fwrite(buffer, size, 1, dest) != 1) return -1; return 0; } static int dump_to_fd(const char *buffer, size_t size, void *data) { int *dest = (int *)data; #ifdef HAVE_UNISTD_H if (write(*dest, buffer, size) == (ssize_t)size) return 0; #endif return -1; } /* 32 spaces (the maximum indentation size) */ static const char whitespace[] = " "; static int dump_indent(size_t flags, int depth, int space, json_dump_callback_t dump, void *data) { if (FLAGS_TO_INDENT(flags) > 0) { unsigned int ws_count = FLAGS_TO_INDENT(flags), n_spaces = depth * ws_count; if (dump("\n", 1, data)) return -1; while (n_spaces > 0) { int cur_n = n_spaces < sizeof whitespace - 1 ? 
n_spaces : sizeof whitespace - 1; if (dump(whitespace, cur_n, data)) return -1; n_spaces -= cur_n; } } else if (space && !(flags & JSON_COMPACT)) { return dump(" ", 1, data); } return 0; } static int dump_mem(const char *mem, size_t len, json_dump_callback_t dump, void *data) { const unsigned char *pos; unsigned char buffer[200]; int num_chars; if (dump("\"" MEM_TOKEN, 1 + strlen(MEM_TOKEN), data)) return -1; for (pos = (const unsigned char *)mem; pos < (const unsigned char *)(mem + len); pos += num_chars / 2) { if (pos + 0x20 < (const unsigned char *)(mem + len)) num_chars = snprintf(buffer, sizeof(buffer), "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X" "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", *pos, *(pos + 1), *(pos + 2), *(pos + 3), *(pos + 4), *(pos + 5), *(pos + 6), *(pos + 7), *(pos + 8), *(pos + 9), *(pos + 10), *(pos + 11), *(pos + 12), *(pos + 13), *(pos + 14), *(pos + 15), *(pos + 16), *(pos + 17), *(pos + 18), *(pos + 19), *(pos + 20), *(pos + 21), *(pos + 22), *(pos + 23), *(pos + 24), *(pos + 25), *(pos + 26), *(pos + 27), *(pos + 28), *(pos + 29), *(pos + 30), *(pos + 31)); else num_chars = snprintf(buffer, sizeof(buffer), "%02X", *pos); if (dump(buffer, num_chars, data)) return -1; } return dump("\"", 1, data); } static int dump_string(const char *str, size_t len, json_dump_callback_t dump, void *data, size_t flags) { const char *pos, *end, *lim; int32_t codepoint; if (dump("\"", 1, data)) return -1; end = pos = str; lim = str + len; while (1) { const char *text; char seq[13]; int length; while (end < lim) { end = utf8_iterate(pos, lim - pos, &codepoint); if (!end) return -1; /* mandatory escape or control char */ if (codepoint == '\\' || codepoint == '"' || codepoint < 0x20) break; /* slash */ if ((flags & JSON_ESCAPE_SLASH) && codepoint == '/') break; /* non-ASCII */ if ((flags & JSON_ENSURE_ASCII) && codepoint > 0x7F) break; pos = end; } if (pos != str) { if (dump(str, pos - str, data)) return -1; } 
if (end == pos) break; /* handle \, /, ", and control codes */ length = 2; switch (codepoint) { case '\\': text = "\\\\"; break; case '\"': text = "\\\""; break; case '\b': text = "\\b"; break; case '\f': text = "\\f"; break; case '\n': text = "\\n"; break; case '\r': text = "\\r"; break; case '\t': text = "\\t"; break; case '/': text = "\\/"; break; default: { /* codepoint is in BMP */ if (codepoint < 0x10000) { snprintf(seq, sizeof(seq), "\\u%04X", (unsigned int)codepoint); length = 6; } /* not in BMP -> construct a UTF-16 surrogate pair */ else { int32_t first, last; codepoint -= 0x10000; first = 0xD800 | ((codepoint & 0xffc00) >> 10); last = 0xDC00 | (codepoint & 0x003ff); snprintf(seq, sizeof(seq), "\\u%04X\\u%04X", (unsigned int)first, (unsigned int)last); length = 12; } text = seq; break; } } if (dump(text, length, data)) return -1; str = pos = end; } return dump("\"", 1, data); } static int compare_keys(const void *key1, const void *key2) { return strcmp(*(const char **)key1, *(const char **)key2); } static int do_dump(const json_t *json, size_t flags, int depth, json_dump_callback_t dump, void *data) { int embed = flags & JSON_EMBED; flags &= ~JSON_EMBED; if (!json) return -1; switch (json_typeof(json)) { case JSON_NULL: return dump("null", 4, data); case JSON_TRUE: return dump("true", 4, data); case JSON_FALSE: return dump("false", 5, data); case JSON_INTEGER: { char buffer[MAX_INTEGER_STR_LENGTH]; int size; size = snprintf(buffer, MAX_INTEGER_STR_LENGTH, "%" JSON_INTEGER_FORMAT, json_integer_value(json)); if (size < 0 || size >= MAX_INTEGER_STR_LENGTH) return -1; return dump(buffer, size, data); } case JSON_REAL: { char buffer[MAX_REAL_STR_LENGTH]; int size; double value = json_real_value(json); size = jsonp_dtostr(buffer, MAX_REAL_STR_LENGTH, value, FLAGS_TO_PRECISION(flags)); if (size < 0) return -1; return dump(buffer, size, data); } case JSON_MEM: return dump_mem(json_mem_value(json), json_mem_length(json), dump, data); case JSON_STRING: return 
dump_string(json_string_value(json), json_string_length(json), dump, data, flags); case JSON_ARRAY: { size_t n; size_t i; json_array_t *array; /* detect circular references */ array = json_to_array(json); if (array->visited) goto array_error; array->visited = 1; n = json_array_size(json); if (!embed && dump("[", 1, data)) goto array_error; if (n == 0) { array->visited = 0; return embed ? 0 : dump("]", 1, data); } if (dump_indent(flags, depth + 1, 0, dump, data)) goto array_error; for (i = 0; i < n; ++i) { if (do_dump(json_array_get(json, i), flags, depth + 1, dump, data)) goto array_error; if (i < n - 1) { if (dump(",", 1, data) || dump_indent(flags, depth + 1, 1, dump, data)) goto array_error; } else { if (dump_indent(flags, depth, 0, dump, data)) goto array_error; } } array->visited = 0; return embed ? 0 : dump("]", 1, data); array_error: array->visited = 0; return -1; } case JSON_OBJECT: { json_object_t *object; void *iter; const char *separator; int separator_length; if (flags & JSON_COMPACT) { separator = ":"; separator_length = 1; } else { separator = ": "; separator_length = 2; } /* detect circular references */ object = json_to_object(json); if (object->visited) goto object_error; object->visited = 1; iter = json_object_iter((json_t *)json); if (!embed && dump("{", 1, data)) goto object_error; if (!iter) { object->visited = 0; return embed ? 
0 : dump("}", 1, data); } if (dump_indent(flags, depth + 1, 0, dump, data)) goto object_error; if (flags & JSON_SORT_KEYS) { const char **keys; size_t size, i; size = json_object_size(json); keys = jsonp_malloc(size * sizeof(const char *)); if (!keys) goto object_error; i = 0; while (iter) { keys[i] = json_object_iter_key(iter); iter = json_object_iter_next((json_t *)json, iter); i++; } assert(i == size); qsort(keys, size, sizeof(const char *), compare_keys); for (i = 0; i < size; i++) { const char *key; json_t *value; key = keys[i]; value = json_object_get(json, key); assert(value); dump_string(key, strlen(key), dump, data, flags); if (dump(separator, separator_length, data) || do_dump(value, flags, depth + 1, dump, data)) { jsonp_free(keys); goto object_error; } if (i < size - 1) { if (dump(",", 1, data) || dump_indent(flags, depth + 1, 1, dump, data)) { jsonp_free(keys); goto object_error; } } else { if (dump_indent(flags, depth, 0, dump, data)) { jsonp_free(keys); goto object_error; } } } jsonp_free(keys); } else { /* Don't sort keys */ while (iter) { void *next = json_object_iter_next((json_t *)json, iter); const char *key = json_object_iter_key(iter); dump_string(key, strlen(key), dump, data, flags); if (dump(separator, separator_length, data) || do_dump(json_object_iter_value(iter), flags, depth + 1, dump, data)) goto object_error; if (next) { if (dump(",", 1, data) || dump_indent(flags, depth + 1, 1, dump, data)) goto object_error; } else { if (dump_indent(flags, depth, 0, dump, data)) goto object_error; } iter = next; } } object->visited = 0; return embed ? 
0 : dump("}", 1, data); object_error: object->visited = 0; return -1; } default: /* not reached */ return -1; } } char *json_dumps(const json_t *json, size_t flags) { strbuffer_t strbuff; char *result; if (strbuffer_init(&strbuff)) return NULL; if (json_dump_callback(json, dump_to_strbuffer, (void *)&strbuff, flags)) result = NULL; else result = jsonp_strdup(strbuffer_value(&strbuff)); strbuffer_close(&strbuff); return result; } size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags) { struct buffer buf = { size, 0, buffer }; if (json_dump_callback(json, dump_to_buffer, (void *)&buf, flags)) return 0; return buf.used; } int json_dumpf(const json_t *json, FILE *output, size_t flags) { return json_dump_callback(json, dump_to_file, (void *)output, flags); } int json_dumpfd(const json_t *json, int output, size_t flags) { return json_dump_callback(json, dump_to_fd, (void *)&output, flags); } int json_dump_file(const json_t *json, const char *path, size_t flags) { int result; FILE *output = fopen(path, "w"); if (!output) return -1; result = json_dumpf(json, output, flags); fclose(output); return result; } int json_dump_callback(const json_t *json, json_dump_callback_t callback, void *data, size_t flags) { if (!(flags & JSON_ENCODE_ANY)) { if (!json_is_array(json) && !json_is_object(json)) return -1; } return do_dump(json, flags, 0, callback, data); } ================================================ FILE: jansson/error.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. 
*/ #include #include "jansson_private.h" void jsonp_error_init(json_error_t *error, const char *source) { if(error) { error->text[0] = '\0'; error->line = -1; error->column = -1; error->position = 0; if(source) jsonp_error_set_source(error, source); else error->source[0] = '\0'; } } void jsonp_error_set_source(json_error_t *error, const char *source) { size_t length; if(!error || !source) return; length = strlen(source); if(length < JSON_ERROR_SOURCE_LENGTH) strncpy(error->source, source, length + 1); else { size_t extra = length - JSON_ERROR_SOURCE_LENGTH + 4; strncpy(error->source, "...", 3); strncpy(error->source + 3, source + extra, length - extra + 1); } } void jsonp_error_set(json_error_t *error, int line, int column, size_t position, const char *msg, ...) { va_list ap; va_start(ap, msg); jsonp_error_vset(error, line, column, position, msg, ap); va_end(ap); } void jsonp_error_vset(json_error_t *error, int line, int column, size_t position, const char *msg, va_list ap) { if(!error) return; if(error->text[0] != '\0') { /* error already set */ return; } error->line = line; error->column = column; error->position = (int)position; vsnprintf(error->text, JSON_ERROR_TEXT_LENGTH, msg, ap); error->text[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; } ================================================ FILE: jansson/hashtable.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * This library is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. 
*/

#if HAVE_CONFIG_H
#include /* NOTE(review): header name lost in this dump - presumably <jansson_private_config.h>; confirm */
#endif

#include /* NOTE(review): header name lost in this dump - presumably <stdlib.h>; confirm */
#include /* NOTE(review): header name lost in this dump - presumably <string.h>; confirm */

#if HAVE_STDINT_H
#include /* NOTE(review): header name lost in this dump - presumably <stdint.h>; confirm */
#endif

#include "jansson_config.h"   /* for JSON_INLINE */
#include "jansson_private.h"  /* for container_of() */
#include "hashtable.h"

/* Order of a freshly initialised table; hashtable_init() allocates
   hashsize(order) buckets. */
#ifndef INITIAL_HASHTABLE_ORDER
#define INITIAL_HASHTABLE_ORDER 3
#endif

typedef struct hashtable_list list_t;
typedef struct hashtable_pair pair_t;
typedef struct hashtable_bucket bucket_t;

/* Seed passed to the hash function; defined in hashtable_seed.c */
extern volatile uint32_t hashtable_seed;

/* Implementation of the hash function */
#include "lookup3.h"

/* Recover the enclosing pair from a pointer to its `list` or
   `ordered_list` member, respectively. */
#define list_to_pair(list_)  container_of(list_, pair_t, list)
#define ordered_list_to_pair(list_)  container_of(list_, pair_t, ordered_list)
/* Hash of a NUL-terminated key, seeded with hashtable_seed */
#define hash_str(key)        ((size_t)hashlittle((key), strlen(key), hashtable_seed))

/* Make `list` an empty circular list: both links point back at itself. */
static JSON_INLINE void list_init(list_t *list)
{
    list->next = list;
    list->prev = list;
}

/* Link `node` immediately before `list`. When `list` is a list head this
   puts `node` at the tail of the list. */
static JSON_INLINE void list_insert(list_t *list, list_t *node)
{
    node->next = list;
    node->prev = list->prev;
    list->prev->next = node;
    list->prev = node;
}

/* Unlink `list` from its neighbours. Its own next/prev fields are left
   as they were. */
static JSON_INLINE void list_remove(list_t *list)
{
    list->prev->next = list->next;
    list->next->prev = list->prev;
}

/* A bucket is empty when both of its ends point at the table's list head,
   which is what hashtable_init(), hashtable_clear() and
   hashtable_do_rehash() store in every unused bucket. */
static JSON_INLINE int bucket_is_empty(hashtable_t *hashtable, bucket_t *bucket)
{
    return bucket->first == &hashtable->list && bucket->first == bucket->last;
}

/* Add `list` to `bucket`. The first entry of a bucket goes to the tail of
   hashtable->list; every later entry is linked directly before the current
   first entry, so a bucket's entries form one contiguous run from
   bucket->first to bucket->last. */
static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket,
                             list_t *list)
{
    if(bucket_is_empty(hashtable, bucket))
    {
        list_insert(&hashtable->list, list);
        bucket->first = bucket->last = list;
    }
    else
    {
        list_insert(bucket->first, list);
        bucket->first = list;
    }
}

/* Look for `key` in `bucket`, walking from bucket->first to bucket->last.
   The stored hash is compared first and strcmp() only runs on a hash
   match. Returns the pair, or NULL when the bucket is empty or holds no
   match. */
static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket,
                                   const char *key, size_t hash)
{
    list_t *list;
    pair_t *pair;

    if(bucket_is_empty(hashtable, bucket))
        return NULL;

    list = bucket->first;
    while(1)
    {
        pair = list_to_pair(list);
        if(pair->hash == hash && strcmp(pair->key, key) == 0)
            return pair;

        /* bucket->last is inclusive, so test it after the comparison */
        if(list == bucket->last)
            break;

        list = list->next;
    }

    return NULL;
}

/* returns 0 on success, -1 if key was not found */
static int hashtable_do_del(hashtable_t *hashtable,
                            const char *key, size_t hash)
{
    pair_t *pair;
    bucket_t *bucket;
    size_t index;

    index = hash & hashmask(hashtable->order);
    bucket = &hashtable->buckets[index];

    pair = hashtable_find_pair(hashtable, bucket, key, hash);
    if(!pair)
        return -1;

    /* Repair the bucket's ends before unlinking: a sole entry leaves the
       bucket empty, otherwise the end that pointed at the pair moves to
       its neighbour. */
    if(&pair->list == bucket->first && &pair->list == bucket->last)
        bucket->first = bucket->last = &hashtable->list;

    else if(&pair->list == bucket->first)
        bucket->first = pair->list.next;

    else if(&pair->list == bucket->last)
        bucket->last = pair->list.prev;

    list_remove(&pair->list);
    list_remove(&pair->ordered_list);
    json_decref(pair->value);

    jsonp_free(pair);
    hashtable->size--;

    return 0;
}

/* Drop the reference to every stored value and free every pair. The list
   heads, the buckets and the size are not touched here; the callers either
   reset them (hashtable_clear) or free the bucket array
   (hashtable_close). */
static void hashtable_do_clear(hashtable_t *hashtable)
{
    list_t *list, *next;
    pair_t *pair;

    for(list = hashtable->list.next; list != &hashtable->list; list = next)
    {
        /* Read the successor first: the pair is freed below */
        next = list->next;
        pair = list_to_pair(list);
        json_decref(pair->value);
        jsonp_free(pair);
    }
}

/* Grow the table by one order and redistribute all pairs over the new
   bucket array. Returns 0 on success, or -1 if the new array cannot be
   allocated, in which case nothing has been modified. */
static int hashtable_do_rehash(hashtable_t *hashtable)
{
    list_t *list, *next;
    pair_t *pair;
    size_t i, index, new_size, new_order;
    struct hashtable_bucket *new_buckets;

    new_order = hashtable->order + 1;
    new_size = hashsize(new_order);

    new_buckets = jsonp_malloc(new_size * sizeof(bucket_t));
    if(!new_buckets)
        return -1;

    jsonp_free(hashtable->buckets);
    hashtable->buckets = new_buckets;
    hashtable->order = new_order;

    for(i = 0; i < hashsize(hashtable->order); i++)
    {
        hashtable->buckets[i].first = hashtable->buckets[i].last =
            &hashtable->list;
    }

    /* Detach the old chain from the head. Its last node still points at
       &hashtable->list, which is what ends the loop below. */
    list = hashtable->list.next;
    list_init(&hashtable->list);

    for(; list != &hashtable->list; list = next) {
        /* insert_to_bucket() relinks the node, so fetch its successor first */
        next = list->next;
        pair = list_to_pair(list);
        index = pair->hash % new_size;
        insert_to_bucket(hashtable, &hashtable->buckets[index], &pair->list);
    }

    return 0;
}


/* Set up an empty table with hashsize(INITIAL_HASHTABLE_ORDER) buckets.
   Returns 0 on success, -1 if the bucket array cannot be allocated. */
int hashtable_init(hashtable_t *hashtable)
{
    size_t i;

    hashtable->size = 0;
    hashtable->order = INITIAL_HASHTABLE_ORDER;
    hashtable->buckets = jsonp_malloc(hashsize(hashtable->order) * sizeof(bucket_t));
    if(!hashtable->buckets)
        return -1;

    list_init(&hashtable->list);
    list_init(&hashtable->ordered_list);

    for(i = 0; i < hashsize(hashtable->order); i++)
    {
        hashtable->buckets[i].first = hashtable->buckets[i].last =
            &hashtable->list;
    }

    return 0;
}

/* Release all pairs and the bucket array. The hashtable_t itself is not
   freed. */
void hashtable_close(hashtable_t *hashtable)
{
    hashtable_do_clear(hashtable);
    jsonp_free(hashtable->buckets);
}

/* Store `value` under `key`. The table takes over the caller's reference
   (no json_incref here); when the key already exists the old value is
   decref'd and replaced in place. Returns 0 on success, or -1 if
   rehashing or allocating the pair fails or the key is too long; on
   failure `value` is left untouched. */
int hashtable_set(hashtable_t *hashtable, const char *key, json_t *value)
{
    pair_t *pair;
    bucket_t *bucket;
    size_t hash, index;

    /* rehash if the load ratio exceeds 1 */
    if(hashtable->size >= hashsize(hashtable->order))
        if(hashtable_do_rehash(hashtable))
            return -1;

    hash = hash_str(key);
    index = hash & hashmask(hashtable->order);
    bucket = &hashtable->buckets[index];
    pair = hashtable_find_pair(hashtable, bucket, key, hash);

    if(pair)
    {
        json_decref(pair->value);
        pair->value = value;
    }
    else
    {
        /* offsetof(...) returns the size of pair_t without the last,
           flexible member. This way, the correct amount is
           allocated. */

        size_t len = strlen(key);
        if(len >= (size_t)-1 - offsetof(pair_t, key)) {
            /* Avoid an overflow if the key is very long */
            return -1;
        }

        pair = jsonp_malloc(offsetof(pair_t, key) + len + 1);
        if(!pair)
            return -1;

        pair->hash = hash;
        strncpy(pair->key, key, len + 1);
        pair->value = value;
        list_init(&pair->list);
        list_init(&pair->ordered_list);

        insert_to_bucket(hashtable, bucket, &pair->list);
        /* Appending to ordered_list records the insertion order that the
           hashtable_iter*() functions follow. */
        list_insert(&hashtable->ordered_list, &pair->ordered_list);

        hashtable->size++;
    }
    return 0;
}

/* Return the value stored under `key`, or NULL if the key is absent. No
   reference is added for the caller. */
void *hashtable_get(hashtable_t *hashtable, const char *key)
{
    pair_t *pair;
    size_t hash;
    bucket_t *bucket;

    hash = hash_str(key);
    bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];

    pair = hashtable_find_pair(hashtable, bucket, key, hash);
    if(!pair)
        return NULL;

    return pair->value;
}

/* Remove `key` and decref its value. Returns 0 on success, -1 if the key
   was not found. */
int hashtable_del(hashtable_t *hashtable, const char *key)
{
    size_t hash = hash_str(key);
    return hashtable_do_del(hashtable, key, hash);
}

/* Remove every item but keep the current bucket array, so the table can
   be reused at its current order. */
void hashtable_clear(hashtable_t *hashtable)
{
    size_t i;

    hashtable_do_clear(hashtable);

    for(i = 0; i < hashsize(hashtable->order); i++)
    {
        hashtable->buckets[i].first = hashtable->buckets[i].last =
            &hashtable->list;
    }

    list_init(&hashtable->list);
    list_init(&hashtable->ordered_list);
    hashtable->size = 0;
}

/* Iterator at the first entry of ordered_list, or NULL for an empty
   table. */
void *hashtable_iter(hashtable_t *hashtable)
{
    return hashtable_iter_next(hashtable, &hashtable->ordered_list);
}

/* Iterator positioned at `key`, or NULL if the key is absent. */
void *hashtable_iter_at(hashtable_t *hashtable, const char *key)
{
    pair_t *pair;
    size_t hash;
    bucket_t *bucket;

    hash = hash_str(key);
    bucket = &hashtable->buckets[hash & hashmask(hashtable->order)];

    pair = hashtable_find_pair(hashtable, bucket, key, hash);
    if(!pair)
        return NULL;

    return &pair->ordered_list;
}

/* Iterator following `iter` in ordered_list, or NULL once the list head
   is reached. Iterators are pointers to the pairs' ordered_list nodes. */
void *hashtable_iter_next(hashtable_t *hashtable, void *iter)
{
    list_t *list = (list_t *)iter;
    if(list->next == &hashtable->ordered_list)
        return NULL;
    return list->next;
}

/* Key of the pair `iter` points at. `iter` is dereferenced without a
   NULL check. */
void *hashtable_iter_key(void *iter)
{
    pair_t *pair = ordered_list_to_pair((list_t *)iter);
    return pair->key;
}

/* Value of the pair `iter` points at. `iter` is dereferenced without a
   NULL check. */
void *hashtable_iter_value(void *iter)
{
    pair_t *pair = ordered_list_to_pair((list_t *)iter);
    return pair->value;
}

/* Replace the value of the pair `iter` points at: the old value is
   decref'd and `value` is stored without a json_incref. */
void hashtable_iter_set(void *iter, json_t *value)
{
    pair_t *pair = ordered_list_to_pair((list_t *)iter);

    json_decref(pair->value);
    pair->value = value;
}

================================================
FILE: jansson/hashtable.h
================================================
/*
 * Copyright (c) 2009-2016 Petri Lehtinen
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See MIT for details.
 */

#ifndef HASHTABLE_H
#define HASHTABLE_H

#include /* NOTE(review): header name lost in this dump - presumably <stdlib.h>; confirm */
#include "jansson.h"

/* Link node of a doubly-linked list. It is embedded in the structure it
   links (see the container_of() uses in hashtable.c). */
struct hashtable_list {
    struct hashtable_list *prev;
    struct hashtable_list *next;
};

/* "pair" may be a bit confusing a name, but think of it as a key-value pair.
In this case, it just encodes some extra data, too */
struct hashtable_pair {
    struct hashtable_list list;          /* link in hashtable->list (bucket chain) */
    struct hashtable_list ordered_list;  /* link in hashtable->ordered_list (iteration order) */
    size_t hash;                         /* hash of key, stored when the pair is created */
    json_t *value;
    char key[1];                         /* first byte of the key; hashtable_set()
                                            allocates offsetof(key) + strlen + 1 */
};

/* First and last list node of one bucket. Both point at the table's
   `list` head while the bucket is empty. */
struct hashtable_bucket {
    struct hashtable_list *first;
    struct hashtable_list *last;
};

typedef struct hashtable {
    size_t size;                         /* number of stored pairs */
    struct hashtable_bucket *buckets;
    size_t order;  /* hashtable has pow(2, order) buckets */
    struct hashtable_list list;          /* head of the list the buckets point into */
    struct hashtable_list ordered_list;  /* head of the list the iterators walk */
} hashtable_t;


/* Convert a key pointer obtained from a pair (e.g. via hashtable_iter_key)
   back into the iterator of that pair. */
#define hashtable_key_to_iter(key_) \
    (&(container_of(key_, struct hashtable_pair, key)->ordered_list))


/**
 * hashtable_init - Initialize a hashtable object
 *
 * @hashtable: The (statically allocated) hashtable object
 *
 * Initializes a statically allocated hashtable object. The object
 * should be cleared with hashtable_close when it's no longer used.
 *
 * Returns 0 on success, -1 on error (out of memory).
 */
int hashtable_init(hashtable_t *hashtable);

/**
 * hashtable_close - Release all resources used by a hashtable object
 *
 * @hashtable: The hashtable
 *
 * Destroys a statically allocated hashtable object. The hashtable_t
 * structure itself is not freed.
 */
void hashtable_close(hashtable_t *hashtable);

/**
 * hashtable_set - Add/modify value in hashtable
 *
 * @hashtable: The hashtable object
 * @key: The key
 * @value: The value
 *
 * If a value with the given key already exists, its value is replaced
 * with the new value. Value is "stolen" in the sense that hashtable
 * doesn't increment its refcount but decreases the refcount when the
 * value is no longer needed.
 *
 * Returns 0 on success, -1 on failure (out of memory, or a key too
 * long to allocate a pair for).
 */
int hashtable_set(hashtable_t *hashtable, const char *key, json_t *value);

/**
 * hashtable_get - Get a value associated with a key
 *
 * @hashtable: The hashtable object
 * @key: The key
 *
 * Returns value if it is found, or NULL otherwise.
 */
void *hashtable_get(hashtable_t *hashtable, const char *key);

/**
 * hashtable_del - Remove a value from the hashtable
 *
 * @hashtable: The hashtable object
 * @key: The key
 *
 * Returns 0 on success, or -1 if the key was not found.
 */
int hashtable_del(hashtable_t *hashtable, const char *key);

/**
 * hashtable_clear - Clear hashtable
 *
 * @hashtable: The hashtable object
 *
 * Removes all items from the hashtable.
 */
void hashtable_clear(hashtable_t *hashtable);

/**
 * hashtable_iter - Iterate over hashtable
 *
 * @hashtable: The hashtable object
 *
 * Returns an opaque iterator to the first element in the hashtable,
 * or NULL if the hashtable is empty.
 * The iterator should be passed to hashtable_iter_* functions.
 * Items are iterated over in the order in which their keys were first
 * inserted; replacing the value of an existing key does not move it.
 *
 * There's no need to free the iterator in any way. The iterator is
 * valid as long as the item that is referenced by the iterator is not
 * deleted. Other values may be added or deleted. In particular,
 * hashtable_iter_next() may be called on an iterator, and after that
 * the key/value pair pointed by the old iterator may be deleted.
 */
void *hashtable_iter(hashtable_t *hashtable);

/**
 * hashtable_iter_at - Return an iterator at a specific key
 *
 * @hashtable: The hashtable object
 * @key: The key that the iterator should point to
 *
 * Like hashtable_iter() but returns an iterator pointing to a
 * specific key, or NULL if the key is not in the hashtable.
 */
void *hashtable_iter_at(hashtable_t *hashtable, const char *key);

/**
 * hashtable_iter_next - Advance an iterator
 *
 * @hashtable: The hashtable object
 * @iter: The iterator
 *
 * Returns a new iterator pointing to the next element in the
 * hashtable or NULL if the whole hashtable has been iterated over.
*/ void *hashtable_iter_next(hashtable_t *hashtable, void *iter); /** * hashtable_iter_key - Retrieve the key pointed by an iterator * * @iter: The iterator */ void *hashtable_iter_key(void *iter); /** * hashtable_iter_value - Retrieve the value pointed by an iterator * * @iter: The iterator */ void *hashtable_iter_value(void *iter); /** * hashtable_iter_set - Set the value pointed by an iterator * * @iter: The iterator * @value: The value to set */ void hashtable_iter_set(void *iter, json_t *value); #endif ================================================ FILE: jansson/hashtable_seed.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * This library is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ /* Generate sizeof(uint32_t) bytes of as random data as possible to seed the hash function. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #ifdef HAVE_STDINT_H #include #endif #ifdef HAVE_FCNTL_H #include #endif #ifdef HAVE_SCHED_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_SYS_TIME_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #if defined(_WIN32) /* For GetModuleHandle(), GetProcAddress() and GetCurrentProcessId() */ #include #endif #include "jansson.h" static uint32_t buf_to_uint32(char *data) { size_t i; uint32_t result = 0; for (i = 0; i < sizeof(uint32_t); i++) result = (result << 8) | (unsigned char)data[i]; return result; } /* /dev/urandom */ #if !defined(_WIN32) && defined(USE_URANDOM) static int seed_from_urandom(uint32_t *seed) { /* Use unbuffered I/O if we have open(), close() and read(). 
Otherwise fall back to fopen() */ char data[sizeof(uint32_t)]; int ok; #if defined(HAVE_OPEN) && defined(HAVE_CLOSE) && defined(HAVE_READ) int urandom; urandom = open("/dev/urandom", O_RDONLY); if (urandom == -1) return 1; ok = read(urandom, data, sizeof(uint32_t)) == sizeof(uint32_t); close(urandom); #else FILE *urandom; urandom = fopen("/dev/urandom", "rb"); if (!urandom) return 1; ok = fread(data, 1, sizeof(uint32_t), urandom) == sizeof(uint32_t); fclose(urandom); #endif if (!ok) return 1; *seed = buf_to_uint32(data); return 0; } #endif /* Windows Crypto API */ #if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI) #include typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv, LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType, DWORD dwFlags); typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen, BYTE *pbBuffer); typedef BOOL (WINAPI *CRYPTRELEASECONTEXT)(HCRYPTPROV hProv, DWORD dwFlags); static int seed_from_windows_cryptoapi(uint32_t *seed) { HINSTANCE hAdvAPI32 = NULL; CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; CRYPTGENRANDOM pCryptGenRandom = NULL; CRYPTRELEASECONTEXT pCryptReleaseContext = NULL; HCRYPTPROV hCryptProv = 0; BYTE data[sizeof(uint32_t)]; int ok; hAdvAPI32 = GetModuleHandle(TEXT("advapi32.dll")); if(hAdvAPI32 == NULL) return 1; pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(hAdvAPI32, "CryptAcquireContextA"); if (!pCryptAcquireContext) return 1; pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, "CryptGenRandom"); if (!pCryptGenRandom) return 1; pCryptReleaseContext = (CRYPTRELEASECONTEXT)GetProcAddress(hAdvAPI32, "CryptReleaseContext"); if (!pCryptReleaseContext) return 1; if (!pCryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) return 1; ok = pCryptGenRandom(hCryptProv, sizeof(uint32_t), data); pCryptReleaseContext(hCryptProv, 0); if (!ok) return 1; *seed = buf_to_uint32((char *)data); return 0; } #endif /* gettimeofday() and getpid() */ static int 
seed_from_timestamp_and_pid(uint32_t *seed) { #ifdef HAVE_GETTIMEOFDAY /* XOR of seconds and microseconds */ struct timeval tv; gettimeofday(&tv, NULL); *seed = (uint32_t)tv.tv_sec ^ (uint32_t)tv.tv_usec; #else /* Seconds only */ *seed = (uint32_t)time(NULL); #endif /* XOR with PID for more randomness */ #if defined(_WIN32) *seed ^= (uint32_t)GetCurrentProcessId(); #elif defined(HAVE_GETPID) *seed ^= (uint32_t)getpid(); #endif return 0; } static uint32_t generate_seed() { uint32_t seed; int done = 0; #if !defined(_WIN32) && defined(USE_URANDOM) if (seed_from_urandom(&seed) == 0) done = 1; #endif #if defined(_WIN32) && defined(USE_WINDOWS_CRYPTOAPI) if (seed_from_windows_cryptoapi(&seed) == 0) done = 1; #endif if (!done) { /* Fall back to timestamp and PID if no better randomness is available */ seed_from_timestamp_and_pid(&seed); } /* Make sure the seed is never zero */ if (seed == 0) seed = 1; return seed; } volatile uint32_t hashtable_seed = 0; #if defined(HAVE_ATOMIC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32)) static volatile char seed_initialized = 0; void json_object_seed(size_t seed) { uint32_t new_seed = (uint32_t)seed; if (hashtable_seed == 0) { if (__atomic_test_and_set(&seed_initialized, __ATOMIC_RELAXED) == 0) { /* Do the seeding ourselves */ if (new_seed == 0) new_seed = generate_seed(); __atomic_store_n(&hashtable_seed, new_seed, __ATOMIC_RELEASE); } else { /* Wait for another thread to do the seeding */ do { #ifdef HAVE_SCHED_YIELD sched_yield(); #endif } while(__atomic_load_n(&hashtable_seed, __ATOMIC_ACQUIRE) == 0); } } } #elif defined(HAVE_SYNC_BUILTINS) && (defined(HAVE_SCHED_YIELD) || !defined(_WIN32)) void json_object_seed(size_t seed) { uint32_t new_seed = (uint32_t)seed; if (hashtable_seed == 0) { if (new_seed == 0) { /* Explicit synchronization fences are not supported by the __sync builtins, so every thread getting here has to generate the seed value. 
*/ new_seed = generate_seed(); } do { if (__sync_bool_compare_and_swap(&hashtable_seed, 0, new_seed)) { /* We were the first to seed */ break; } else { /* Wait for another thread to do the seeding */ #ifdef HAVE_SCHED_YIELD sched_yield(); #endif } } while(hashtable_seed == 0); } } #elif defined(_WIN32) static long seed_initialized = 0; void json_object_seed(size_t seed) { uint32_t new_seed = (uint32_t)seed; if (hashtable_seed == 0) { if (InterlockedIncrement(&seed_initialized) == 1) { /* Do the seeding ourselves */ if (new_seed == 0) new_seed = generate_seed(); hashtable_seed = new_seed; } else { /* Wait for another thread to do the seeding */ do { SwitchToThread(); } while (hashtable_seed == 0); } } } #else /* Fall back to a thread-unsafe version */ void json_object_seed(size_t seed) { uint32_t new_seed = (uint32_t)seed; if (hashtable_seed == 0) { if (new_seed == 0) new_seed = generate_seed(); hashtable_seed = new_seed; } } #endif ================================================ FILE: jansson/jansson.h ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #ifndef JANSSON_H #define JANSSON_H #include #include /* for size_t */ #include #include "jansson_config.h" #ifdef _WIN32 #if defined(JANSSON_EXPORTS) #define JANSSON_API __declspec(dllexport) #elif defined(JANSSON_NO_IMPORT) #define JANSSON_API #elif defined(__cplusplus) #define JANSSON_API extern "C" __declspec(dllimport) #else #define JANSSON_API __declspec(dllimport) #endif #else //_WIN32 #define JANSSON_API #endif #ifdef __cplusplus extern "C" { #endif /* version */ #define JANSSON_MAJOR_VERSION 2 #define JANSSON_MINOR_VERSION 10 #define JANSSON_MICRO_VERSION 0 /* Micro version is omitted if it's 0 */ #define JANSSON_VERSION "2.10" /* Version as a 3-byte hex number, e.g. 0x010201 == 1.2.1. Use this for numeric comparisons, e.g. 
#if JANSSON_VERSION_HEX >= ... */
#define JANSSON_VERSION_HEX  ((JANSSON_MAJOR_VERSION << 16) |   \
                              (JANSSON_MINOR_VERSION << 8)  |   \
                              (JANSSON_MICRO_VERSION << 0))


/* types */

/* Kind of value held by a json_t. */
typedef enum {
    JSON_OBJECT,
    JSON_ARRAY,
    JSON_STRING,
    JSON_MEM,
    JSON_INTEGER,
    JSON_REAL,
    JSON_TRUE,
    JSON_FALSE,
    JSON_NULL
} json_type;

/* Common part of every JSON value: its type tag and reference count.
   A refcount of (size_t)-1 is never changed by json_incref() or
   json_decref(). */
typedef struct json_t {
    json_type type;
    size_t refcount;
} json_t;

#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#if JSON_INTEGER_IS_LONG_LONG
#ifdef _WIN32
#define JSON_INTEGER_FORMAT "I64d"
#else
#define JSON_INTEGER_FORMAT "lld"
#endif
typedef long long json_int_t;
#else
#define JSON_INTEGER_FORMAT "ld"
typedef long json_int_t;
#endif /* JSON_INTEGER_IS_LONG_LONG */
#endif

/* Type tests. json_typeof() dereferences its argument unconditionally;
   the json_is_*() macros first check the pointer, so they evaluate to
   false for NULL. Arguments may be evaluated more than once. */
#define json_typeof(json)      ((json)->type)
#define json_is_object(json)   ((json) && json_typeof(json) == JSON_OBJECT)
#define json_is_array(json)    ((json) && json_typeof(json) == JSON_ARRAY)
#define json_is_string(json)   ((json) && json_typeof(json) == JSON_STRING)
#define json_is_mem(json)      ((json) && json_typeof(json) == JSON_MEM)
#define json_is_integer(json)  ((json) && json_typeof(json) == JSON_INTEGER)
#define json_is_real(json)     ((json) && json_typeof(json) == JSON_REAL)
#define json_is_number(json)   (json_is_integer(json) || json_is_real(json))
#define json_is_true(json)     ((json) && json_typeof(json) == JSON_TRUE)
#define json_is_false(json)    ((json) && json_typeof(json) == JSON_FALSE)
#define json_boolean_value     json_is_true
#define json_is_boolean(json)  (json_is_true(json) || json_is_false(json))
#define json_is_null(json)     ((json) && json_typeof(json) == JSON_NULL)

/* construction, destruction, reference counting */

JANSSON_API json_t *json_object(void);
JANSSON_API json_t *json_array(void);
JANSSON_API json_t *json_string(const char *value);
JANSSON_API json_t *json_stringn(const char *value, size_t len);
JANSSON_API json_t *json_mem(const char *value, size_t len);
JANSSON_API json_t *json_string_nocheck(const char *value);
JANSSON_API json_t *json_stringn_nocheck(const char *value, size_t len);
JANSSON_API json_t *json_integer(json_int_t value);
JANSSON_API json_t *json_real(double value);
JANSSON_API json_t *json_true(void);
JANSSON_API json_t *json_false(void);
#define json_boolean(val)      ((val) ? json_true() : json_false())
JANSSON_API json_t *json_null(void);

/* Add a reference to `json` and return it. NULL and values whose
   refcount is (size_t)-1 are returned unchanged. */
static JSON_INLINE
json_t *json_incref(json_t *json)
{
    if (json && json->refcount != (size_t)-1)
        ++json->refcount;
    return json;
}

/* do not call json_delete directly */
JANSSON_API void json_delete(json_t *json);

/* Drop a reference to `json`; json_delete() is called when the count
   reaches zero. NULL and values whose refcount is (size_t)-1 are
   ignored. */
static JSON_INLINE
void json_decref(json_t *json)
{
    if (json && json->refcount != (size_t)-1 && --json->refcount == 0)
        json_delete(json);
}

#if defined(__GNUC__) || defined(__clang__)
/* Cleanup handler behind json_auto_t: decref the pointed-to value and
   reset the pointer to NULL. */
static JSON_INLINE
void json_decrefp(json_t **json)
{
    if (json) {
        json_decref(*json);
        *json = NULL;
    }
}

/* Declares a json_t pointer variable that is passed to json_decrefp()
   when it goes out of scope (GCC/Clang cleanup attribute). */
#define json_auto_t json_t __attribute__((cleanup(json_decrefp)))
#endif


/* error reporting */

#define JSON_ERROR_TEXT_LENGTH    160
#define JSON_ERROR_SOURCE_LENGTH   80

/* Error details filled in by the functions that take a json_error_t*.
   The two character arrays have the fixed sizes defined above. */
typedef struct json_error_t {
    int line;
    int column;
    int position;
    char source[JSON_ERROR_SOURCE_LENGTH];
    char text[JSON_ERROR_TEXT_LENGTH];
} json_error_t;


/* getters, setters, manipulation */

JANSSON_API void json_object_seed(size_t seed);
JANSSON_API size_t json_object_size(const json_t *object);
JANSSON_API json_t *json_object_get(const json_t *object, const char *key);
JANSSON_API int json_object_set_new(json_t *object, const char *key, json_t *value);
JANSSON_API int json_object_set_new_nocheck(json_t *object, const char *key, json_t *value);
JANSSON_API int json_object_del(json_t *object, const char *key);
JANSSON_API int json_object_clear(json_t *object);
JANSSON_API int json_object_update(json_t *object, json_t *other);
JANSSON_API int json_object_update_existing(json_t *object, json_t *other);
JANSSON_API int json_object_update_missing(json_t *object, json_t *other);
JANSSON_API void *json_object_iter(json_t *object);
JANSSON_API void *json_object_iter_at(json_t *object, const char *key);
JANSSON_API void *json_object_key_to_iter(const char *key);
JANSSON_API void *json_object_iter_next(json_t *object, void *iter);
JANSSON_API const char *json_object_iter_key(void *iter);
JANSSON_API json_t *json_object_iter_value(void *iter);
JANSSON_API int json_object_iter_set_new(json_t *object, void *iter, json_t *value);

/* Loop over the members of `object`, assigning each key and value to the
   caller's `key` and `value` variables. The next position is derived from
   the current key on every step. */
#define json_object_foreach(object, key, value) \
    for(key = json_object_iter_key(json_object_iter(object)); \
        key && (value = json_object_iter_value(json_object_key_to_iter(key))); \
        key = json_object_iter_key(json_object_iter_next(object, json_object_key_to_iter(key))))

/* Like json_object_foreach(), but the iterator of the following member is
   fetched into `n` before the loop body runs, so the body does not need
   the current key to advance. */
#define json_object_foreach_safe(object, n, key, value)     \
    for(key = json_object_iter_key(json_object_iter(object)), \
            n = json_object_iter_next(object, json_object_key_to_iter(key)); \
        key && (value = json_object_iter_value(json_object_key_to_iter(key))); \
        key = json_object_iter_key(n), \
            n = json_object_iter_next(object, json_object_key_to_iter(key)))

/* Loop over `array` by index. The loop also stops at the first index for
   which json_array_get() returns NULL. */
#define json_array_foreach(array, index, value) \
	for(index = 0; \
		index < json_array_size(array) && (value = json_array_get(array, index)); \
		index++)

/* The three wrappers below call the corresponding *_new function with
   json_incref(value), so the caller keeps its own reference. */
static JSON_INLINE
int json_object_set(json_t *object, const char *key, json_t *value)
{
    return json_object_set_new(object, key, json_incref(value));
}

static JSON_INLINE
int json_object_set_nocheck(json_t *object, const char *key, json_t *value)
{
    return json_object_set_new_nocheck(object, key, json_incref(value));
}

static JSON_INLINE
int json_object_iter_set(json_t *object, void *iter, json_t *value)
{
    return json_object_iter_set_new(object, iter, json_incref(value));
}

JANSSON_API size_t json_array_size(const json_t *array);
JANSSON_API json_t *json_array_get(const json_t *array, size_t index);
JANSSON_API int json_array_set_new(json_t *array, size_t index, json_t *value);
JANSSON_API int json_array_append_new(json_t *array, json_t *value);
JANSSON_API int json_array_insert_new(json_t *array, size_t index, json_t *value);
JANSSON_API int json_array_remove(json_t *array, size_t index);
JANSSON_API int json_array_clear(json_t *array);
JANSSON_API int json_array_extend(json_t *array, json_t *other);

/* The three wrappers below call the corresponding *_new function with
   json_incref(value), so the caller keeps its own reference. */
static JSON_INLINE
int json_array_set(json_t *array, size_t ind, json_t *value)
{
    return json_array_set_new(array, ind, json_incref(value));
}

static JSON_INLINE
int json_array_append(json_t *array, json_t *value)
{
    return json_array_append_new(array, json_incref(value));
}

static JSON_INLINE
int json_array_insert(json_t *array, size_t ind, json_t *value)
{
    return json_array_insert_new(array, ind, json_incref(value));
}

/* scalar accessors */
JANSSON_API const char *json_string_value(const json_t *string);
JANSSON_API size_t json_string_length(const json_t *string);
JANSSON_API const char *json_mem_value(const json_t *mem);
JANSSON_API size_t json_mem_length(const json_t *mem);
JANSSON_API json_int_t json_integer_value(const json_t *integer);
JANSSON_API double json_real_value(const json_t *real);
JANSSON_API double json_number_value(const json_t *json);

/* scalar setters */
JANSSON_API int json_string_set(json_t *string, const char *value);
JANSSON_API int json_string_setn(json_t *string, const char *value, size_t len);
JANSSON_API int json_string_set_nocheck(json_t *string, const char *value);
JANSSON_API int json_string_setn_nocheck(json_t *string, const char *value, size_t len);
JANSSON_API int json_integer_set(json_t *integer, json_int_t value);
JANSSON_API int json_real_set(json_t *real, double value);

/* pack, unpack */

JANSSON_API json_t *json_pack(const char *fmt, ...);
JANSSON_API json_t *json_pack_ex(json_error_t *error, size_t flags, const char *fmt, ...);
JANSSON_API json_t *json_vpack_ex(json_error_t *error, size_t flags, const char *fmt, va_list ap);

/* Flag bits for the `flags` argument of json_unpack_ex() and
   json_vunpack_ex() */
#define JSON_VALIDATE_ONLY  0x1
#define JSON_STRICT         0x2

JANSSON_API int json_unpack(json_t *root, const char *fmt, ...);
JANSSON_API int json_unpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, ...);
JANSSON_API int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, va_list ap);


/*
equality */

JANSSON_API int json_equal(json_t *value1, json_t *value2);


/* copying */

JANSSON_API json_t *json_copy(json_t *value);
JANSSON_API json_t *json_deep_copy(const json_t *value);


/* decoding */

/* Flag bits for the `flags` argument of the json_load*() functions */
#define JSON_REJECT_DUPLICATES  0x1
#define JSON_DISABLE_EOF_CHECK  0x2
#define JSON_DECODE_ANY         0x4
#define JSON_DECODE_INT_AS_REAL 0x8
#define JSON_ALLOW_NUL          0x10

/* Input callback for json_load_callback(): receives a buffer, its size
   and the caller's `data` pointer, and returns a size_t. */
typedef size_t (*json_load_callback_t)(void *buffer, size_t buflen, void *data);

JANSSON_API json_t *json_loads(const char *input, size_t flags, json_error_t *error);
JANSSON_API json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error);
JANSSON_API json_t *json_loadf(FILE *input, size_t flags, json_error_t *error);
JANSSON_API json_t *json_loadfd(int input, size_t flags, json_error_t *error);
JANSSON_API json_t *json_load_file(const char *path, size_t flags, json_error_t *error);
JANSSON_API json_t *json_load_callback(json_load_callback_t callback, void *data, size_t flags, json_error_t *error);


/* encoding */

/* Flag bits for the `flags` argument of the json_dump*() functions. The
   low five bits (JSON_MAX_INDENT) carry the indentation width, and
   JSON_REAL_PRECISION(n) places a five-bit value at bits 11-15. */
#define JSON_MAX_INDENT         0x1F
#define JSON_INDENT(n)          ((n) & JSON_MAX_INDENT)
#define JSON_COMPACT            0x20
#define JSON_ENSURE_ASCII       0x40
#define JSON_SORT_KEYS          0x80
#define JSON_PRESERVE_ORDER     0x100
#define JSON_ENCODE_ANY         0x200
#define JSON_ESCAPE_SLASH       0x400
#define JSON_REAL_PRECISION(n)  (((n) & 0x1F) << 11)
#define JSON_EMBED              0x10000

/* Output callback for json_dump_callback(): receives a chunk of encoded
   output, its size and the caller's `data` pointer. */
typedef int (*json_dump_callback_t)(const char *buffer, size_t size, void *data);

JANSSON_API char *json_dumps(const json_t *json, size_t flags);
JANSSON_API size_t json_dumpb(const json_t *json, char *buffer, size_t size, size_t flags);
JANSSON_API int json_dumpf(const json_t *json, FILE *output, size_t flags);
JANSSON_API int json_dumpfd(const json_t *json, int output, size_t flags);
JANSSON_API int json_dump_file(const json_t *json, const char *path, size_t flags);
JANSSON_API int json_dump_callback(const json_t *json, json_dump_callback_t callback, void *data, size_t flags);

/* custom memory allocation */

typedef void *(*json_malloc_t)(size_t);
typedef void (*json_free_t)(void *);

JANSSON_API void json_set_alloc_funcs(json_malloc_t malloc_fn, json_free_t free_fn);
JANSSON_API void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn);

#ifdef __cplusplus
}
#endif

#endif

================================================
FILE: jansson/jansson_config.h
================================================
/*
 * Copyright (c) 2010-2016 Petri Lehtinen
 *
 * Jansson is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See MIT for details.
 *
 *
 * This file specifies a part of the site-specific configuration for
 * Jansson, namely those things that affect the public API in
 * jansson.h.
 *
 * The CMake system will generate the jansson_config.h file and
 * copy it to the build and install directories.
 */

#ifndef JANSSON_CONFIG_H
#define JANSSON_CONFIG_H

/* Define this so that we can disable scattered automake configuration in source files */
#ifndef JANSSON_USING_CMAKE
#define JANSSON_USING_CMAKE
#endif

/* Note: when using cmake, JSON_INTEGER_IS_LONG_LONG is not defined nor used,
 * as we will also check for __int64 etc types.
 * (the definition was used in the automake system) */

/* Bring in the cmake-detected defines */
#define HAVE_STDINT_H 1
/* #undef HAVE_INTTYPES_H */
/* #undef HAVE_SYS_TYPES_H */

/* Include our standard type header for the integer typedef */

#if defined(HAVE_STDINT_H)
#  include /* NOTE(review): header name lost in this dump - presumably <stdint.h>; confirm */
#elif defined(HAVE_INTTYPES_H)
#  include /* NOTE(review): header name lost in this dump - presumably <inttypes.h>; confirm */
#elif defined(HAVE_SYS_TYPES_H)
#  include /* NOTE(review): header name lost in this dump - presumably <sys/types.h>; confirm */
#endif


/* If your compiler supports the inline keyword in C, JSON_INLINE is
   defined to `inline', otherwise empty. In C++, the inline is always
   supported. In this checked-in configuration both branches define it
   to `inline'. */
#ifdef __cplusplus
#define JSON_INLINE inline
#else
#define JSON_INLINE inline
#endif


/* Integer type used for JSON integers, the function used to parse it and
   the matching printf conversion (without the leading '%'). */
#define json_int_t long long
#define json_strtoint strtoll
#ifdef _WIN32
#define JSON_INTEGER_FORMAT "I64d"
#else
#define JSON_INTEGER_FORMAT "lld"
#endif


/* If locale.h and localeconv() are available, define to 1, otherwise to 0.
*/ #define JSON_HAVE_LOCALECONV 1 /* Maximum recursion depth for parsing JSON input. This limits the depth of e.g. array-within-array constructions. */ #define JSON_PARSER_MAX_DEPTH 2048 #endif ================================================ FILE: jansson/jansson_helper.c ================================================ #include "jansson.h" #include "jansson_helper.h" #include #include #include /** * This function parses the given JSON string and converts it into a json_t object * @param json_string - a JSON character buffer that should be converted into a json_t * @return - a json_t object on success, or NULL on failure */ json_t * get_root_option_json_object(const char * json_string) { json_t * root; json_error_t error; root = json_loads(json_string, 0, &error); if (!root) { fprintf(stderr, "json error in options: on line %d: %s\n", error.line, error.text); return NULL; } if (!json_is_object(root)) { fprintf(stderr, "json error in options: root is not an object\n"); json_decref(root); return NULL; } return root; } /** * Gets a string attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a string, and 1 if the attribute is found and * a string. * @param should_free - whether or not to free the root json_t parameter after finding * the attribute * @return - the string value of the specified attribute on success, or NULL if the attribute * wasn't found or the wrong type. The return value should be freed by the caller. 
*/ static char * get_string_options_inner(json_t * root, const char * option_name, int * result, int should_free) { json_t *option_item; char * option_value; option_item = json_object_get(root, option_name); if (!option_item) { if (should_free) json_decref(root); *result = 0; return NULL; } if (!json_is_string(option_item)) { *result = -1; fprintf(stderr, "error: option item %s is expected to be a string\n", option_name); if (should_free) json_decref(root); return NULL; } option_value = strdup(json_string_value(option_item)); *result = 1; if (should_free) json_decref(root); return option_value; } /** * Gets a string attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a string, and 1 if the attribute is found and * a string. * @return - the string value of the specified attribute on success, or NULL if the json * couldn't be parsed, the attribute wasn't found, or the attribute was the wrong type. * The return value should be freed by the caller. */ char * get_string_options(const char * json_string, const char * option_name, int * result) { json_t * root; *result = -1; root = get_root_option_json_object(json_string); if (!root) return NULL; return get_string_options_inner(root, option_name, result, 1); } /** * Gets a string attribute value from a JSON string * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a string, and 1 if the attribute is found and * a string. 
* @return - the string value of the specified attribute on success, or NULL if the * attribute wasn't found or the wrong type. The return value should be freed by the caller. */ char * get_string_options_from_json(json_t * root, const char * option_name, int * result) { return get_string_options_inner(root, option_name, result, 0); } /** * Gets a mem attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a mem, and 1 if the attribute is found and * a mem. * @param should_free - whether or not to free the root json_t parameter after finding * the attribute * @return - the mem value of the specified attribute on success, or NULL if the attribute * wasn't found or the wrong type. The return value should be freed by the caller. 
*/ static char * get_mem_options_inner(json_t * root, const char * option_name, int * result, int should_free) { json_t *option_item; char * option_value; size_t length; option_item = json_object_get(root, option_name); if (!option_item) { if (should_free) json_decref(root); *result = 0; return NULL; } if (!json_is_mem(option_item)) { *result = -1; fprintf(stderr, "error: option item %s is expected to be a mem\n", option_name); if (should_free) json_decref(root); return NULL; } length = json_mem_length(option_item); option_value = malloc(length); memcpy(option_value, json_mem_value(option_item), length); *result = 1; if (should_free) json_decref(root); return option_value; } /** * Gets a mem attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a mem, and 1 if the attribute is found and * a mem. * @return - the mem value of the specified attribute on success, or NULL if the json * couldn't be parsed, the attribute wasn't found, or the attribute was the wrong type. * The return value should be freed by the caller. */ char * get_mem_options(const char * json_string, const char * option_name, int * result) { json_t * root; *result = -1; root = get_root_option_json_object(json_string); if (!root) return NULL; return get_mem_options_inner(root, option_name, result, 1); } /** * Gets a mem attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. 
The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a mem, and 1 if the attribute is found and * a mem. * @return - the mem value of the specified attribute on success, or NULL if the attribute * wasn't found or the wrong type. The return value should be freed by the caller. */ char * get_mem_options_from_json(json_t * root, const char * option_name, int * result) { return get_mem_options_inner(root, option_name, result, 0); } /** * Gets an integer attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an integer. * @param should_free - whether or not to free the root json_t parameter after finding * the attribute * @return - the integer value of the specified attribute on success, or -1 if the attribute * wasn't found or the wrong type. Check the value of the result parameter to differentiate * between the attribute value of -1 or failure to get the value. 
*/ static long long get_int_options_inner(json_t * root, const char * option_name, int * result, int should_free) { json_t *option_item; long long option_value; option_item = json_object_get(root, option_name); if (!option_item) { if (should_free) json_decref(root); *result = 0; return -1; } if (!json_is_integer(option_item)) { *result = -1; fprintf(stderr, "error: option item %s is expected to be an integer\n", option_name); if (should_free) json_decref(root); return -1; } option_value = json_integer_value(option_item); *result = 1; if (should_free) json_decref(root); return option_value; } /** * Gets an integer attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an integer. * @return - the integer value of the specified attribute on success, or -1 if the json * couldn't be parsed, the attribute wasn't found, or the attribute was the wrong type. * Check the value of the result parameter to differentiate between the attribute value * of -1 or failure to get the value. */ int get_int_options(const char * json_string, const char * option_name, int * result) { json_t * root; *result = -1; root = get_root_option_json_object(json_string); if (!root) return -1; return (int)get_int_options_inner(root, option_name, result, 1); } /** * Gets an integer attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. 
The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an integer. * @return - the integer value of the specified attribute on success, or -1 if the attribute * wasn't found or the wrong type. Check the value of the result parameter to differentiate * between the attribute value of -1 or failure to get the value. */ int get_int_options_from_json(json_t * root, const char * option_name, int * result) { return (int)get_int_options_inner(root, option_name, result, 0); } /** * Gets a uint64_t attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an integer. * @return - the uint64_t value of the specified attribute on success, or -1 if the json * couldn't be parsed, the attribute wasn't found, or the attribute was the wrong type. * Check the value of the result parameter to differentiate between the attribute value * of -1 or failure to get the value.s */ uint64_t get_uint64t_options(const char * json_string, const char * option_name, int * result) { json_t * root; *result = -1; root = get_root_option_json_object(json_string); if (!root) return -1; return (uint64_t)get_int_options_inner(root, option_name, result, 1); } /** * Gets a uint64_t attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. 
The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an integer. * @return - the uint64_t value of the specified attribute on success, or -1 if the attribute * wasn't found or the wrong type. Check the value of the result parameter to differentiate * between the attribute value of -1 or failure to get the value. */ uint64_t get_uint64t_options_from_json(json_t * root, const char * option_name, int * result) { return (uint64_t)get_int_options_inner(root, option_name, result, 0); } /** * Gets a double attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not a number, and 1 if the attribute is found and * a number. * @param should_free - whether or not to free the root json_t parameter after finding * the attribute * @return - the double value of the specified attribute on success, or -1 if the attribute * wasn't found or the wrong type. Check the value of the result parameter to differentiate * between the attribute value of -1 or failure to get the value. 
*/ static double get_double_options_inner(json_t * root, const char * option_name, int * result, int should_free) { json_t *option_item; double option_value; option_item = json_object_get(root, option_name); if (!option_item) { if (should_free) json_decref(root); *result = 0; return -1; } if (!json_is_real(option_item)) { *result = -1; fprintf(stderr, "error: option item %s is expected to be a real\n", option_name); if (should_free) json_decref(root); return -1; } option_value = json_real_value(option_item); *result = 1; if (should_free) json_decref(root); return option_value; } /** * Gets an double attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an double. * @return - the double value of the specified attribute on success, or -1 if the json * couldn't be parsed, the attribute wasn't found, or the attribute was the wrong type. * Check the value of the result parameter to differentiate between the attribute value * of -1 or failure to get the value. */ double get_double_options(const char * json_string, const char * option_name, int * result) { json_t * root; *result = -1; root = get_root_option_json_object(json_string); if (!root) return -1; return get_double_options_inner(root, option_name, result, 1); } /** * Gets an double attribute value from a json_t object * @param root - the json_t object to get the attribute from * @param option_name - the name of the attribute to get * @param result - a pointer to an integer to return the results of trying to get * the attribute value. 
The value 0 is returned if the attribute isn't found, -1 * if the attribute is found but not an integer, and 1 if the attribute is found and * an double. * @return - the double value of the specified attribute on success, or -1 if the attribute * wasn't found or the wrong type. Check the value of the result parameter to differentiate * between the attribute value of -1 or failure to get the value. */ double get_double_options_from_json(json_t * root, const char * option_name, int * result) { return get_double_options_inner(root, option_name, result, 0); } static int get_array_options_inner(const char * json_string, const char * option_name, size_t * count, char *** string_array, int ** int_array, int is_string_array) { json_t * root, *option_array, *option_item; char ** option_strings; int * option_ints; size_t i; root = get_root_option_json_object(json_string); if (!root) return -1; option_array = json_object_get(root, option_name); if (!option_array) { json_decref(root); return 0; } if (!json_is_array(option_array)) { fprintf(stderr, "error: option item %s is expected to be a array\n", option_name); json_decref(root); return -1; } *count = json_array_size(option_array); if(is_string_array) option_strings = malloc(*count * sizeof(char *)); else option_ints = malloc(*count * sizeof(int)); if ((is_string_array && !option_strings) || (!is_string_array && !option_ints)) { fprintf(stderr, "error: couldn't allocate array for option %s (%zu items)\n", option_name, *count); json_decref(root); return -1; } for (i = 0; i < *count; i++) { option_item = json_array_get(option_array, i); if ((is_string_array && !json_is_string(option_item)) || (!is_string_array && !json_is_integer(option_item))) { fprintf(stderr, "error: option %zu in array %s is expected to be a %s\n", i, option_name, is_string_array ? 
"string" : "integer"); json_decref(root); free(option_strings); return -1; } if(is_string_array) option_strings[i] = strdup(json_string_value(option_item)); else option_ints[i] = json_integer_value(option_item); } if (is_string_array) *string_array = option_strings; else *int_array = option_ints; json_decref(root); return 1; } /** * Gets a string array attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param count - a pointer to a size_t object used to return the number of items found * in the array attribute value * @param result - a pointer to an integer to return the results of trying to get * the attribute value. The value 0 is returned if the attribute isn't found; -1 * if the attribute is found but not an array, contains elements that aren't strings, or * on allocation failures; and 1 if the attribute is found and a string array. * @return - the string array of the specified attribute on success, or NULL if the * attribute wasn't found or the wrong type. The returned array and all of its elements should * be freed by the caller. */ char ** get_array_options(const char * json_string, const char * option_name, size_t * count, int * result) { char **option_strings = NULL; *result = get_array_options_inner(json_string, option_name, count, &option_strings, NULL, 1); return option_strings; } /** * Gets an integer array attribute value from a JSON string * @param json_string - the JSON string to parse and obtain the attribute value from * @param option_name - the name of the attribute to get * @param count - a pointer to a size_t object used to return the number of items found * in the array attribute value * @param result - a pointer to an integer to return the results of trying to get * the attribute value. 
The value 0 is returned if the attribute isn't found; -1 * if the attribute is found but not an array, contains elements that aren't integers, or * on allocation failures; and 1 if the attribute is found and an integer array. * @return - the integer array of the specified attribute on success, or NULL if the * attribute wasn't found or the wrong type. The returned array should be freed by * the caller. */ int * get_int_array_options(const char * json_string, const char * option_name, size_t * count, int * result) { int *option_ints = NULL; *result = get_array_options_inner(json_string, option_name, count, NULL, &option_ints, 0); return option_ints; } /** * Adds a new attribute to an existing json string. * @param root_options - the JSON string to parse and add an attribute to * @param new_option_name - the name of the attribute to create * @param new_value_string - If specified, the new attribute has the type string * and is given the value of this parameter. * @param new_value_int - If new_value_string is NULL, the new attribute has the * type int and the value specified in this parameter. * @return - NULL on error, or the json string passed in the root_options parameter * with the added attribute as requested */ static char * add_option_to_json(const char * root_options, const char * new_option_name, const char * new_value_string, int new_value_int) { json_t * root, *temp; char * ret; root = get_root_option_json_object(root_options); if (!root) return NULL; //Add the new item if (new_value_string) { ADD_STRING(temp, new_value_string, root, new_option_name); } else { ADD_INT(temp, new_value_int, root, new_option_name); } ret = json_dumps(root, 0); json_decref(root); return ret; } /** * Adds a new string attribute to an existing json string. 
* @param root_options - the JSON string to parse and add an attribute to * @param new_option_name - the name of the attribute to create * @param new_value - the new attribute's value * @return - NULL on error, or the json string passed in the root_options parameter * with the added attribute as requested */ char * add_string_option_to_json(const char * root_options, const char * new_option_name, const char * new_value) { if (!new_value) return NULL; return add_option_to_json(root_options, new_option_name, new_value, 0); } /** * Adds a new integer attribute to an existing json string. * @param root_options - the JSON string to parse and add an attribute to * @param new_option_name - the name of the attribute to create * @param new_value - the new attribute's value * @return - NULL on error, or the json string passed in the root_options parameter * with the added attribute as requested */ char * add_int_option_to_json(const char * root_options, const char * new_option_name, int new_value) { return add_option_to_json(root_options, new_option_name, NULL, new_value); } /** * Gets an array of buffers out of a JSON string containing an array of * JSON mem items * @param json_string - the JSON string to parse and get the array items from * @param items - a pointer to an array of buffers. This will be used to return * the array of items. 
* @param item_lengths - a pointer to a size_t array that will be used to return the * lengths of each item returned in the items parameter * @param items_count - a size_t pointer that will be used to return the number of items * returned in the items parameter * @return - non-zero on failure, 0 on success */ int decode_mem_array(const char *json_string, char *** items, size_t ** item_lengths, size_t * items_count) { json_t * items_jsons, *item_json; json_error_t error; size_t count, i, j; char ** items_array; size_t * items_lengths_array; items_jsons = json_loads(json_string, 0, &error); if (!items_jsons) return 1; if (!json_is_array(items_jsons)) { json_decref(items_jsons); return 1; } count = json_array_size(items_jsons); if (!count) { json_decref(items_jsons); *items = NULL; *item_lengths = NULL; *items_count = 0; return 0; } items_array = malloc(sizeof(char *) * count); items_lengths_array = malloc(sizeof(size_t) * count); if (!items_array || !items_lengths_array) { free(items_array); free(items_lengths_array); json_decref(items_jsons); return 1; } memset(items_array, 0, sizeof(char *) * count); for (i = 0; i < count; i++) { item_json = json_array_get(items_jsons, i); if (json_is_mem(item_json)) { items_lengths_array[i] = json_mem_length(item_json); items_array[i] = malloc(items_lengths_array[i]); if (items_array[i]) memcpy(items_array[i], json_mem_value(item_json), items_lengths_array[i]); } if (!items_array[i]) { for (j = 0; j < i; j++) free(items_array[j]); free(items_array); free(items_lengths_array); json_decref(items_jsons); return 1; } } *items = items_array; *item_lengths = items_lengths_array; *items_count = count; return 0; } /** * Transforms an array of buffers into a JSON string containing the array * @param items - an array of buffers that will be put into the returned JSON string * @param item_lengths - an array of integers that list the lengths of the buffers * in the items parameter * @param items_count - the number of items in the items and 
item_lengths parameters * @param output_length = the length of the returned JSON string * @return - a JSON string containing an array of mem items encoding the buffers on success, or NULL on failure */ char * encode_mem_array(char ** items, size_t * item_lengths, size_t items_count, int * output_length) { json_t *items_obj, *item_obj; size_t i; char * ret; items_obj = json_array(); if (!items_obj) return NULL; for (i = 0; i < items_count; i++) { item_obj = json_mem(items[i], item_lengths[i]); if (!item_obj) { json_decref(items_obj); return NULL; } json_array_append_new(items_obj, item_obj); } ret = json_dumps(items_obj, 0); *output_length = strlen(ret); json_decref(items_obj); return ret; } ================================================ FILE: jansson/jansson_helper.h ================================================ #pragma once #include "jansson.h" #ifdef __cplusplus extern "C" { #endif // Some macros to make parsing options easier #define PARSE_OPTION_INT_TEMP(state, options, name, name_literal, fail_func, temp_name) \ int result_##temp_name = 0; \ int tempi_##temp_name = get_int_options(options, name_literal, &result_##temp_name); \ if (result_##temp_name < 0) \ { \ fail_func(state); \ return NULL; \ } \ else if (result_##temp_name > 0) \ { \ state->name = tempi_##temp_name; \ } #define PARSE_OPTION_INT(state, options, name, name_literal, fail_func) \ PARSE_OPTION_INT_TEMP(state, options, name, name_literal, fail_func, name) #define PARSE_OPTION_UINT64T_TEMP(state, options, name, name_literal, fail_func, temp_name) \ int result_##temp_name = 0; \ uint64_t tempi_##temp_name = get_uint64t_options(options, name_literal, &result_##temp_name); \ if (result_##temp_name < 0) \ { \ fail_func(state); \ return NULL; \ } \ else if (result_##temp_name > 0) \ { \ state->name = tempi_##temp_name; \ } #define PARSE_OPTION_UINT64T(state, options, name, name_literal, fail_func) \ PARSE_OPTION_UINT64T_TEMP(state, options, name, name_literal, fail_func, name) #define 
PARSE_OPTION_DOUBLE_TEMP(state, options, name, name_literal, fail_func, temp_name) \
	int result_##temp_name = 0; \
	double tempi_##temp_name = get_double_options(options, name_literal, &result_##temp_name); \
	if (result_##temp_name < 0) \
	{ \
		fail_func(state); \
		return NULL; \
	} \
	else if (result_##temp_name > 0) \
	{ \
		state->name = tempi_##temp_name; \
	}

// Same as PARSE_OPTION_DOUBLE_TEMP, using the field name as the suffix of the
// temporaries that the macro declares
#define PARSE_OPTION_DOUBLE(state, options, name, name_literal, fail_func) \
	PARSE_OPTION_DOUBLE_TEMP(state, options, name, name_literal, fail_func, name)

// Reads the string option name_literal from the options JSON string. On error
// (result < 0) it calls fail_func(state) and returns NULL from the enclosing
// function. If the option is present, any previous state->name is freed and replaced
// by the new string. If the option is absent, state->name is left alone.
// Declares result_<temp_name> and temps_<temp_name> in the enclosing scope.
#define PARSE_OPTION_STRING_TEMP(state, options, name, name_literal, fail_func, temp_name) \
	int result_##temp_name = 0; \
	char * temps_##temp_name = get_string_options(options, name_literal, &result_##temp_name); \
	if (result_##temp_name < 0) \
	{ \
		fail_func(state); \
		return NULL; \
	} \
	else if (result_##temp_name > 0) \
	{ \
		if(state->name) \
			free(state->name); \
		state->name = temps_##temp_name; \
	}

// Same as PARSE_OPTION_STRING_TEMP, using the field name as the temporaries' suffix
#define PARSE_OPTION_STRING(state, options, name, name_literal, fail_func) \
	PARSE_OPTION_STRING_TEMP(state, options, name, name_literal, fail_func, name)

// Reads the string array option name_literal from the options JSON string into
// state->name, and its item count into state->count. On error (result < 0) it calls
// fail_func(state) and returns NULL from the enclosing function.
// NOTE: when replacing a previous array, only the array itself is freed here, not
// the strings it points to.
// Declares result_<temp_name>, count_<temp_name> and temps_<temp_name> in the
// enclosing scope.
#define PARSE_OPTION_ARRAY_TEMP(state, options, name, count, name_literal, fail_func, temp_name) \
	int result_##temp_name = 0; \
	size_t count_##temp_name = 0; \
	char ** temps_##temp_name = get_array_options(options, name_literal, &count_##temp_name, &result_##temp_name); \
	if (result_##temp_name < 0) \
	{ \
		fail_func(state); \
		return NULL; \
	} \
	else if (result_##temp_name > 0) \
	{ \
		if(state->name) \
			free(state->name); \
		state->name = temps_##temp_name; \
		state->count = count_##temp_name; \
	}

// Same as PARSE_OPTION_ARRAY_TEMP, using the field name as the temporaries' suffix
#define PARSE_OPTION_ARRAY(state, options, name, count, name_literal, fail_func) \
	PARSE_OPTION_ARRAY_TEMP(state, options, name, count, name_literal, fail_func, name)

// Integer array counterpart of PARSE_OPTION_ARRAY_TEMP, built on get_int_array_options
#define PARSE_OPTION_INT_ARRAY_TEMP(state, options, name, count, name_literal, fail_func, temp_name) \
	int result_##temp_name = 0; \
	size_t count_##temp_name = 0; \
	int * temps_##temp_name = get_int_array_options(options, name_literal, &count_##temp_name,
&result_##temp_name); \ if (result_##temp_name < 0) \ { \ fail_func(state); \ return NULL; \ } \ else if (result_##temp_name > 0) \ { \ if(state->name) \ free(state->name); \ state->name = temps_##temp_name; \ state->count = count_##temp_name; \ } #define PARSE_OPTION_INT_ARRAY(state, options, name, count, name_literal, fail_func) \ PARSE_OPTION_INT_ARRAY_TEMP(state, options, name, count, name_literal, fail_func, name) // Some macros to make iterating json arrays easier #define FOREACH_OBJECT_JSON_ARRAY_ITEM_BEGIN(state, name, name_str, item, result) \ do { \ json_t * root##name, *option_array##name; \ size_t i##name; \ \ result = -1; \ root##name = get_root_option_json_object(state); \ if (root##name) \ { \ option_array##name = json_object_get(root##name, name_str); \ if (!option_array##name || !json_is_array(option_array##name)) \ json_decref(root##name); \ else \ { \ result = 1; \ for (i##name = 0; i##name < json_array_size(option_array##name); i##name++) \ { \ item = json_array_get(option_array##name, i##name); #define FOREACH_OBJECT_JSON_ARRAY_ITEM_END(name) \ } \ } \ json_decref(root##name); \ } \ } while (0); //If you want to end the macro early, such that the code won't hit the end of the loop, use this to //free the root object. 
#define FOREACH_OBJECT_JSON_ARRAY_ITEM_FREE(name) \
  json_decref(root##name);

// Some macros to make generating objects easier
//
// ADD_ITEM1 / ADD_ITEM2 build a value with func(arg1) / func(arg1, arg2), store it in temp and
// add it to dest under key name with json_object_set_new.  If func returns NULL the macro
// executes `return NULL;`, so it can only be used inside a function that returns a pointer.
// The expansion is several statements, not a single one: do not use it as the unbraced body of
// an if/else.
#define ADD_ITEM1(temp, arg1, dest, func, name) \
  temp = func(arg1); \
  if(!temp) return NULL; \
  json_object_set_new(dest, name, temp);
#define ADD_ITEM2(temp, arg1, arg2, dest, func, name) \
  temp = func(arg1, arg2); \
  if(!temp) return NULL; \
  json_object_set_new(dest, name, temp);

// Typed front ends for ADD_ITEM1 / ADD_ITEM2.
#define ADD_STRING(temp, arg1, dest, name) ADD_ITEM1(temp, arg1, dest, json_string, name)
#define ADD_INT(temp, arg1, dest, name) ADD_ITEM1(temp, arg1, dest, json_integer, name)
#define ADD_UINT64T ADD_INT //Internally they both use json_integer
#define ADD_MEM(temp, arg1, arg2, dest, name) ADD_ITEM2(temp, arg1, arg2, dest, json_mem, name)
#define ADD_DOUBLE(temp, arg1, dest, name) ADD_ITEM1(temp, arg1, dest, json_real, name)

// GET_ITEM reads option `name` from arg1 with func(arg1, name, &ret).  If ret <= 0 the macro
// executes `return 1;` (so the enclosing function must return an integer); otherwise the value
// is copied from temp to dest.  Like ADD_ITEM*, the expansion is several statements.
#define GET_ITEM(arg1, dest, temp, func, name, ret) \
  temp = func(arg1, name, &ret); \
  if (ret <= 0) \
    return 1; \
  dest = temp;

// Typed front ends for GET_ITEM.
#define GET_STRING(temp, arg1, dest, name, ret) GET_ITEM(arg1, dest, temp, get_string_options, name, ret)
#define GET_INT(temp, arg1, dest, name, ret) GET_ITEM(arg1, dest, temp, get_int_options, name, ret)
#define GET_UINT64T(temp, arg1, dest, name, ret) GET_ITEM(arg1, dest, temp, get_uint64t_options, name, ret)
#define GET_MEM(temp, arg1, dest, name, ret) GET_ITEM(arg1, dest, temp, get_mem_options, name, ret)
#define GET_DOUBLE(temp, arg1, dest, name, ret) GET_ITEM(arg1, dest, temp, get_double_options, name, ret)

// Option accessors.  Each looks up option_name either in a JSON options string or in an already
// parsed root object (the *_from_json variants) and reports status through *result.
// NOTE(review): the macros above treat *result <= 0 as "not available" and the PARSE_OPTION_*
// macros treat < 0 as an error and > 0 as "found"; confirm the exact values against the
// implementation.
JANSSON_API char * get_string_options(const char * options, const char * option_name, int * result);
JANSSON_API char * get_string_options_from_json(json_t * root, const char * option_name, int * result);
JANSSON_API char * get_mem_options(const char * options, const char * option_name, int * result);
JANSSON_API char * get_mem_options_from_json(json_t * root, const char * option_name, int * result);
JANSSON_API int get_int_options(const char * options, const char * option_name, int * result);
JANSSON_API int get_int_options_from_json(json_t * root, const char * option_name, int * result);
JANSSON_API uint64_t get_uint64t_options(const char * options, const char * option_name, int * result);
JANSSON_API uint64_t get_uint64t_options_from_json(json_t * root, const char * option_name, int * result);
JANSSON_API double get_double_options(const char * json_string, const char * option_name, int * result);
JANSSON_API double get_double_options_from_json(json_t * root, const char * option_name, int * result);
JANSSON_API char ** get_array_options(const char * options, const char * option_name, size_t * count, int * result);
JANSSON_API int * get_int_array_options(const char * json_string, const char * option_name, size_t * count, int * result);

// Parses an options string into its root JSON object.  The FOREACH_OBJECT_JSON_ARRAY_ITEM_*
// macros release the returned object with json_decref.
JANSSON_API json_t * get_root_option_json_object(const char * options);

// Helpers that take an options string plus a new name/value pair and return a string.
// NOTE(review): presumably the result is a newly allocated options string owned by the caller;
// confirm against the implementation.
JANSSON_API char * add_string_option_to_json(const char * root_options, const char * new_option_name, const char * new_value);
JANSSON_API char * add_int_option_to_json(const char * root_options, const char * new_option_name, int new_value);

// Conversion between a JSON string and an array of binary buffers with per-item lengths.
JANSSON_API int decode_mem_array(const char *json_string, char *** items, size_t ** item_lengths, size_t * items_count);
JANSSON_API char * encode_mem_array(char ** items, size_t * item_lengths, size_t items_count, int * output_length);

#ifdef __cplusplus
}
#endif
================================================
FILE: jansson/jansson_private.h
================================================
/*
 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
 *
 * Jansson is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See MIT for details.
*/ #ifndef JANSSON_PRIVATE_H #define JANSSON_PRIVATE_H #include "jansson_private_config.h" #include #include "jansson.h" #include "hashtable.h" #include "strbuffer.h" #define MEM_TOKEN "::MEM::" #define MEM_TOKEN_LEN strlen("::MEM::") #define container_of(ptr_, type_, member_) \ ((type_ *)((char *)ptr_ - offsetof(type_, member_))) /* On some platforms, max() may already be defined */ #ifndef max #define max(a, b) ((a) > (b) ? (a) : (b)) #endif /* va_copy is a C99 feature. In C89 implementations, it's sometimes available as __va_copy. If not, memcpy() should do the trick. */ #ifndef va_copy #ifdef __va_copy #define va_copy __va_copy #else #define va_copy(a, b) memcpy(&(a), &(b), sizeof(va_list)) #endif #endif typedef struct { json_t json; hashtable_t hashtable; int visited; } json_object_t; typedef struct { json_t json; size_t size; size_t entries; json_t **table; int visited; } json_array_t; typedef struct { json_t json; char *value; size_t length; } json_string_t; typedef struct { json_t json; char *value; size_t length; } json_mem_t; typedef struct { json_t json; double value; } json_real_t; typedef struct { json_t json; json_int_t value; } json_integer_t; #define json_to_object(json_) container_of(json_, json_object_t, json) #define json_to_array(json_) container_of(json_, json_array_t, json) #define json_to_string(json_) container_of(json_, json_string_t, json) #define json_to_mem(json_) container_of(json_, json_mem_t, json) #define json_to_real(json_) container_of(json_, json_real_t, json) #define json_to_integer(json_) container_of(json_, json_integer_t, json) /* Create a string by taking ownership of an existing buffer */ json_t *jsonp_stringn_nocheck_own(const char *value, size_t len); /* Create a mem object by taking ownership of an existing buffer */ json_t *json_mem_own(const char *value, size_t len); /* Error message formatting */ void jsonp_error_init(json_error_t *error, const char *source); void jsonp_error_set_source(json_error_t *error, const char 
*source); void jsonp_error_set(json_error_t *error, int line, int column, size_t position, const char *msg, ...); void jsonp_error_vset(json_error_t *error, int line, int column, size_t position, const char *msg, va_list ap); /* Locale independent string<->double conversions */ int jsonp_strtod(strbuffer_t *strbuffer, double *out); int jsonp_dtostr(char *buffer, size_t size, double value, int prec); /* Wrappers for custom memory functions */ void* jsonp_malloc(size_t size); void jsonp_free(void *ptr); char *jsonp_strndup(const char *str, size_t length); char *jsonp_strdup(const char *str); char *jsonp_strndup(const char *str, size_t len); /* Windows compatibility */ #if defined(_WIN32) || defined(WIN32) # if defined(_MSC_VER) /* MS compiller */ # if (_MSC_VER < 1900) && !defined(snprintf) /* snprintf not defined yet & not introduced */ # define snprintf _snprintf # endif # if (_MSC_VER < 1500) && !defined(vsnprintf) /* vsnprintf not defined yet & not introduced */ # define vsnprintf(b,c,f,a) _vsnprintf(b,c,f,a) # endif # else /* Other Windows compiller, old definition */ # define snprintf _snprintf # define vsnprintf _vsnprintf # endif #endif #endif ================================================ FILE: jansson/jansson_private_config.h ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. 
*/

/* Build configuration switches read by the Jansson sources.  A commented-out
   #undef line records a feature that is deliberately left undefined.
   NOTE(review): this looks like the output of a configure-time template;
   confirm before editing by hand. */

/* Header availability.  HAVE_UNISTD_H is not defined, so code guarded by it
   (for example the read() call in load.c's fd_get_func) is compiled out. */
/* #undef HAVE_ENDIAN_H */
#define HAVE_FCNTL_H 1
/* #undef HAVE_SCHED_H */
/* #undef HAVE_UNISTD_H */
/* #undef HAVE_SYS_PARAM_H */
#define HAVE_SYS_STAT_H 1
/* #undef HAVE_SYS_TIME_H */
/* #undef HAVE_SYS_TYPES_H */
#define HAVE_STDINT_H 1

/* Function availability. */
#define HAVE_CLOSE 1
#define HAVE_GETPID 1
/* #undef HAVE_GETTIMEOFDAY */
#define HAVE_OPEN 1
#define HAVE_READ 1
/* #undef HAVE_SCHED_YIELD */

/* #undef HAVE_SYNC_BUILTINS */
/* #undef HAVE_ATOMIC_BUILTINS */

#define HAVE_LOCALE_H 1
#define HAVE_SETLOCALE 1

/* Fixed-width integer types.  Every HAVE_*_T macro is defined immediately
   before its #ifndef test, so none of the fallback definitions is active. */
#define HAVE_INT32_T 1
#ifndef HAVE_INT32_T
#  define int32_t int32_t
#endif

#define HAVE_UINT32_T 1
#ifndef HAVE_UINT32_T
#  define uint32_t uint32_t
#endif

#define HAVE_UINT16_T 1
#ifndef HAVE_UINT16_T
#  define uint16_t uint16_t
#endif

#define HAVE_UINT8_T 1
#ifndef HAVE_UINT8_T
#  define uint8_t uint8_t
#endif

/* NOTE(review): both macros are defined; presumably they select the entropy
   sources used for hash seeding -- confirm in the code that reads them. */
#define USE_URANDOM 1
#define USE_WINDOWS_CRYPTOAPI 1

#define INITIAL_HASHTABLE_ORDER 3
================================================
FILE: jansson/load.c
================================================
/*
 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
 *
 * Jansson is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See MIT for details.
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include "jansson_private.h" #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include "jansson.h" #include "strbuffer.h" #include "utf.h" #define STREAM_STATE_OK 0 #define STREAM_STATE_EOF -1 #define STREAM_STATE_ERROR -2 #define TOKEN_INVALID -1 #define TOKEN_EOF 0 #define TOKEN_STRING 256 #define TOKEN_INTEGER 257 #define TOKEN_REAL 258 #define TOKEN_TRUE 259 #define TOKEN_FALSE 260 #define TOKEN_NULL 261 /* Locale independent versions of isxxx() functions */ #define l_isupper(c) ('A' <= (c) && (c) <= 'Z') #define l_islower(c) ('a' <= (c) && (c) <= 'z') #define l_isalpha(c) (l_isupper(c) || l_islower(c)) #define l_isdigit(c) ('0' <= (c) && (c) <= '9') #define l_isxdigit(c) \ (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f')) /* Read one byte from stream, convert to unsigned char, then int, and return. return EOF on end of file. This corresponds to the behaviour of fgetc(). */ typedef int(*get_func)(void *data); typedef struct { get_func get; void *data; char buffer[5]; size_t buffer_pos; int state; int line; int column, last_column; size_t position; } stream_t; typedef struct { stream_t stream; strbuffer_t saved_text; size_t flags; size_t depth; int token; union { struct { char *val; size_t len; } string; json_int_t integer; double real; } value; } lex_t; #define stream_to_lex(stream) container_of(stream, lex_t, stream) /*** error reporting ***/ static void error_set(json_error_t *error, const lex_t *lex, const char *msg, ...) 
{
    va_list ap;
    char msg_text[JSON_ERROR_TEXT_LENGTH];
    char msg_with_context[JSON_ERROR_TEXT_LENGTH];

    int line = -1, col = -1;
    size_t pos = 0;
    const char *result = msg_text;

    if (!error)
        return;

    va_start(ap, msg);
    vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
    msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
    va_end(ap);

    if (lex)
    {
        const char *saved_text = strbuffer_value(&lex->saved_text);

        line = lex->stream.line;
        col = lex->stream.column;
        pos = lex->stream.position;

        if (saved_text && saved_text[0])
        {
            /* Quote the offending token only when it is short; longer
               tokens are reported without context. */
            if (lex->saved_text.length <= 20) {
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
                         "%s near '%s'", msg_text, saved_text);
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
                result = msg_with_context;
            }
        }
        else
        {
            if (lex->stream.state == STREAM_STATE_ERROR) {
                /* No context for UTF-8 decoding errors */
                result = msg_text;
            }
            else {
                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
                         "%s near end of file", msg_text);
                msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
                result = msg_with_context;
            }
        }
    }

    jsonp_error_set(error, line, col, pos, "%s", result);
}


/*** lexical analyzer ***/

/* Prepare stream to read from get(data): empty buffer, state OK, line 1,
   column 0, position 0.  last_column is not set here; it is written by
   stream_get() when a newline is consumed. */
static void
stream_init(stream_t *stream, get_func get, void *data)
{
    stream->get = get;
    stream->data = data;
    stream->buffer[0] = '\0';
    stream->buffer_pos = 0;

    stream->state = STREAM_STATE_OK;
    stream->line = 1;
    stream->column = 0;
    stream->position = 0;
}

/* Return the next input byte, or STREAM_STATE_EOF / STREAM_STATE_ERROR.
   When the buffer is exhausted a whole character is fetched: a lead byte in
   0x80..0xFF pulls in the rest of its UTF-8 sequence and the sequence is
   validated before any of its bytes is handed out.  Once the stream has left
   STREAM_STATE_OK every later call returns that state. */
static int stream_get(stream_t *stream, json_error_t *error)
{
    int c;

    if (stream->state != STREAM_STATE_OK)
        return stream->state;

    if (!stream->buffer[stream->buffer_pos])
    {
        c = stream->get(stream->data);
        if (c == EOF) {
            stream->state = STREAM_STATE_EOF;
            return STREAM_STATE_EOF;
        }

        stream->buffer[0] = c;
        stream->buffer_pos = 0;

        if (0x80 <= c && c <= 0xFF)
        {
            /* multi-byte UTF-8 sequence */
            size_t i, count;

            count = utf8_check_first(c);
            if (!count)
                goto out;

            assert(count >= 2);

            /* The return value of get() is not checked for EOF here; the
               utf8_check_full() call below is what validates the bytes. */
            for (i = 1; i < count; i++)
                stream->buffer[i] = stream->get(stream->data);

            if (!utf8_check_full(stream->buffer, count, NULL))
                goto out;

            stream->buffer[count] = '\0';
        }
        else
            stream->buffer[1] = '\0';
    }

    c = stream->buffer[stream->buffer_pos++];

    stream->position++;
    if (c == '\n') {
        stream->line++;
        stream->last_column = stream->column;
        stream->column = 0;
    }
    else if (utf8_check_first(c)) {
        /* track the Unicode character column, so increment only if
           this is the first character of a UTF-8 sequence */
        stream->column++;
    }

    return c;

out:
    stream->state = STREAM_STATE_ERROR;
    error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c);
    return STREAM_STATE_ERROR;
}

/* Push the byte c, which must be the one most recently returned by
   stream_get(), back onto the stream and undo its position bookkeeping.
   EOF and ERROR pseudo-bytes are ignored. */
static void stream_unget(stream_t *stream, int c)
{
    if (c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
        return;

    stream->position--;
    if (c == '\n') {
        stream->line--;
        stream->column = stream->last_column;
    }
    else if (utf8_check_first(c))
        stream->column--;

    assert(stream->buffer_pos > 0);
    stream->buffer_pos--;
    assert(stream->buffer[stream->buffer_pos] == c);
}


/* Read the next byte without recording it in saved_text. */
static int lex_get(lex_t *lex, json_error_t *error)
{
    return stream_get(&lex->stream, error);
}

/* Append byte c to the text of the token being scanned. */
static void lex_save(lex_t *lex, int c)
{
    strbuffer_append_byte(&lex->saved_text, c);
}

/* Read the next byte and, unless it is EOF/ERROR, append it to saved_text. */
static int lex_get_save(lex_t *lex, json_error_t *error)
{
    int c = stream_get(&lex->stream, error);
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
        lex_save(lex, c);
    return c;
}

/* Push c back onto the stream; saved_text is left untouched. */
static void lex_unget(lex_t *lex, int c)
{
    stream_unget(&lex->stream, c);
}

/* Inverse of lex_get_save(): push c back onto the stream and drop the last
   byte of saved_text. */
static void lex_unget_unsave(lex_t *lex, int c)
{
    if (c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) {
        /* Since we treat warnings as errors, when assertions are turned
         * off the "d" variable would be set but never used. Which is
         * treated as an error by GCC.
*/
        #ifndef NDEBUG
        char d;
        #endif
        stream_unget(&lex->stream, c);
        #ifndef NDEBUG
        d =
        #endif
            strbuffer_pop(&lex->saved_text);
        assert(c == d);
    }
}

/* Move every byte still unread in the stream buffer (the remainder of the
   current character) into saved_text, advancing the stream position. */
static void lex_save_cached(lex_t *lex)
{
    while (lex->stream.buffer[lex->stream.buffer_pos] != '\0')
    {
        lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
        lex->stream.buffer_pos++;
        lex->stream.position++;
    }
}

/* Release the decoded string held by the lexer and reset it to NULL/0. */
static void lex_free_string(lex_t *lex)
{
    jsonp_free(lex->value.string.val);
    lex->value.string.val = NULL;
    lex->value.string.len = 0;
}

/* assumes that str points to 'u' plus at least 4 valid hex digits */
/* Returns the 16-bit value, or -1 if one of the four characters is neither a
   digit nor a letter.  Letters are tested with l_islower / l_isupper (the
   whole alphabet), so the hex-digit guarantee has to come from the caller. */
static int32_t decode_unicode_escape(const char *str)
{
    int i;
    int32_t value = 0;

    assert(str[0] == 'u');

    for (i = 1; i <= 4; i++) {
        char c = str[i];
        value <<= 4;
        if (l_isdigit(c))
            value += c - '0';
        else if (l_islower(c))
            value += c - 'a' + 10;
        else if (l_isupper(c))
            value += c - 'A' + 10;
        else
            return -1;
    }

    return value;
}

/* Scan a string token; the opening quote is already in saved_text.
   First pass (below): copy the raw text up to the closing quote into
   saved_text while rejecting control characters and malformed escapes.
   Second pass: decode the escapes into a newly allocated buffer.
   On success token is TOKEN_STRING and value.string holds the decoded bytes;
   on failure token stays TOKEN_INVALID. */
static void lex_scan_string(lex_t *lex, json_error_t *error)
{
    int c;
    const char *p;
    char *t;
    int i;

    lex->value.string.val = NULL;
    lex->token = TOKEN_INVALID;

    c = lex_get_save(lex, error);

    while (c != '"') {
        if (c == STREAM_STATE_ERROR)
            goto out;

        else if (c == STREAM_STATE_EOF) {
            error_set(error, lex, "premature end of input");
            goto out;
        }

        else if (0 <= c && c <= 0x1F) {
            /* control character */
            lex_unget_unsave(lex, c);
            if (c == '\n')
                error_set(error, lex, "unexpected newline");
            else
                error_set(error, lex, "control character 0x%x", c);
            goto out;
        }

        else if (c == '\\') {
            c = lex_get_save(lex, error);
            if (c == 'u') {
                /* \u must be followed by exactly four hex digits */
                c = lex_get_save(lex, error);
                for (i = 0; i < 4; i++) {
                    if (!l_isxdigit(c)) {
                        error_set(error, lex, "invalid escape");
                        goto out;
                    }
                    c = lex_get_save(lex, error);
                }
            }
            else if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
                     c == 'f' || c == 'n' || c == 'r' || c == 't')
                c = lex_get_save(lex, error);
            else {
                error_set(error, lex, "invalid escape");
                goto out;
            }
        }
        else
            c = lex_get_save(lex, error);
    }

    /* the actual value is at most of the same length as the source
       string, because:
         - shortcut
escapes (e.g. "\t") (length 2) are converted to 1 byte
         - a single \uXXXX escape (length 6) is converted to at most 3 bytes
         - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
           are converted to 4 bytes
    */
    t = jsonp_malloc(lex->saved_text.length + 1);
    if (!t) {
        /* this is not very nice, since TOKEN_INVALID is returned */
        goto out;
    }
    lex->value.string.val = t;

    /* + 1 to skip the " */
    p = strbuffer_value(&lex->saved_text) + 1;

    /* Second pass: p walks the raw text, t is the write cursor in the
       decoded buffer. */
    while (*p != '"') {
        if (*p == '\\') {
            p++;
            if (*p == 'u') {
                size_t length;
                int32_t value;

                value = decode_unicode_escape(p);
                if (value < 0) {
                    error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
                    goto out;
                }
                p += 5;

                if (0xD800 <= value && value <= 0xDBFF) {
                    /* surrogate pair */
                    if (*p == '\\' && *(p + 1) == 'u') {
                        int32_t value2 = decode_unicode_escape(++p);
                        if (value2 < 0) {
                            error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
                            goto out;
                        }
                        p += 5;

                        if (0xDC00 <= value2 && value2 <= 0xDFFF) {
                            /* valid second surrogate */
                            value =
                                ((value - 0xD800) << 10) +
                                (value2 - 0xDC00) +
                                0x10000;
                        }
                        else {
                            /* invalid second surrogate */
                            error_set(error, lex,
                                      "invalid Unicode '\\u%04X\\u%04X'",
                                      value, value2);
                            goto out;
                        }
                    }
                    else {
                        /* no second surrogate */
                        error_set(error, lex, "invalid Unicode '\\u%04X'",
                                  value);
                        goto out;
                    }
                }
                else if (0xDC00 <= value && value <= 0xDFFF) {
                    /* a low surrogate with no preceding high surrogate */
                    error_set(error, lex, "invalid Unicode '\\u%04X'", value);
                    goto out;
                }

                if (utf8_encode(value, t, &length))
                    assert(0);
                t += length;
            }
            else {
                /* the first pass only lets these escapes through */
                switch (*p) {
                    case '"': case '\\': case '/':
                        *t = *p; break;
                    case 'b': *t = '\b'; break;
                    case 'f': *t = '\f'; break;
                    case 'n': *t = '\n'; break;
                    case 'r': *t = '\r'; break;
                    case 't': *t = '\t'; break;
                    default: assert(0);
                }
                t++;
                p++;
            }
        }
        else
            *(t++) = *(p++);
    }
    *t = '\0';
    lex->value.string.len = t - lex->value.string.val;
    lex->token = TOKEN_STRING;
    return;

out:
    /* value.string.val is either NULL or the partially filled buffer */
    lex_free_string(lex);
}

/* Pick the string-to-integer function matching json_int_t, unless the build
   system already provides json_strtoint. */
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#if JSON_INTEGER_IS_LONG_LONG
#ifdef _MSC_VER  /* Microsoft Visual Studio */
#define json_strtoint     _strtoi64
#else
#define json_strtoint     strtoll
#endif
#else
#define json_strtoint     strtol
#endif
#endif

/* Scan a number whose first character c is already in saved_text.
   Returns 0 and sets token to TOKEN_INTEGER or TOKEN_REAL on success;
   returns -1 with token TOKEN_INVALID on malformed input or overflow. */
static int lex_scan_number(lex_t *lex, int c, json_error_t *error)
{
    const char *saved_text;
    char *end;
    double doubleval;

    lex->token = TOKEN_INVALID;

    if (c == '-')
        c = lex_get_save(lex, error);

    if (c == '0') {
        /* a leading zero may not be followed by another digit */
        c = lex_get_save(lex, error);
        if (l_isdigit(c)) {
            lex_unget_unsave(lex, c);
            goto out;
        }
    }
    else if (l_isdigit(c)) {
        do
            c = lex_get_save(lex, error);
        while (l_isdigit(c));
    }
    else {
        lex_unget_unsave(lex, c);
        goto out;
    }

    /* No fraction or exponent follows: this is an integer, unless the caller
       asked for every number to be decoded as a real. */
    if (!(lex->flags & JSON_DECODE_INT_AS_REAL) &&
        c != '.' && c != 'E' && c != 'e')
    {
        json_int_t intval;

        lex_unget_unsave(lex, c);

        saved_text = strbuffer_value(&lex->saved_text);

        errno = 0;
        intval = json_strtoint(saved_text, &end, 10);
        if (errno == ERANGE) {
            if (intval < 0)
                error_set(error, lex, "too big negative integer");
            else
                error_set(error, lex, "too big integer");
            goto out;
        }

        assert(end == saved_text + lex->saved_text.length);

        lex->token = TOKEN_INTEGER;
        lex->value.integer = intval;
        return 0;
    }

    if (c == '.') {
        /* The first fraction digit is read unsaved so that a rejected
           character is pushed back without touching saved_text. */
        c = lex_get(lex, error);
        if (!l_isdigit(c)) {
            lex_unget(lex, c);
            goto out;
        }
        lex_save(lex, c);

        do
            c = lex_get_save(lex, error);
        while (l_isdigit(c));
    }

    if (c == 'E' || c == 'e') {
        c = lex_get_save(lex, error);
        if (c == '+' || c == '-')
            c = lex_get_save(lex, error);

        if (!l_isdigit(c)) {
            lex_unget_unsave(lex, c);
            goto out;
        }

        do
            c = lex_get_save(lex, error);
        while (l_isdigit(c));
    }

    lex_unget_unsave(lex, c);

    if (jsonp_strtod(&lex->saved_text, &doubleval)) {
        error_set(error, lex, "real number overflow");
        goto out;
    }

    lex->token = TOKEN_REAL;
    lex->value.real = doubleval;
    return 0;

out:
    return -1;
}

/* Scan the next token into lex->token (and lex->value for strings and
   numbers) and return it.  A string left over from the previous token is
   freed first. */
static int lex_scan(lex_t *lex, json_error_t *error)
{
    int c;

    strbuffer_clear(&lex->saved_text);

    if (lex->token == TOKEN_STRING)
        lex_free_string(lex);

    /* skip whitespace */
    do
        c = lex_get(lex, error);
    while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

    if (c == STREAM_STATE_EOF) {
        lex->token = TOKEN_EOF;
        goto out;
    }

    if (c == STREAM_STATE_ERROR) {
lex->token = TOKEN_INVALID;
        goto out;
    }

    lex_save(lex, c);

    /* structural characters are their own token code */
    if (c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
        lex->token = c;

    else if (c == '"')
        lex_scan_string(lex, error);

    else if (l_isdigit(c) || c == '-') {
        if (lex_scan_number(lex, c, error))
            goto out;
    }

    else if (l_isalpha(c)) {
        /* eat up the whole identifier for clearer error messages */
        const char *saved_text;

        do
            c = lex_get_save(lex, error);
        while (l_isalpha(c));
        lex_unget_unsave(lex, c);

        saved_text = strbuffer_value(&lex->saved_text);

        if (strcmp(saved_text, "true") == 0)
            lex->token = TOKEN_TRUE;
        else if (strcmp(saved_text, "false") == 0)
            lex->token = TOKEN_FALSE;
        else if (strcmp(saved_text, "null") == 0)
            lex->token = TOKEN_NULL;
        else
            lex->token = TOKEN_INVALID;
    }

    else {
        /* save the rest of the input UTF-8 sequence to get an error
           message of valid UTF-8 */
        lex_save_cached(lex);
        lex->token = TOKEN_INVALID;
    }

out:
    return lex->token;
}

/* Hand the current string token's buffer over to the caller, who becomes
   responsible for freeing it; its length is stored in *out_len.  Returns NULL
   (and leaves *out_len untouched) when the current token is not a string. */
static char *lex_steal_string(lex_t *lex, size_t *out_len)
{
    char *result = NULL;
    if (lex->token == TOKEN_STRING) {
        result = lex->value.string.val;
        *out_len = lex->value.string.len;
        lex->value.string.val = NULL;
        lex->value.string.len = 0;
    }
    return result;
}

/* Initialise a lexer reading from get(data).  Returns 0 on success, -1 if
   the token text buffer cannot be initialised.  depth is not set here;
   parse_json() resets it before parsing. */
static int lex_init(lex_t *lex, get_func get, size_t flags, void *data)
{
    stream_init(&lex->stream, get, data);
    if (strbuffer_init(&lex->saved_text))
        return -1;

    lex->flags = flags;
    lex->token = TOKEN_INVALID;
    return 0;
}

/* Release everything the lexer still owns. */
static void lex_close(lex_t *lex)
{
    if (lex->token == TOKEN_STRING)
        lex_free_string(lex);
    strbuffer_close(&lex->saved_text);
}


/*** parser ***/

static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);

/* Parse an object; the '{' token has already been consumed by the caller.
   Returns the new object, or NULL on error. */
static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error)
{
    json_t *object = json_object();
    if (!object)
        return NULL;

    lex_scan(lex, error);
    if (lex->token == '}')
        return object;

    while (1) {
        char *key;
        size_t len;
        json_t *value;

        if (lex->token != TOKEN_STRING) {
            error_set(error, lex, "string or '}' expected");
            goto error;
        }

        /* take ownership of the key so the following lex_scan() calls do
           not free it */
        key = lex_steal_string(lex, &len);
if (!key) return NULL; if (memchr(key, '\0', len)) { jsonp_free(key); error_set(error, lex, "NUL byte in object key not supported"); goto error; } if (flags & JSON_REJECT_DUPLICATES) { if (json_object_get(object, key)) { jsonp_free(key); error_set(error, lex, "duplicate object key"); goto error; } } lex_scan(lex, error); if (lex->token != ':') { jsonp_free(key); error_set(error, lex, "':' expected"); goto error; } lex_scan(lex, error); value = parse_value(lex, flags, error); if (!value) { jsonp_free(key); goto error; } if (json_object_set_nocheck(object, key, value)) { jsonp_free(key); json_decref(value); goto error; } json_decref(value); jsonp_free(key); lex_scan(lex, error); if (lex->token != ',') break; lex_scan(lex, error); } if (lex->token != '}') { error_set(error, lex, "'}' expected"); goto error; } return object; error: json_decref(object); return NULL; } static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) { json_t *array = json_array(); if (!array) return NULL; lex_scan(lex, error); if (lex->token == ']') return array; while (lex->token) { json_t *elem = parse_value(lex, flags, error); if (!elem) goto error; if (json_array_append(array, elem)) { json_decref(elem); goto error; } json_decref(elem); lex_scan(lex, error); if (lex->token != ',') break; lex_scan(lex, error); } if (lex->token != ']') { error_set(error, lex, "']' expected"); goto error; } return array; error: json_decref(array); return NULL; } static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) { json_t *json; lex->depth++; if (lex->depth > JSON_PARSER_MAX_DEPTH) { error_set(error, lex, "maximum parsing depth reached"); return NULL; } switch (lex->token) { case TOKEN_STRING: { const char *value = lex->value.string.val; size_t len = lex->value.string.len; unsigned char * temp, *mem; const char *pos; if (len >= MEM_TOKEN_LEN && !memcmp(value, MEM_TOKEN, MEM_TOKEN_LEN)) { len = len - MEM_TOKEN_LEN; value = value + MEM_TOKEN_LEN; mem = temp = malloc(len / 
2); if (!temp) return NULL; size_t num_read; for (pos = value; pos < value + len; pos += num_read, temp += num_read / 2) { if (pos + 0x20 < value + len) { sscanf(pos, "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx", temp, temp + 1, temp + 2, temp + 3, temp + 4, temp + 5, temp + 6, temp + 7, temp + 8, temp + 9, temp + 10, temp + 11, temp + 12, temp + 13, temp + 14, temp + 15, temp + 16, temp + 17, temp + 18, temp + 19, temp + 20, temp + 21, temp + 22, temp + 23, temp + 24, temp + 25, temp + 26, temp + 27, temp + 28, temp + 29, temp + 30, temp + 31); num_read = 0x40; } else { sscanf(pos, "%02hhx", temp); num_read = 2; } } json = json_mem_own(mem, len / 2); } else { if (!(flags & JSON_ALLOW_NUL)) { if (memchr(value, '\0', len)) { error_set(error, lex, "\\u0000 is not allowed without JSON_ALLOW_NUL"); return NULL; } } json = jsonp_stringn_nocheck_own(value, len); if (json) { lex->value.string.val = NULL; lex->value.string.len = 0; } } break; } case TOKEN_INTEGER: { json = json_integer(lex->value.integer); break; } case TOKEN_REAL: { json = json_real(lex->value.real); break; } case TOKEN_TRUE: json = json_true(); break; case TOKEN_FALSE: json = json_false(); break; case TOKEN_NULL: json = json_null(); break; case '{': json = parse_object(lex, flags, error); break; case '[': json = parse_array(lex, flags, error); break; case TOKEN_INVALID: error_set(error, lex, "invalid token"); return NULL; default: error_set(error, lex, "unexpected token"); return NULL; } if (!json) return NULL; lex->depth--; return json; } static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) { json_t *result; lex->depth = 0; lex_scan(lex, error); if (!(flags & JSON_DECODE_ANY)) { if (lex->token != '[' && lex->token != '{') { error_set(error, lex, "'[' or '{' expected"); return NULL; } } result = parse_value(lex, flags, error); if 
(!result) return NULL; if (!(flags & JSON_DISABLE_EOF_CHECK)) { lex_scan(lex, error); if (lex->token != TOKEN_EOF) { error_set(error, lex, "end of file expected"); json_decref(result); return NULL; } } if (error) { /* Save the position even though there was no error */ error->position = (int)lex->stream.position; } return result; } typedef struct { const char *data; size_t pos; } string_data_t; static int string_get(void *data) { char c; string_data_t *stream = (string_data_t *)data; c = stream->data[stream->pos]; if (c == '\0') return EOF; else { stream->pos++; return (unsigned char)c; } } json_t *json_loads(const char *string, size_t flags, json_error_t *error) { lex_t lex; json_t *result; string_data_t stream_data; jsonp_error_init(error, ""); if (string == NULL) { error_set(error, NULL, "wrong arguments"); return NULL; } stream_data.data = string; stream_data.pos = 0; if (lex_init(&lex, string_get, flags, (void *)&stream_data)) return NULL; result = parse_json(&lex, flags, error); lex_close(&lex); return result; } typedef struct { const char *data; size_t len; size_t pos; } buffer_data_t; static int buffer_get(void *data) { char c; buffer_data_t *stream = data; if (stream->pos >= stream->len) return EOF; c = stream->data[stream->pos]; stream->pos++; return (unsigned char)c; } json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) { lex_t lex; json_t *result; buffer_data_t stream_data; jsonp_error_init(error, ""); if (buffer == NULL) { error_set(error, NULL, "wrong arguments"); return NULL; } stream_data.data = buffer; stream_data.pos = 0; stream_data.len = buflen; if (lex_init(&lex, buffer_get, flags, (void *)&stream_data)) return NULL; result = parse_json(&lex, flags, error); lex_close(&lex); return result; } json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) { lex_t lex; const char *source; json_t *result; if (input == stdin) source = ""; else source = ""; jsonp_error_init(error, source); if (input == NULL) { 
error_set(error, NULL, "wrong arguments"); return NULL; } if (lex_init(&lex, (get_func)fgetc, flags, input)) return NULL; result = parse_json(&lex, flags, error); lex_close(&lex); return result; } static int fd_get_func(int *fd) { uint8_t c; #ifdef HAVE_UNISTD_H if (read(*fd, &c, 1) == 1) return c; #endif return EOF; } json_t *json_loadfd(int input, size_t flags, json_error_t *error) { lex_t lex; const char *source; json_t *result; #ifdef HAVE_UNISTD_H if (input == STDIN_FILENO) source = ""; else #endif source = ""; jsonp_error_init(error, source); if (input < 0) { error_set(error, NULL, "wrong arguments"); return NULL; } if (lex_init(&lex, (get_func)fd_get_func, flags, &input)) return NULL; result = parse_json(&lex, flags, error); lex_close(&lex); return result; } json_t *json_load_file(const char *path, size_t flags, json_error_t *error) { json_t *result; FILE *fp; jsonp_error_init(error, path); if (path == NULL) { error_set(error, NULL, "wrong arguments"); return NULL; } fp = fopen(path, "rb"); if (!fp) { error_set(error, NULL, "unable to open %s: %s", path, strerror(errno)); return NULL; } result = json_loadf(fp, flags, error); fclose(fp); return result; } #define MAX_BUF_LEN 1024 typedef struct { char data[MAX_BUF_LEN]; size_t len; size_t pos; json_load_callback_t callback; void *arg; } callback_data_t; static int callback_get(void *data) { char c; callback_data_t *stream = data; if (stream->pos >= stream->len) { stream->pos = 0; stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg); if (stream->len == 0 || stream->len == (size_t)-1) return EOF; } c = stream->data[stream->pos]; stream->pos++; return (unsigned char)c; } json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error) { lex_t lex; json_t *result; callback_data_t stream_data; memset(&stream_data, 0, sizeof(stream_data)); stream_data.callback = callback; stream_data.arg = arg; jsonp_error_init(error, ""); if (callback == NULL) { 
error_set(error, NULL, "wrong arguments"); return NULL; } if (lex_init(&lex, (get_func)callback_get, flags, &stream_data)) return NULL; result = parse_json(&lex, flags, error); lex_close(&lex); return result; } ================================================ FILE: jansson/lookup3.h ================================================ /* ------------------------------------------------------------------------------- lookup3.c, by Bob Jenkins, May 2006, Public Domain. These are functions for producing 32-bit hashes for hash table lookup. hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() are externally useful functions. Routines to test the hash are included if SELF_TEST is defined. You can use this free for any purpose. It's in the public domain. It has no warranty. You probably want to use hashlittle(). hashlittle() and hashbig() hash byte arrays. hashlittle() is is faster than hashbig() on little-endian machines. Intel and AMD are little-endian machines. On second thought, you probably want hashlittle2(), which is identical to hashlittle() except it returns two 32-bit hashes for the price of one. You could implement hashbig2() if you wanted but I haven't bothered here. If you want to find a hash of, say, exactly 7 integers, do a = i1; b = i2; c = i3; mix(a,b,c); a += i4; b += i5; c += i6; mix(a,b,c); a += i7; final(a,b,c); then use c as the hash value. If you have a variable length array of 4-byte integers to hash, use hashword(). If you have a byte array (like a character string), use hashlittle(). If you have several byte arrays, or a mix of things, see the comments above hashlittle(). Why is this so big? I read 12 bytes at a time into 3 4-byte integers, then mix those integers. This is fast (you can do a lot more thorough mixing with 12*3 instructions on 3 integers than you can with 3 instructions on 1 byte), but shoehorning those bytes into integers efficiently is messy. 
------------------------------------------------------------------------------- */ #include #ifdef HAVE_CONFIG_H #include #endif #ifdef HAVE_STDINT_H #include /* defines uint32_t etc */ #endif #ifdef HAVE_SYS_PARAM_H #include /* attempt to define endianness */ #endif #ifdef HAVE_ENDIAN_H # include /* attempt to define endianness */ #endif /* * My best guess at if you are big-endian or little-endian. This may * need adjustment. */ #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ __BYTE_ORDER == __LITTLE_ENDIAN) || \ (defined(i386) || defined(__i386__) || defined(__i486__) || \ defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) # define HASH_LITTLE_ENDIAN 1 # define HASH_BIG_ENDIAN 0 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ __BYTE_ORDER == __BIG_ENDIAN) || \ (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) # define HASH_LITTLE_ENDIAN 0 # define HASH_BIG_ENDIAN 1 #else # define HASH_LITTLE_ENDIAN 0 # define HASH_BIG_ENDIAN 0 #endif #define hashsize(n) ((uint32_t)1<<(n)) #define hashmask(n) (hashsize(n)-1) #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) /* ------------------------------------------------------------------------------- mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c) before mix() is still in (a,b,c) after mix(). If four pairs of (a,b,c) inputs are run through mix(), or through mix() in reverse, there are at least 32 bits of the output that are sometimes the same for one pair and different for another pair. This was tested for: * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or in any combination of bottom bits of (a,b,c). * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly produced by subtraction) look like a single 1-bit difference. 
* the base values were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this are 4 6 8 16 19 4 9 15 3 18 27 15 14 9 3 7 17 3 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ" defined as + with a one-bit base and a two-bit delta. I used http://burtleburtle.net/bob/hash/avalanche.html to choose the operations, constants, and arrangements of the variables. This does not achieve avalanche. There are input bits of (a,b,c) that fail to affect some output bits of (a,b,c), especially of a. The most thoroughly mixed value is c, but it doesn't really even achieve avalanche in c. This allows some parallelism. Read-after-writes are good at doubling the number of bits affected, so the goal of mixing pulls in the opposite direction as the goal of parallelism. I did what I could. Rotates seem to cost as much as shifts on every machine I could lay my hands on, and rotates are much kinder to the top and bottom bits, so I used rotates. ------------------------------------------------------------------------------- */ #define mix(a,b,c) \ { \ a -= c; a ^= rot(c, 4); c += b; \ b -= a; b ^= rot(a, 6); a += c; \ c -= b; c ^= rot(b, 8); b += a; \ a -= c; a ^= rot(c,16); c += b; \ b -= a; b ^= rot(a,19); a += c; \ c -= b; c ^= rot(b, 4); b += a; \ } /* ------------------------------------------------------------------------------- final -- final mixing of 3 32-bit values (a,b,c) into c Pairs of (a,b,c) values differing in only a few bits will usually produce values of c that look totally different. This was tested for * pairs that differed by one bit, by two bits, in any combination of top bits of (a,b,c), or in any combination of bottom bits of (a,b,c). * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly produced by subtraction) look like a single 1-bit difference. 
* the base values were pseudorandom, all zero but one bit set, or all zero plus a counter that starts at zero. These constants passed: 14 11 25 16 4 14 24 12 14 25 16 4 14 24 and these came close: 4 8 15 26 3 22 24 10 8 15 26 3 22 24 11 8 15 26 3 22 24 ------------------------------------------------------------------------------- */ #define final(a,b,c) \ { \ c ^= b; c -= rot(b,14); \ a ^= c; a -= rot(c,11); \ b ^= a; b -= rot(a,25); \ c ^= b; c -= rot(b,16); \ a ^= c; a -= rot(c,4); \ b ^= a; b -= rot(a,14); \ c ^= b; c -= rot(b,24); \ } /* ------------------------------------------------------------------------------- hashlittle() -- hash a variable-length key into a 32-bit value k : the key (the unaligned variable-length array of bytes) length : the length of the key, counting by bytes initval : can be any 4-byte value Returns a 32-bit value. Every bit of the key affects every bit of the return value. Two keys differing by one or two bits will have totally different hash values. The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need only 10 bits, do h = (h & hashmask(10)); In which case, the hash table should have hashsize(10) elements. If you are hashing n strings (uint8_t **)k, do it like this: for (i=0, h=0; i 12) { a += k[0]; b += k[1]; c += k[2]; mix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the * string is aligned, the masked-off tail is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). 
*/ #ifndef NO_MASKING_TRICK switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff; a+=k[0]; break; case 6 : b+=k[1]&0xffff; a+=k[0]; break; case 5 : b+=k[1]&0xff; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; case 0 : return c; /* zero length strings require no mixing */ } #else /* make valgrind happy */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]; break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ case 1 : a+=k8[0]; break; case 0 : return c; } #endif /* !valgrind */ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; /*--------------- all but last block: aligned reads and different mixing */ while (length > 12) { a += k[0] + (((uint32_t)k[1])<<16); b += k[2] + (((uint32_t)k[3])<<16); c += k[4] + (((uint32_t)k[5])<<16); mix(a,b,c); length -= 12; k += 6; } /*----------------------------- handle the last (probably partial) block */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[4]+(((uint32_t)k[5])<<16); b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=k[4]; b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 9 : 
c+=k8[8]; /* fall through */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; case 0 : return c; /* zero length requires no mixing */ } } else { /* need to read the key one byte at a time */ const uint8_t *k = (const uint8_t *)key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; a += ((uint32_t)k[1])<<8; a += ((uint32_t)k[2])<<16; a += ((uint32_t)k[3])<<24; b += k[4]; b += ((uint32_t)k[5])<<8; b += ((uint32_t)k[6])<<16; b += ((uint32_t)k[7])<<24; c += k[8]; c += ((uint32_t)k[9])<<8; c += ((uint32_t)k[10])<<16; c += ((uint32_t)k[11])<<24; mix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ switch(length) /* all the case statements fall through */ { case 12: c+=((uint32_t)k[11])<<24; case 11: c+=((uint32_t)k[10])<<16; case 10: c+=((uint32_t)k[9])<<8; case 9 : c+=k[8]; case 8 : b+=((uint32_t)k[7])<<24; case 7 : b+=((uint32_t)k[6])<<16; case 6 : b+=((uint32_t)k[5])<<8; case 5 : b+=k[4]; case 4 : a+=((uint32_t)k[3])<<24; case 3 : a+=((uint32_t)k[2])<<16; case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; case 0 : return c; } } final(a,b,c); return c; } ================================================ FILE: jansson/memory.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * Copyright (c) 2011-2012 Basile Starynkevitch * * Jansson is free software; you can redistribute it and/or modify it * under the terms of the MIT license. See MIT for details. 
*/ #include #include #include "jansson.h" #include "jansson_private.h" /* C89 allows these to be macros */ #undef malloc #undef free /* memory function pointers */ static json_malloc_t do_malloc = malloc; static json_free_t do_free = free; void *jsonp_malloc(size_t size) { if(!size) return NULL; return (*do_malloc)(size); } void jsonp_free(void *ptr) { if(!ptr) return; (*do_free)(ptr); } char *jsonp_strdup(const char *str) { return jsonp_strndup(str, strlen(str)); } char *jsonp_strndup(const char *str, size_t len) { char *new_str; new_str = jsonp_malloc(len + 1); if(!new_str) return NULL; memcpy(new_str, str, len); new_str[len] = '\0'; return new_str; } void json_set_alloc_funcs(json_malloc_t malloc_fn, json_free_t free_fn) { do_malloc = malloc_fn; do_free = free_fn; } void json_get_alloc_funcs(json_malloc_t *malloc_fn, json_free_t *free_fn) { if (malloc_fn) *malloc_fn = do_malloc; if (free_fn) *free_fn = do_free; } ================================================ FILE: jansson/pack_unpack.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * Copyright (c) 2011-2012 Graeme Smecher * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. 
*/ #include #include "jansson.h" #include "jansson_private.h" #include "utf.h" typedef struct { int line; int column; size_t pos; char token; } token_t; typedef struct { const char *start; const char *fmt; token_t prev_token; token_t token; token_t next_token; json_error_t *error; size_t flags; int line; int column; size_t pos; } scanner_t; #define token(scanner) ((scanner)->token.token) static const char * const type_names[] = { "object", "array", "string", "mem", "integer", "real", "true", "false", "null" }; #define type_name(x) type_names[json_typeof(x)] static const char unpack_value_starters[] = "{[siIbfFOon"; static void scanner_init(scanner_t *s, json_error_t *error, size_t flags, const char *fmt) { s->error = error; s->flags = flags; s->fmt = s->start = fmt; memset(&s->prev_token, 0, sizeof(token_t)); memset(&s->token, 0, sizeof(token_t)); memset(&s->next_token, 0, sizeof(token_t)); s->line = 1; s->column = 0; s->pos = 0; } static void next_token(scanner_t *s) { const char *t; s->prev_token = s->token; if(s->next_token.line) { s->token = s->next_token; s->next_token.line = 0; return; } t = s->fmt; s->column++; s->pos++; /* skip space and ignored chars */ while(*t == ' ' || *t == '\t' || *t == '\n' || *t == ',' || *t == ':') { if(*t == '\n') { s->line++; s->column = 1; } else s->column++; s->pos++; t++; } s->token.token = *t; s->token.line = s->line; s->token.column = s->column; s->token.pos = s->pos; t++; s->fmt = t; } static void prev_token(scanner_t *s) { s->next_token = s->token; s->token = s->prev_token; } static void set_error(scanner_t *s, const char *source, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); jsonp_error_vset(s->error, s->token.line, s->token.column, s->token.pos, fmt, ap); jsonp_error_set_source(s->error, source); va_end(ap); } static json_t *pack(scanner_t *s, va_list *ap); /* ours will be set to 1 if jsonp_free() must be called for the result afterwards */ static char *read_string(scanner_t *s, va_list *ap, const char *purpose, size_t *out_len, int *ours) { char t; strbuffer_t strbuff; const char *str; size_t length; next_token(s); t = token(s); prev_token(s); if(t != '#' && t != '%' && t != '+') { /* Optimize the simple case */ str = va_arg(*ap, const char *); if(!str) { set_error(s, "", "NULL string argument"); return NULL; } length = strlen(str); if(!utf8_check_string(str, length)) { set_error(s, "", "Invalid UTF-8 %s", purpose); return NULL; } *out_len = length; *ours = 0; return (char *)str; } strbuffer_init(&strbuff); while(1) { str = va_arg(*ap, const char *); if(!str) { set_error(s, "", "NULL string argument"); strbuffer_close(&strbuff); return NULL; } next_token(s); if(token(s) == '#') { length = va_arg(*ap, int); } else if(token(s) == '%') { length = va_arg(*ap, size_t); } else { prev_token(s); length = strlen(str); } if(strbuffer_append_bytes(&strbuff, str, length) == -1) { set_error(s, "", "Out of memory"); strbuffer_close(&strbuff); return NULL; } next_token(s); if(token(s) != '+') { prev_token(s); break; } } if(!utf8_check_string(strbuff.value, strbuff.length)) { set_error(s, "", "Invalid UTF-8 %s", purpose); strbuffer_close(&strbuff); return NULL; } *out_len = strbuff.length; *ours = 1; return strbuffer_steal_value(&strbuff); } static json_t *pack_object(scanner_t *s, va_list *ap) { json_t *object = json_object(); next_token(s); while(token(s) != '}') { char *key; size_t len; int ours; json_t *value; if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto error; } if(token(s) != 's') { set_error(s, "", "Expected format 's', got '%c'", token(s)); goto error; } key = read_string(s, ap, 
"object key", &len, &ours); if(!key) goto error; next_token(s); value = pack(s, ap); if(!value) { if(ours) jsonp_free(key); if(strchr("soO", token(s)) && s->next_token.token == '*') { next_token(s); next_token(s); continue; } goto error; } if(json_object_set_new_nocheck(object, key, value)) { set_error(s, "", "Unable to add key \"%s\"", key); if(ours) jsonp_free(key); goto error; } if(ours) jsonp_free(key); if(strchr("soO", token(s)) && s->next_token.token == '*') next_token(s); next_token(s); } return object; error: json_decref(object); return NULL; } static json_t *pack_array(scanner_t *s, va_list *ap) { json_t *array = json_array(); next_token(s); while(token(s) != ']') { json_t *value; if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto error; } value = pack(s, ap); if(!value) { if(strchr("soO", token(s)) && s->next_token.token == '*') { next_token(s); next_token(s); continue; } goto error; } if(json_array_append_new(array, value)) { set_error(s, "", "Unable to append to array"); goto error; } if(strchr("soO", token(s)) && s->next_token.token == '*') next_token(s); next_token(s); } return array; error: json_decref(array); return NULL; } static json_t *pack_string(scanner_t *s, va_list *ap) { char *str; size_t len; int ours; int nullable; next_token(s); nullable = token(s) == '?'; if (!nullable) prev_token(s); str = read_string(s, ap, "string", &len, &ours); if (!str) { return nullable ? json_null() : NULL; } else if (ours) { return jsonp_stringn_nocheck_own(str, len); } else { return json_stringn_nocheck(str, len); } } static json_t *pack(scanner_t *s, va_list *ap) { switch(token(s)) { case '{': return pack_object(s, ap); case '[': return pack_array(s, ap); case 's': /* string */ return pack_string(s, ap); case 'n': /* null */ return json_null(); case 'b': /* boolean */ return va_arg(*ap, int) ? 
json_true() : json_false(); case 'i': /* integer from int */ return json_integer(va_arg(*ap, int)); case 'I': /* integer from json_int_t */ return json_integer(va_arg(*ap, json_int_t)); case 'f': /* real */ return json_real(va_arg(*ap, double)); case 'O': /* a json_t object; increments refcount */ { int nullable; json_t *json; next_token(s); nullable = token(s) == '?'; if (!nullable) prev_token(s); json = va_arg(*ap, json_t *); if (!json && nullable) { return json_null(); } else { return json_incref(json); } } case 'o': /* a json_t object; doesn't increment refcount */ { int nullable; json_t *json; next_token(s); nullable = token(s) == '?'; if (!nullable) prev_token(s); json = va_arg(*ap, json_t *); if (!json && nullable) { return json_null(); } else { return json; } } default: set_error(s, "", "Unexpected format character '%c'", token(s)); return NULL; } } static int unpack(scanner_t *s, json_t *root, va_list *ap); static int unpack_object(scanner_t *s, json_t *root, va_list *ap) { int ret = -1; int strict = 0; int gotopt = 0; /* Use a set (emulated by a hashtable) to check that all object keys are accessed. Checking that the correct number of keys were accessed is not enough, as the same key can be unpacked multiple times. */ hashtable_t key_set; if(hashtable_init(&key_set)) { set_error(s, "", "Out of memory"); return -1; } if(root && !json_is_object(root)) { set_error(s, "", "Expected object, got %s", type_name(root)); goto out; } next_token(s); while(token(s) != '}') { const char *key; json_t *value; int opt = 0; if(strict != 0) { set_error(s, "", "Expected '}' after '%c', got '%c'", (strict == 1 ? '!' : '*'), token(s)); goto out; } if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto out; } if(token(s) == '!' || token(s) == '*') { strict = (token(s) == '!' ? 
1 : -1); next_token(s); continue; } if(token(s) != 's') { set_error(s, "", "Expected format 's', got '%c'", token(s)); goto out; } key = va_arg(*ap, const char *); if(!key) { set_error(s, "", "NULL object key"); goto out; } next_token(s); if(token(s) == '?') { opt = gotopt = 1; next_token(s); } if(!root) { /* skipping */ value = NULL; } else { value = json_object_get(root, key); if(!value && !opt) { set_error(s, "", "Object item not found: %s", key); goto out; } } if(unpack(s, value, ap)) goto out; hashtable_set(&key_set, key, json_null()); next_token(s); } if(strict == 0 && (s->flags & JSON_STRICT)) strict = 1; if(root && strict == 1) { /* We need to check that all non optional items have been parsed */ const char *key; int have_unrecognized_keys = 0; strbuffer_t unrecognized_keys; json_t *value; long unpacked = 0; if (gotopt) { /* We have optional keys, we need to iter on each key */ json_object_foreach(root, key, value) { if(!hashtable_get(&key_set, key)) { unpacked++; /* Save unrecognized keys for the error message */ if (!have_unrecognized_keys) { strbuffer_init(&unrecognized_keys); have_unrecognized_keys = 1; } else { strbuffer_append_bytes(&unrecognized_keys, ", ", 2); } strbuffer_append_bytes(&unrecognized_keys, key, strlen(key)); } } } else { /* No optional keys, we can just compare the number of items */ unpacked = (long)json_object_size(root) - (long)key_set.size; } if (unpacked) { if (!gotopt) { /* Save unrecognized keys for the error message */ json_object_foreach(root, key, value) { if(!hashtable_get(&key_set, key)) { if (!have_unrecognized_keys) { strbuffer_init(&unrecognized_keys); have_unrecognized_keys = 1; } else { strbuffer_append_bytes(&unrecognized_keys, ", ", 2); } strbuffer_append_bytes(&unrecognized_keys, key, strlen(key)); } } } set_error(s, "", "%li object item(s) left unpacked: %s", unpacked, strbuffer_value(&unrecognized_keys)); strbuffer_close(&unrecognized_keys); goto out; } } ret = 0; out: hashtable_close(&key_set); return ret; } 
/*
 * Unpack a "[...]" format from `root`.  A NULL root means "skip": the
 * format (and the matching arguments) are consumed without reading any
 * array element.  Returns 0 on success, -1 after set_error().
 */
static int unpack_array(scanner_t *s, json_t *root, va_list *ap)
{
    size_t i = 0;
    int strict = 0;   /* 1 after '!', -1 after '*', 0 when neither was seen */

    if(root && !json_is_array(root)) {
        set_error(s, "", "Expected array, got %s", type_name(root));
        return -1;
    }
    next_token(s);

    while(token(s) != ']') {
        json_t *value;

        /* '!' or '*' must be the last thing before ']' */
        if(strict != 0) {
            set_error(s, "", "Expected ']' after '%c', got '%c'",
                      (strict == 1 ? '!' : '*'),
                      token(s));
            return -1;
        }

        if(!token(s)) {
            set_error(s, "", "Unexpected end of format string");
            return -1;
        }

        if(token(s) == '!' || token(s) == '*') {
            strict = (token(s) == '!' ? 1 : -1);
            next_token(s);
            continue;
        }

        if(!strchr(unpack_value_starters, token(s))) {
            set_error(s, "", "Unexpected format character '%c'",
                      token(s));
            return -1;
        }

        if(!root) {
            /* skipping */
            value = NULL;
        }
        else {
            value = json_array_get(root, i);
            if(!value) {
                set_error(s, "", "Array index %lu out of range",
                          (unsigned long)i);
                return -1;
            }
        }

        if(unpack(s, value, ap))
            return -1;

        next_token(s);
        i++;
    }

    /* no explicit '!' or '*': fall back to the JSON_STRICT flag */
    if(strict == 0 && (s->flags & JSON_STRICT))
        strict = 1;

    if(root && strict == 1 && i != json_array_size(root)) {
        long diff = (long)json_array_size(root) - (long)i;
        set_error(s, "", "%li array item(s) left unpacked", diff);
        return -1;
    }

    return 0;
}

/*
 * Unpack the value described by the current token from `root`.
 *
 * When root is NULL the type checks and the stores are skipped, but the
 * target pointers are still fetched with va_arg so that the argument list
 * stays in step with the format.  With JSON_VALIDATE_ONLY no arguments are
 * fetched at all.  Returns 0 on success, -1 after set_error().
 */
static int unpack(scanner_t *s, json_t *root, va_list *ap)
{
    switch(token(s))
    {
        case '{':
            return unpack_object(s, root, ap);

        case '[':
            return unpack_array(s, root, ap);

        case 's':
            if(root && !json_is_string(root)) {
                set_error(s, "", "Expected string, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                const char **str_target;
                size_t *len_target = NULL;

                str_target = va_arg(*ap, const char **);
                if(!str_target) {
                    set_error(s, "", "NULL string argument");
                    return -1;
                }

                /* "s%" additionally stores the string length */
                next_token(s);

                if(token(s) == '%') {
                    len_target = va_arg(*ap, size_t *);
                    if(!len_target) {
                        set_error(s, "", "NULL string length argument");
                        return -1;
                    }
                }
                else
                    prev_token(s);

                if(root) {
                    *str_target = json_string_value(root);
                    if(len_target)
                        *len_target = json_string_length(root);
                }
            }
            return 0;

        case 'i':
            if(root && !json_is_integer(root)) {
                set_error(s, "", "Expected integer, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                int *target = va_arg(*ap, int*);
                if(root)
                    *target = (int)json_integer_value(root);
            }

            return 0;

        case 'I':
            if(root && !json_is_integer(root)) {
                set_error(s, "", "Expected integer, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                json_int_t *target = va_arg(*ap, json_int_t*);
                if(root)
                    *target = json_integer_value(root);
            }

            return 0;

        case 'b':
            if(root && !json_is_boolean(root)) {
                set_error(s, "", "Expected true or false, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                int *target = va_arg(*ap, int*);
                if(root)
                    *target = json_is_true(root);
            }

            return 0;

        case 'f':
            if(root && !json_is_real(root)) {
                set_error(s, "", "Expected real, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                double *target = va_arg(*ap, double*);
                if(root)
                    *target = json_real_value(root);
            }

            return 0;

        case 'F':
            if(root && !json_is_number(root)) {
                set_error(s, "", "Expected real or integer, got %s",
                          type_name(root));
                return -1;
            }

            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                double *target = va_arg(*ap, double*);
                if(root)
                    *target = json_number_value(root);
            }

            return 0;

        case 'O':
            /* 'O' differs from 'o' only by taking a new reference first */
            if(root && !(s->flags & JSON_VALIDATE_ONLY))
                json_incref(root);
            /* Fall through */

        case 'o':
            if(!(s->flags & JSON_VALIDATE_ONLY)) {
                json_t **target = va_arg(*ap, json_t**);
                if(root)
                    *target = root;
            }

            return 0;

        case 'n':
            /* Never assign, just validate */
            if(root && !json_is_null(root)) {
                set_error(s, "", "Expected null, got %s",
                          type_name(root));
                return -1;
            }
            return 0;

        default:
            set_error(s, "", "Unexpected format character '%c'",
                      token(s));
            return -1;
    }
}

/*
 * Build a JSON value from `fmt` and the arguments in `ap`.  The caller's
 * va_list is not consumed; a copy is used.  Returns NULL when the format
 * is NULL or empty, when packing fails, or when characters remain after
 * the first complete value.
 */
json_t *json_vpack_ex(json_error_t *error, size_t flags,
                      const char *fmt, va_list ap)
{
    scanner_t s;
    va_list ap_copy;
    json_t *value;

    if(!fmt || !*fmt) {
        jsonp_error_init(error, "");
        jsonp_error_set(error, -1, -1, 0, "NULL or empty format string");
        return NULL;
    }
    jsonp_error_init(error, NULL);

    scanner_init(&s, error, flags, fmt);
    next_token(&s);

    va_copy(ap_copy, ap);
    value = pack(&s, &ap_copy);
    va_end(ap_copy);

    if(!value)
        return NULL;

    /* anything left after the first value is an error */
    next_token(&s);
    if(token(&s)) {
        json_decref(value);
        set_error(&s, "", "Garbage after format string");
        return NULL;
    }

    return value;
}

/* Variadic front end of json_vpack_ex(). */
json_t *json_pack_ex(json_error_t *error, size_t flags, const char *fmt, ...)
{
    json_t *value;
    va_list ap;

    va_start(ap, fmt);
    value = json_vpack_ex(error, flags, fmt, ap);
    va_end(ap);

    return value;
}

/* json_pack_ex() with a NULL error argument and flags 0. */
json_t *json_pack(const char *fmt, ...)
{
    json_t *value;
    va_list ap;

    va_start(ap, fmt);
    value = json_vpack_ex(NULL, 0, fmt, ap);
    va_end(ap);

    return value;
}

/*
 * Unpack `root` according to `fmt` into the targets in `ap`.  The caller's
 * va_list is not consumed; a copy is used.  Returns 0 on success and -1
 * when root is NULL, the format is NULL or empty, unpacking fails, or
 * characters remain after the first complete value.
 */
int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags,
                    const char *fmt, va_list ap)
{
    scanner_t s;
    va_list ap_copy;

    if(!root) {
        jsonp_error_init(error, "");
        jsonp_error_set(error, -1, -1, 0, "NULL root value");
        return -1;
    }

    if(!fmt || !*fmt) {
        jsonp_error_init(error, "");
        jsonp_error_set(error, -1, -1, 0, "NULL or empty format string");
        return -1;
    }
    jsonp_error_init(error, NULL);

    scanner_init(&s, error, flags, fmt);
    next_token(&s);

    va_copy(ap_copy, ap);
    if(unpack(&s, root, &ap_copy)) {
        va_end(ap_copy);
        return -1;
    }
    va_end(ap_copy);

    /* anything left after the first value is an error */
    next_token(&s);
    if(token(&s)) {
        set_error(&s, "", "Garbage after format string");
        return -1;
    }

    return 0;
}

/* Variadic front end of json_vunpack_ex(). */
int json_unpack_ex(json_t *root, json_error_t *error, size_t flags, const char *fmt, ...)
{
    int ret;
    va_list ap;

    va_start(ap, fmt);
    ret = json_vunpack_ex(root, error, flags, fmt, ap);
    va_end(ap);

    return ret;
}

/* json_unpack_ex() with a NULL error argument and flags 0. */
int json_unpack(json_t *root, const char *fmt, ...)
{
    int ret;
    va_list ap;

    va_start(ap, fmt);
    ret = json_vunpack_ex(root, NULL, 0, fmt, ap);
    va_end(ap);

    return ret;
}

================================================
FILE: jansson/strbuffer.c
================================================
/*
 * Copyright (c) 2009-2016 Petri Lehtinen
 *
 * Jansson is free software; you can redistribute it and/or modify
 * it under the terms of the MIT license. See MIT for details.
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include "jansson_private.h" #include "strbuffer.h" #define STRBUFFER_MIN_SIZE 16 #define STRBUFFER_FACTOR 2 #define STRBUFFER_SIZE_MAX ((size_t)-1) int strbuffer_init(strbuffer_t *strbuff) { strbuff->size = STRBUFFER_MIN_SIZE; strbuff->length = 0; strbuff->value = jsonp_malloc(strbuff->size); if(!strbuff->value) return -1; /* initialize to empty */ strbuff->value[0] = '\0'; return 0; } void strbuffer_close(strbuffer_t *strbuff) { if(strbuff->value) jsonp_free(strbuff->value); strbuff->size = 0; strbuff->length = 0; strbuff->value = NULL; } void strbuffer_clear(strbuffer_t *strbuff) { strbuff->length = 0; strbuff->value[0] = '\0'; } const char *strbuffer_value(const strbuffer_t *strbuff) { return strbuff->value; } char *strbuffer_steal_value(strbuffer_t *strbuff) { char *result = strbuff->value; strbuff->value = NULL; return result; } int strbuffer_append_byte(strbuffer_t *strbuff, char byte) { return strbuffer_append_bytes(strbuff, &byte, 1); } int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, size_t size) { if(size >= strbuff->size - strbuff->length) { size_t new_size; char *new_value; /* avoid integer overflow */ if (strbuff->size > STRBUFFER_SIZE_MAX / STRBUFFER_FACTOR || size > STRBUFFER_SIZE_MAX - 1 || strbuff->length > STRBUFFER_SIZE_MAX - 1 - size) return -1; new_size = max(strbuff->size * STRBUFFER_FACTOR, strbuff->length + size + 1); new_value = jsonp_malloc(new_size); if(!new_value) return -1; memcpy(new_value, strbuff->value, strbuff->length); jsonp_free(strbuff->value); strbuff->value = new_value; strbuff->size = new_size; } memcpy(strbuff->value + strbuff->length, data, size); strbuff->length += size; strbuff->value[strbuff->length] = '\0'; return 0; } char strbuffer_pop(strbuffer_t *strbuff) { if(strbuff->length > 0) { char c = strbuff->value[--strbuff->length]; strbuff->value[strbuff->length] = '\0'; return c; } else return '\0'; } 
================================================ FILE: jansson/strbuffer.h ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #ifndef STRBUFFER_H #define STRBUFFER_H #include typedef struct { char *value; size_t length; /* bytes used */ size_t size; /* bytes allocated */ } strbuffer_t; int strbuffer_init(strbuffer_t *strbuff); void strbuffer_close(strbuffer_t *strbuff); void strbuffer_clear(strbuffer_t *strbuff); const char *strbuffer_value(const strbuffer_t *strbuff); /* Steal the value and close the strbuffer */ char *strbuffer_steal_value(strbuffer_t *strbuff); int strbuffer_append_byte(strbuffer_t *strbuff, char byte); int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, size_t size); char strbuffer_pop(strbuffer_t *strbuff); #endif ================================================ FILE: jansson/strconv.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #include #include #include #include #include #ifdef __MINGW32__ #undef __NO_ISOCEXT /* ensure stdlib.h will declare prototypes for mingw own 'strtod' replacement, called '__strtod' */ #endif #include "jansson_private.h" #include "strbuffer.h" /* need jansson_private_config.h to get the correct snprintf */ #ifdef HAVE_CONFIG_H #include #endif #ifdef __MINGW32__ #define strtod __strtod #endif #if JSON_HAVE_LOCALECONV #include /* - This code assumes that the decimal separator is exactly one character. - If setlocale() is called by another thread between the call to localeconv() and the call to sprintf() or strtod(), the result may be wrong. setlocale() is not thread-safe and should not be used this way. 
Multi-threaded programs should use uselocale() instead. */ static void to_locale(strbuffer_t *strbuffer) { const char *point; char *pos; point = localeconv()->decimal_point; if(*point == '.') { /* No conversion needed */ return; } pos = strchr(strbuffer->value, '.'); if(pos) *pos = *point; } static void from_locale(char *buffer) { const char *point; char *pos; point = localeconv()->decimal_point; if(*point == '.') { /* No conversion needed */ return; } pos = strchr(buffer, *point); if(pos) *pos = '.'; } #endif int jsonp_strtod(strbuffer_t *strbuffer, double *out) { double value; char *end; #if JSON_HAVE_LOCALECONV to_locale(strbuffer); #endif errno = 0; value = strtod(strbuffer->value, &end); assert(end == strbuffer->value + strbuffer->length); if((value == HUGE_VAL || value == -HUGE_VAL) && errno == ERANGE) { /* Overflow */ return -1; } *out = value; return 0; } int jsonp_dtostr(char *buffer, size_t size, double value, int precision) { int ret; char *start, *end; size_t length; if (precision == 0) precision = 17; ret = snprintf(buffer, size, "%.*g", precision, value); if(ret < 0) return -1; length = (size_t)ret; if(length >= size) return -1; #if JSON_HAVE_LOCALECONV from_locale(buffer); #endif /* Make sure there's a dot or 'e' in the output. Otherwise a real is converted to an integer when decoding */ if(strchr(buffer, '.') == NULL && strchr(buffer, 'e') == NULL) { if(length + 3 >= size) { /* No space to append ".0" */ return -1; } buffer[length] = '.'; buffer[length + 1] = '0'; buffer[length + 2] = '\0'; length += 2; } /* Remove leading '+' from positive exponent. 
Also remove leading zeros from exponents (added by some printf() implementations) */ start = strchr(buffer, 'e'); if(start) { start++; end = start + 1; if(*start == '-') start++; while(*end == '0') end++; if(end != start) { memmove(start, end, length - (size_t)(end - buffer)); length -= (size_t)(end - start); } } return (int)length; } ================================================ FILE: jansson/utf.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #include #include "utf.h" int utf8_encode(int32_t codepoint, char *buffer, size_t *size) { if(codepoint < 0) return -1; else if(codepoint < 0x80) { buffer[0] = (char)codepoint; *size = 1; } else if(codepoint < 0x800) { buffer[0] = 0xC0 + ((codepoint & 0x7C0) >> 6); buffer[1] = 0x80 + ((codepoint & 0x03F)); *size = 2; } else if(codepoint < 0x10000) { buffer[0] = 0xE0 + ((codepoint & 0xF000) >> 12); buffer[1] = 0x80 + ((codepoint & 0x0FC0) >> 6); buffer[2] = 0x80 + ((codepoint & 0x003F)); *size = 3; } else if(codepoint <= 0x10FFFF) { buffer[0] = 0xF0 + ((codepoint & 0x1C0000) >> 18); buffer[1] = 0x80 + ((codepoint & 0x03F000) >> 12); buffer[2] = 0x80 + ((codepoint & 0x000FC0) >> 6); buffer[3] = 0x80 + ((codepoint & 0x00003F)); *size = 4; } else return -1; return 0; } size_t utf8_check_first(char byte) { unsigned char u = (unsigned char)byte; if(u < 0x80) return 1; if(0x80 <= u && u <= 0xBF) { /* second, third or fourth byte of a multi-byte sequence, i.e. 
a "continuation byte" */ return 0; } else if(u == 0xC0 || u == 0xC1) { /* overlong encoding of an ASCII byte */ return 0; } else if(0xC2 <= u && u <= 0xDF) { /* 2-byte sequence */ return 2; } else if(0xE0 <= u && u <= 0xEF) { /* 3-byte sequence */ return 3; } else if(0xF0 <= u && u <= 0xF4) { /* 4-byte sequence */ return 4; } else { /* u >= 0xF5 */ /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 */ return 0; } } size_t utf8_check_full(const char *buffer, size_t size, int32_t *codepoint) { size_t i; int32_t value = 0; unsigned char u = (unsigned char)buffer[0]; if(size == 2) { value = u & 0x1F; } else if(size == 3) { value = u & 0xF; } else if(size == 4) { value = u & 0x7; } else return 0; for(i = 1; i < size; i++) { u = (unsigned char)buffer[i]; if(u < 0x80 || u > 0xBF) { /* not a continuation byte */ return 0; } value = (value << 6) + (u & 0x3F); } if(value > 0x10FFFF) { /* not in Unicode range */ return 0; } else if(0xD800 <= value && value <= 0xDFFF) { /* invalid code point (UTF-16 surrogate halves) */ return 0; } else if((size == 2 && value < 0x80) || (size == 3 && value < 0x800) || (size == 4 && value < 0x10000)) { /* overlong encoding */ return 0; } if(codepoint) *codepoint = value; return 1; } const char *utf8_iterate(const char *buffer, size_t bufsize, int32_t *codepoint) { size_t count; int32_t value; if(!bufsize) return buffer; count = utf8_check_first(buffer[0]); if(count <= 0) return NULL; if(count == 1) value = (unsigned char)buffer[0]; else { if(count > bufsize || !utf8_check_full(buffer, count, &value)) return NULL; } if(codepoint) *codepoint = value; return buffer + count; } int utf8_check_string(const char *string, size_t length) { size_t i; for(i = 0; i < length; i++) { size_t count = utf8_check_first(string[i]); if(count == 0) return 0; else if(count > 1) { if(count > length - i) return 0; if(!utf8_check_full(&string[i], count, NULL)) return 0; i += count - 1; } } return 1; } ================================================ 
FILE: jansson/utf.h ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #ifndef UTF_H #define UTF_H #ifdef HAVE_CONFIG_H #include "jansson_private_config.h" #endif #include int utf8_encode(int32_t codepoint, char *buffer, size_t *size); size_t utf8_check_first(char byte); size_t utf8_check_full(const char *buffer, size_t size, int32_t *codepoint); const char *utf8_iterate(const char *buffer, size_t size, int32_t *codepoint); int utf8_check_string(const char *string, size_t length); #endif ================================================ FILE: jansson/value.c ================================================ /* * Copyright (c) 2009-2016 Petri Lehtinen * * Jansson is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See MIT for details. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #ifdef HAVE_STDINT_H #include #endif #include "jansson.h" #include "hashtable.h" #include "jansson_private.h" #include "utf.h" /* Work around nonstandard isnan() and isinf() implementations */ #ifndef isnan #ifndef __sun static JSON_INLINE int isnan(double x) { return x != x; } #endif #endif #ifndef isinf static JSON_INLINE int isinf(double x) { return !isnan(x) && isnan(x - x); } #endif static JSON_INLINE void json_init(json_t *json, json_type type) { json->type = type; json->refcount = 1; } /*** object ***/ extern volatile uint32_t hashtable_seed; json_t *json_object(void) { json_object_t *object = jsonp_malloc(sizeof(json_object_t)); if (!object) return NULL; if (!hashtable_seed) { /* Autoseed */ json_object_seed(0); } json_init(&object->json, JSON_OBJECT); if (hashtable_init(&object->hashtable)) { jsonp_free(object); return NULL; } object->visited = 0; return &object->json; } static void 
json_delete_object(json_object_t *object) { hashtable_close(&object->hashtable); jsonp_free(object); } size_t json_object_size(const json_t *json) { json_object_t *object; if (!json_is_object(json)) return 0; object = json_to_object(json); return object->hashtable.size; } json_t *json_object_get(const json_t *json, const char *key) { json_object_t *object; if (!key || !json_is_object(json)) return NULL; object = json_to_object(json); return hashtable_get(&object->hashtable, key); } int json_object_set_new_nocheck(json_t *json, const char *key, json_t *value) { json_object_t *object; if (!value) return -1; if (!key || !json_is_object(json) || json == value) { json_decref(value); return -1; } object = json_to_object(json); if (hashtable_set(&object->hashtable, key, value)) { json_decref(value); return -1; } return 0; } int json_object_set_new(json_t *json, const char *key, json_t *value) { if (!key || !utf8_check_string(key, strlen(key))) { json_decref(value); return -1; } return json_object_set_new_nocheck(json, key, value); } int json_object_del(json_t *json, const char *key) { json_object_t *object; if (!key || !json_is_object(json)) return -1; object = json_to_object(json); return hashtable_del(&object->hashtable, key); } int json_object_clear(json_t *json) { json_object_t *object; if (!json_is_object(json)) return -1; object = json_to_object(json); hashtable_clear(&object->hashtable); return 0; } int json_object_update(json_t *object, json_t *other) { const char *key; json_t *value; if (!json_is_object(object) || !json_is_object(other)) return -1; json_object_foreach(other, key, value) { if (json_object_set_nocheck(object, key, value)) return -1; } return 0; } int json_object_update_existing(json_t *object, json_t *other) { const char *key; json_t *value; if (!json_is_object(object) || !json_is_object(other)) return -1; json_object_foreach(other, key, value) { if (json_object_get(object, key)) json_object_set_nocheck(object, key, value); } return 0; } int 
json_object_update_missing(json_t *object, json_t *other) { const char *key; json_t *value; if (!json_is_object(object) || !json_is_object(other)) return -1; json_object_foreach(other, key, value) { if (!json_object_get(object, key)) json_object_set_nocheck(object, key, value); } return 0; } void *json_object_iter(json_t *json) { json_object_t *object; if (!json_is_object(json)) return NULL; object = json_to_object(json); return hashtable_iter(&object->hashtable); } void *json_object_iter_at(json_t *json, const char *key) { json_object_t *object; if (!key || !json_is_object(json)) return NULL; object = json_to_object(json); return hashtable_iter_at(&object->hashtable, key); } void *json_object_iter_next(json_t *json, void *iter) { json_object_t *object; if (!json_is_object(json) || iter == NULL) return NULL; object = json_to_object(json); return hashtable_iter_next(&object->hashtable, iter); } const char *json_object_iter_key(void *iter) { if (!iter) return NULL; return hashtable_iter_key(iter); } json_t *json_object_iter_value(void *iter) { if (!iter) return NULL; return (json_t *)hashtable_iter_value(iter); } int json_object_iter_set_new(json_t *json, void *iter, json_t *value) { if (!json_is_object(json) || !iter || !value) return -1; hashtable_iter_set(iter, value); return 0; } void *json_object_key_to_iter(const char *key) { if (!key) return NULL; return hashtable_key_to_iter(key); } static int json_object_equal(json_t *object1, json_t *object2) { const char *key; json_t *value1, *value2; if (json_object_size(object1) != json_object_size(object2)) return 0; json_object_foreach(object1, key, value1) { value2 = json_object_get(object2, key); if (!json_equal(value1, value2)) return 0; } return 1; } static json_t *json_object_copy(json_t *object) { json_t *result; const char *key; json_t *value; result = json_object(); if (!result) return NULL; json_object_foreach(object, key, value) json_object_set_nocheck(result, key, value); return result; } static json_t 
*json_object_deep_copy(const json_t *object) { json_t *result; void *iter; result = json_object(); if (!result) return NULL; /* Cannot use json_object_foreach because object has to be cast non-const */ iter = json_object_iter((json_t *)object); while (iter) { const char *key; const json_t *value; key = json_object_iter_key(iter); value = json_object_iter_value(iter); json_object_set_new_nocheck(result, key, json_deep_copy(value)); iter = json_object_iter_next((json_t *)object, iter); } return result; } /*** array ***/ json_t *json_array(void) { json_array_t *array = jsonp_malloc(sizeof(json_array_t)); if (!array) return NULL; json_init(&array->json, JSON_ARRAY); array->entries = 0; array->size = 8; array->table = jsonp_malloc(array->size * sizeof(json_t *)); if (!array->table) { jsonp_free(array); return NULL; } array->visited = 0; return &array->json; } static void json_delete_array(json_array_t *array) { size_t i; for (i = 0; i < array->entries; i++) json_decref(array->table[i]); jsonp_free(array->table); jsonp_free(array); } size_t json_array_size(const json_t *json) { if (!json_is_array(json)) return 0; return json_to_array(json)->entries; } json_t *json_array_get(const json_t *json, size_t index) { json_array_t *array; if (!json_is_array(json)) return NULL; array = json_to_array(json); if (index >= array->entries) return NULL; return array->table[index]; } int json_array_set_new(json_t *json, size_t index, json_t *value) { json_array_t *array; if (!value) return -1; if (!json_is_array(json) || json == value) { json_decref(value); return -1; } array = json_to_array(json); if (index >= array->entries) { json_decref(value); return -1; } json_decref(array->table[index]); array->table[index] = value; return 0; } static void array_move(json_array_t *array, size_t dest, size_t src, size_t count) { memmove(&array->table[dest], &array->table[src], count * sizeof(json_t *)); } static void array_copy(json_t **dest, size_t dpos, json_t **src, size_t spos, size_t count) { 
memcpy(&dest[dpos], &src[spos], count * sizeof(json_t *)); } static json_t **json_array_grow(json_array_t *array, size_t amount, int copy) { size_t new_size; json_t **old_table, **new_table; if (array->entries + amount <= array->size) return array->table; old_table = array->table; new_size = max(array->size + amount, array->size * 2); new_table = jsonp_malloc(new_size * sizeof(json_t *)); if (!new_table) return NULL; array->size = new_size; array->table = new_table; if (copy) { array_copy(array->table, 0, old_table, 0, array->entries); jsonp_free(old_table); return array->table; } return old_table; } int json_array_append_new(json_t *json, json_t *value) { json_array_t *array; if (!value) return -1; if (!json_is_array(json) || json == value) { json_decref(value); return -1; } array = json_to_array(json); if (!json_array_grow(array, 1, 1)) { json_decref(value); return -1; } array->table[array->entries] = value; array->entries++; return 0; } int json_array_insert_new(json_t *json, size_t index, json_t *value) { json_array_t *array; json_t **old_table; if (!value) return -1; if (!json_is_array(json) || json == value) { json_decref(value); return -1; } array = json_to_array(json); if (index > array->entries) { json_decref(value); return -1; } old_table = json_array_grow(array, 1, 0); if (!old_table) { json_decref(value); return -1; } if (old_table != array->table) { array_copy(array->table, 0, old_table, 0, index); array_copy(array->table, index + 1, old_table, index, array->entries - index); jsonp_free(old_table); } else array_move(array, index + 1, index, array->entries - index); array->table[index] = value; array->entries++; return 0; } int json_array_remove(json_t *json, size_t index) { json_array_t *array; if (!json_is_array(json)) return -1; array = json_to_array(json); if (index >= array->entries) return -1; json_decref(array->table[index]); /* If we're removing the last element, nothing has to be moved */ if (index < array->entries - 1) array_move(array, index, 
index + 1, array->entries - index - 1); array->entries--; return 0; } int json_array_clear(json_t *json) { json_array_t *array; size_t i; if (!json_is_array(json)) return -1; array = json_to_array(json); for (i = 0; i < array->entries; i++) json_decref(array->table[i]); array->entries = 0; return 0; } int json_array_extend(json_t *json, json_t *other_json) { json_array_t *array, *other; size_t i; if (!json_is_array(json) || !json_is_array(other_json)) return -1; array = json_to_array(json); other = json_to_array(other_json); if (!json_array_grow(array, other->entries, 1)) return -1; for (i = 0; i < other->entries; i++) json_incref(other->table[i]); array_copy(array->table, array->entries, other->table, 0, other->entries); array->entries += other->entries; return 0; } static int json_array_equal(json_t *array1, json_t *array2) { size_t i, size; size = json_array_size(array1); if (size != json_array_size(array2)) return 0; for (i = 0; i < size; i++) { json_t *value1, *value2; value1 = json_array_get(array1, i); value2 = json_array_get(array2, i); if (!json_equal(value1, value2)) return 0; } return 1; } static json_t *json_array_copy(json_t *array) { json_t *result; size_t i; result = json_array(); if (!result) return NULL; for (i = 0; i < json_array_size(array); i++) json_array_append(result, json_array_get(array, i)); return result; } static json_t *json_array_deep_copy(const json_t *array) { json_t *result; size_t i; result = json_array(); if (!result) return NULL; for (i = 0; i < json_array_size(array); i++) json_array_append_new(result, json_deep_copy(json_array_get(array, i))); return result; } /*** string ***/ static json_t *string_create(const char *value, size_t len, int own) { char *v; json_string_t *string; if (!value) return NULL; if (own) v = (char *)value; else { v = jsonp_strndup(value, len); if (!v) return NULL; } string = jsonp_malloc(sizeof(json_string_t)); if (!string) { if (!own) jsonp_free(v); return NULL; } json_init(&string->json, JSON_STRING); 
string->value = v; string->length = len; return &string->json; } json_t *json_string_nocheck(const char *value) { if (!value) return NULL; return string_create(value, strlen(value), 0); } json_t *json_stringn_nocheck(const char *value, size_t len) { return string_create(value, len, 0); } /* this is private; "steal" is not a public API concept */ json_t *jsonp_stringn_nocheck_own(const char *value, size_t len) { return string_create(value, len, 1); } json_t *json_string(const char *value) { if (!value) return NULL; return json_stringn(value, strlen(value)); } json_t *json_stringn(const char *value, size_t len) { if (!value || !utf8_check_string(value, len)) return NULL; return json_stringn_nocheck(value, len); } const char *json_string_value(const json_t *json) { if (!json_is_string(json)) return NULL; return json_to_string(json)->value; } size_t json_string_length(const json_t *json) { if (!json_is_string(json)) return 0; return json_to_string(json)->length; } /*** mem ***/ static json_t *mem_create(const char *value, size_t len, int own) { char *v; json_mem_t *mem; if (!value) return NULL; if (own) v = (char *)value; else { v = malloc(len); if (!v) return NULL; memcpy(v, value, len); } mem = jsonp_malloc(sizeof(json_mem_t)); if (!mem) { if (!own) jsonp_free(v); return NULL; } json_init(&mem->json, JSON_MEM); mem->value = v; mem->length = len; return &mem->json; } json_t *json_mem(const char *value, size_t len) { return mem_create(value, len, 0); } json_t *json_mem_own(const char *value, size_t len) { return mem_create(value, len, 1); } const char *json_mem_value(const json_t *json) { if (!json_is_mem(json)) return NULL; return json_to_mem(json)->value; } size_t json_mem_length(const json_t *json) { if (!json_is_mem(json)) return 0; return json_to_mem(json)->length; } int json_string_set_nocheck(json_t *json, const char *value) { if (!value) return -1; return json_string_setn_nocheck(json, value, strlen(value)); } int json_string_setn_nocheck(json_t *json, const 
char *value, size_t len) { char *dup; json_string_t *string; if (!json_is_string(json) || !value) return -1; dup = jsonp_strndup(value, len); if (!dup) return -1; string = json_to_string(json); jsonp_free(string->value); string->value = dup; string->length = len; return 0; } int json_string_set(json_t *json, const char *value) { if (!value) return -1; return json_string_setn(json, value, strlen(value)); } int json_string_setn(json_t *json, const char *value, size_t len) { if (!value || !utf8_check_string(value, len)) return -1; return json_string_setn_nocheck(json, value, len); } static void json_delete_string(json_string_t *string) { jsonp_free(string->value); jsonp_free(string); } static int json_string_equal(json_t *string1, json_t *string2) { json_string_t *s1, *s2; if (!json_is_string(string1) || !json_is_string(string2)) return 0; s1 = json_to_string(string1); s2 = json_to_string(string2); return s1->length == s2->length && !memcmp(s1->value, s2->value, s1->length); } static json_t *json_string_copy(const json_t *string) { json_string_t *s; if (!json_is_string(string)) return NULL; s = json_to_string(string); return json_stringn_nocheck(s->value, s->length); } /*** integer ***/ json_t *json_integer(json_int_t value) { json_integer_t *integer = jsonp_malloc(sizeof(json_integer_t)); if (!integer) return NULL; json_init(&integer->json, JSON_INTEGER); integer->value = value; return &integer->json; } json_int_t json_integer_value(const json_t *json) { if (!json_is_integer(json)) return 0; return json_to_integer(json)->value; } int json_integer_set(json_t *json, json_int_t value) { if (!json_is_integer(json)) return -1; json_to_integer(json)->value = value; return 0; } static void json_delete_integer(json_integer_t *integer) { jsonp_free(integer); } static int json_integer_equal(json_t *integer1, json_t *integer2) { return json_integer_value(integer1) == json_integer_value(integer2); } static json_t *json_integer_copy(const json_t *integer) { return 
json_integer(json_integer_value(integer)); } /*** real ***/ json_t *json_real(double value) { json_real_t *real; if (isnan(value) || isinf(value)) return NULL; real = jsonp_malloc(sizeof(json_real_t)); if (!real) return NULL; json_init(&real->json, JSON_REAL); real->value = value; return &real->json; } double json_real_value(const json_t *json) { if (!json_is_real(json)) return 0; return json_to_real(json)->value; } int json_real_set(json_t *json, double value) { if (!json_is_real(json) || isnan(value) || isinf(value)) return -1; json_to_real(json)->value = value; return 0; } static void json_delete_real(json_real_t *real) { jsonp_free(real); } static int json_real_equal(json_t *real1, json_t *real2) { return json_real_value(real1) == json_real_value(real2); } static json_t *json_real_copy(const json_t *real) { return json_real(json_real_value(real)); } /*** number ***/ double json_number_value(const json_t *json) { if (json_is_integer(json)) return (double)json_integer_value(json); else if (json_is_real(json)) return json_real_value(json); else return 0.0; } /*** simple values ***/ json_t *json_true(void) { static json_t the_true = { JSON_TRUE, (size_t)-1 }; return &the_true; } json_t *json_false(void) { static json_t the_false = { JSON_FALSE, (size_t)-1 }; return &the_false; } json_t *json_null(void) { static json_t the_null = { JSON_NULL, (size_t)-1 }; return &the_null; } /*** deletion ***/ void json_delete(json_t *json) { if (!json) return; switch (json_typeof(json)) { case JSON_OBJECT: json_delete_object(json_to_object(json)); break; case JSON_ARRAY: json_delete_array(json_to_array(json)); break; case JSON_STRING: json_delete_string(json_to_string(json)); break; case JSON_INTEGER: json_delete_integer(json_to_integer(json)); break; case JSON_REAL: json_delete_real(json_to_real(json)); break; default: return; } /* json_delete is not called for true, false or null */ } /*** equality ***/ int json_equal(json_t *json1, json_t *json2) { if (!json1 || !json2) return 
0; if (json_typeof(json1) != json_typeof(json2)) return 0; /* this covers true, false and null as they are singletons */ if (json1 == json2) return 1; switch (json_typeof(json1)) { case JSON_OBJECT: return json_object_equal(json1, json2); case JSON_ARRAY: return json_array_equal(json1, json2); case JSON_STRING: return json_string_equal(json1, json2); case JSON_INTEGER: return json_integer_equal(json1, json2); case JSON_REAL: return json_real_equal(json1, json2); default: return 0; } } /*** copying ***/ json_t *json_copy(json_t *json) { if (!json) return NULL; switch (json_typeof(json)) { case JSON_OBJECT: return json_object_copy(json); case JSON_ARRAY: return json_array_copy(json); case JSON_STRING: return json_string_copy(json); case JSON_INTEGER: return json_integer_copy(json); case JSON_REAL: return json_real_copy(json); case JSON_TRUE: case JSON_FALSE: case JSON_NULL: return json; default: return NULL; } return NULL; } json_t *json_deep_copy(const json_t *json) { if (!json) return NULL; switch (json_typeof(json)) { case JSON_OBJECT: return json_object_deep_copy(json); case JSON_ARRAY: return json_array_deep_copy(json); /* for the rest of the types, deep copying doesn't differ from shallow copying */ case JSON_STRING: return json_string_copy(json); case JSON_INTEGER: return json_integer_copy(json); case JSON_REAL: return json_real_copy(json); case JSON_TRUE: case JSON_FALSE: case JSON_NULL: return (json_t *)json; default: return NULL; } return NULL; } ================================================ FILE: merger/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (merger) include_directories (${CMAKE_SOURCE_DIR}/instrumentation/) set(MERGER_SRC ${PROJECT_SOURCE_DIR}/merger.c) source_group("Executable Sources" FILES ${MERGER_SRC}) add_executable(merger ${MERGER_SRC} $) target_compile_definitions(merger PUBLIC INSTRUMENTATION_NO_IMPORT) target_link_libraries(merger utils) target_link_libraries(merger 
jansson) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(merger Shlwapi) endif (WIN32) ================================================ FILE: merger/merger.c ================================================ //This program merges multiple sets of instrumentation data into one //instrumentation state. The resulting instrumentation state will include the //tracked coverage from all of the input instrumentation states. This allows //multiple instances of the fuzzer to share instrumentation data, and ignore //paths that the other fuzzer found. #include #include #include #include #include #include /** * This function prints out the usage information for the merger and the available instrumentations. * @param program_name - the name of the program currently being run (for use in the outputted message) */ void usage(char * program_name) { char * help_text; printf( "Usage: %s instrumentation_name [-i instrumentation_options] output_file input_file [input_file ...]\n" "\n" "Options:\n" "\t -i instrumentation_options Set the options for the instrumentation\n" "\t output_file Set the file containing that the combined instrumentation state should dump to\n" "\t input_file Set the file containing that the instrumentation state should load from\n" "\n", program_name ); #define PRINT_HELP(x, y) \ x = y; \ if(x) { \ puts(x); \ free(x); \ } PRINT_HELP(help_text, instrumentation_help()); exit(1); } int main(int argc, char ** argv) { instrumentation_t * instrumentation; int instrumentation_length, argv_index; char *instrumentation_options = NULL, *instrumentation_state_string = NULL, *instrumentation_state_dump_file = NULL; void * instrumentation_state = NULL, *new_instrumentation_state = NULL, *merged_instrumentation_state = NULL; if (argc < 3) usage(argv[0]); if (setup_logging(NULL)) { printf("Failed setting up logging, exitting\n"); return 1; } instrumentation = instrumentation_factory(argv[1]); if (!instrumentation) FATAL_MSG("Unknown instrumentation (%s)", argv[1]); if 
(strcmp("-i", argv[2])) { argv_index = 3; instrumentation_state_dump_file = argv[2]; } else { instrumentation_options = argv[3]; argv_index = 5; instrumentation_state_dump_file = argv[4]; } for (; argv_index < argc; argv_index++) { //Load the instrumentation state from disk instrumentation_length = read_file(argv[argv_index], &instrumentation_state_string); if (instrumentation_length <= 0) FATAL_MSG("Could not read instrumentation file or empty instrumentation file: %s", argv[argv_index]); new_instrumentation_state = instrumentation->create(instrumentation_options, instrumentation_state_string); if (!instrumentation) FATAL_MSG("Bad options/state for instrumentation file %s", argv[argv_index]); free(instrumentation_state_string); if (!instrumentation_state) instrumentation_state = new_instrumentation_state; else { merged_instrumentation_state = instrumentation->merge(instrumentation_state, new_instrumentation_state); instrumentation->cleanup(instrumentation_state); instrumentation->cleanup(new_instrumentation_state); instrumentation_state = merged_instrumentation_state; } } instrumentation_state_string = instrumentation->get_state(instrumentation_state); if (instrumentation_state_string) { write_buffer_to_file(instrumentation_state_dump_file, instrumentation_state_string, strlen(instrumentation_state_string)); instrumentation->free_state(instrumentation_state_string); } else WARNING_MSG("Couldn't dump instrumentation state to file %s", instrumentation_state_dump_file); //Cleanup the objects and exit instrumentation->cleanup(instrumentation_state); free(instrumentation); return 0; } ================================================ FILE: mutators/APLv2 ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
================================================ FILE: mutators/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (mutators_repo) SET ( BUILD_DIRECTORY ${CMAKE_SOURCE_DIR}/build ) if (WIN32) SET( WINDOWS_BUILD_PATH ${BUILD_DIRECTORY}/${CMAKE_C_COMPILER_ARCHITECTURE_ID}/${CMAKE_BUILD_TYPE} ) SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${WINDOWS_BUILD_PATH}/mutators/ ) SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${WINDOWS_BUILD_PATH}/mutators/ ) SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${WINDOWS_BUILD_PATH}/mutators/ ) LINK_DIRECTORIES(${WINDOWS_BUILD_PATH}/killerbeez/) add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_DEPRECATION_DISABLE -D_CRT_NONSTDC_NO_DEPRECATE -DUTILS_NO_IMPORT -DJANSSON_NO_IMPORT) else (WIN32) SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/mutators/ ) SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/mutators/ ) SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/mutators/ ) LINK_DIRECTORIES(${CMAKE_BINARY_DIR}/killerbeez/) endif (WIN32) include_directories (${CMAKE_SOURCE_DIR}/jansson/) include_directories (${CMAKE_SOURCE_DIR}/utils/) # The general mutator library with the common functionality add_subdirectory(mutators) # The various mutators add_subdirectory(afl_mutator) add_subdirectory(bit_flip_mutator) add_subdirectory(arithmetic_mutator) add_subdirectory(dictionary_mutator) add_subdirectory(havoc_mutator) add_subdirectory(honggfuzz_mutator) add_subdirectory(interesting_value_mutator) add_subdirectory(multipart_mutator) add_subdirectory(ni_mutator) add_subdirectory(nop_mutator) add_subdirectory(splice_mutator) add_subdirectory(zzuf_mutator) if (NOT APPLE) add_subdirectory(radamsa_mutator) endif (NOT APPLE) # The mutator test program, uncomment to build #add_subdirectory(mutator_tester) ================================================ FILE: mutators/LICENSE ================================================ Unless otherwise marked, this license applies to all code in this repository. 
University of Illinois/NCSA Open Source License (UIUC license) Copyright (c) 2018 Grimm. All rights reserved. Developed by: Software Security Group Grimm https://grimm-co.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. - Neither the names of Grimm, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
================================================
FILE: mutators/afl_mutator/CMakeLists.txt
================================================
cmake_minimum_required (VERSION 2.8.8)
project (afl_mutator)

include_directories (${PROJECT_SOURCE_DIR}/../mutators/)

set(AFL_SRC ${PROJECT_SOURCE_DIR}/afl_mutator.c)
source_group("Library Sources" FILES ${AFL_SRC})

add_library(afl_mutator SHARED ${AFL_SRC} $ $)
target_link_libraries(afl_mutator utils)
target_compile_definitions(afl_mutator PUBLIC AFL_MUTATOR_EXPORTS)
target_compile_definitions(afl_mutator PUBLIC MUTATORS_NO_IMPORT)
target_compile_definitions(afl_mutator PUBLIC UTILS_NO_IMPORT)
target_compile_definitions(afl_mutator PUBLIC JANSSON_NO_IMPORT)

if (WIN32) # utils.dll needs Shlwapi
  target_link_libraries(afl_mutator Shlwapi)
endif (WIN32)

================================================
FILE: mutators/afl_mutator/afl_mutator.c
================================================
/* NOTE(review): the header names of the bare #include directives below are
 * missing in this copy of the file; restore them from the repository. */
#include
#include
#include
#include
#include "afl_mutator.h"
#include
#include
#include
#include

/* Per-instance state of the afl mutator. */
struct afl_state
{
	int skip_deterministic; // option/state value; when set, setup_options starts at STAGE_HAVOC
	char * input;           // the original input that every mutation starts from
	size_t input_length;    // size of input in bytes
	int iteration;          // incremented once per mutate_inner call
	mutate_info_t info;     // stage bookkeeping and mutex handed to mutate_one()
};
typedef struct afl_state afl_state_t;

/* Function table for this mutator; init() copies it to the caller.
 * afl_free_state and afl_get_total_iteration_count are macros from afl_mutator.h. */
mutator_t afl_mutator = {
	FUNCNAME(create),
	FUNCNAME(cleanup),
	FUNCNAME(mutate),
	FUNCNAME(mutate_extended),
	FUNCNAME(get_state),
	afl_free_state,
	FUNCNAME(set_state),
	FUNCNAME(get_current_iteration),
	afl_get_total_iteration_count,
	FUNCNAME(get_input_info),
	FUNCNAME(set_input),
	FUNCNAME(help)
};

/**
 * Havoc stage wrapper. Reports MUTATOR_DONE once info->stage_cur exceeds the stage budget
 * MAX(HAVOC_MIN, cycles * (perf_score / havoc_div) / 100), where cycles is HAVOC_CYCLES when
 * queue_cycle > 1 and HAVOC_CYCLES_INIT otherwise; until then it delegates to havoc().
 * @param info - the mutation bookkeeping for this mutator instance
 * @param buf - the buffer to mutate
 * @return MUTATOR_DONE when the budget is exhausted, otherwise the result of havoc()
 */
static int afl_havoc(mutate_info_t * info, mutate_buffer_t * buf)
{
	if (info->stage_cur > MAX(HAVOC_MIN, (info->queue_cycle > 1 ? HAVOC_CYCLES : HAVOC_CYCLES_INIT) * (info->perf_score / info->havoc_div) / 100))
		return MUTATOR_DONE;
	return havoc(info, buf);
}

/**
 * Splice stage wrapper. Reports MUTATOR_DONE once info->stage_cur exceeds
 * MAX(HAVOC_MIN, SPLICE_HAVOC * (perf_score / havoc_div) / 100); until then it
 * delegates to splice_buffers().
 * @param info - the mutation bookkeeping for this mutator instance
 * @param buf - the buffer to mutate
 * @return MUTATOR_DONE when the budget is exhausted, otherwise the result of splice_buffers()
 */
static int afl_splice(mutate_info_t * info, mutate_buffer_t * buf)
{
	if (info->stage_cur > MAX(HAVOC_MIN, SPLICE_HAVOC * (info->perf_score / info->havoc_div) / 100))
		return MUTATOR_DONE;
	return splice_buffers(info, buf);
}

/* Per-stage mutation functions passed to mutate_one(); the entries are listed in the
 * same order as the STAGE_* enum below (presumably indexed by info->stage -- confirm). */
static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = {
	single_walking_bit,
	two_walking_bit,
	four_walking_bit,
	walking_byte,
	two_walking_byte,
	four_walking_byte,
	one_byte_arithmetics,
	two_byte_arithmetics,
	four_byte_arithmetics,
	interesting_one_byte,
	interesting_two_byte,
	interesting_four_byte,
	dictionary_overwrite,
	dictionary_insert,
	afl_havoc,
	afl_splice,
};

/* Fuzzing stages */
enum {
	/* 00 */ STAGE_FLIP1,
	/* 01 */ STAGE_FLIP2,
	/* 02 */ STAGE_FLIP4,
	/* 03 */ STAGE_FLIP8,
	/* 04 */ STAGE_FLIP16,
	/* 05 */ STAGE_FLIP32,
	/* 06 */ STAGE_ARITH8,
	/* 07 */ STAGE_ARITH16,
	/* 08 */ STAGE_ARITH32,
	/* 09 */ STAGE_INTEREST8,
	/* 10 */ STAGE_INTEREST16,
	/* 11 */ STAGE_INTEREST32,
	/* 12 */ STAGE_EXTRAS_UO,
	/* 13 */ STAGE_EXTRAS_UI,
	/* 14 */ STAGE_HAVOC,
	/* 15 */ STAGE_SPLICE,
};

////////////////////////////////////////////////////////////////////////////////////////////
//// API methods ///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////

#ifndef ALL_MUTATORS_IN_ONE
/**
 * This function fills in m with all of the function pointers for this mutator.
 * @param m - a pointer to a mutator_t structure
 * @return none
 */
AFL_MUTATOR_API void init(mutator_t * m)
{
	memcpy(m, &afl_mutator, sizeof(mutator_t));
}
#endif

/**
 * This function creates and initializes an afl_state_t object based on the passed in JSON options.
* @param options - a JSON of options for the afl mutator * @return the newly created afl_state_t object or NULL on failure */ static afl_state_t * setup_options(char * options) { afl_state_t * state; state = (afl_state_t *)malloc(sizeof(afl_state_t)); if (!state) return NULL; memset(state, 0, sizeof(afl_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } state->info.should_skip_previous = 1; if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 0); PARSE_OPTION_INT(state, options, skip_deterministic, "skip_deterministic", FUNCNAME(cleanup)); if (state->skip_deterministic) state->info.stage = STAGE_HAVOC; return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. */ AFL_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(afl_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. 
*/ AFL_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((afl_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(afl_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { afl_state_t * state = (afl_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; state->iteration++; while (1) { ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (ret != MUTATOR_DONE) break; //We've finished this cycle, reset back to havoc and continue state->info.stage = STAGE_HAVOC; state->skip_deterministic = 1; state->info.queue_cycle++; } if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ AFL_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return mutate_inner(mutator_state, buffer, buffer_length, 0); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. 
* @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ AFL_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(afl_state_t, state->info.mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. */ AFL_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { afl_state_t * state = (afl_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); ADD_INT(temp, state->skip_deterministic, state_obj, "skip_deterministic"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. 
* @return 0 on success or non-zero on failure */ AFL_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { afl_state_t * current_state = (afl_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); GET_INT(temp_int, state, current_state->skip_deterministic, "skip_deterministic", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ AFL_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(afl_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ AFL_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(afl_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. 
This parameter is optional and can be NULL, if this information is not needed.
 */
AFL_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes)
{
	// The body comes from the shared single-input macro, parameterized by the state type
	SINGLE_INPUT_GET_INFO(afl_state_t);
}

/**
 * This function sets a help message for the mutator.
 * The text below lists every option this mutator documents for its JSON options string.
 * @param help_str - A pointer that will be updated to point to the new help string.
 * @return 0 on success and -1 on failure
 */
AFL_MUTATOR_API int FUNCNAME(help)(char ** help_str)
{
	GENERIC_MUTATOR_HELP(
		"afl - afl-based mutator\n"
		"Options:\n"
		" dictionary A file or directory containing dictionary to use while\n"
		" mangling input\n"
		" havoc_div A divisor for determining the number of rounds that\n"
		" the havoc stage should run (typically 1, 2, 5, or 10)\n"
		" perf_score A performance score used to determine how long to run\n"
		" the havoc and splice stages. Typically 100, higher\n"
		" results in a larger number of mutations in these\n"
		" stages before moving on.\n"
		" queue_cycle The queue round counter. Used in determining how to\n"
		" mutate input. Generally this shouldn't need to be set\n"
		" random_state0 The first half of the seed to afl's random number\n"
		" generator\n"
		" random_state1 The second half of the seed to afl's random number\n"
		" generator\n"
		" skip_deterministic Instruct AFL to skip the deterministic mutations\n"
		" splice_filenames An array of files to use during afl's splice stage,\n"
		" for mixing with the input\n"
		"\n"
	);
}

================================================
FILE: mutators/afl_mutator/afl_mutator.h
================================================
#pragma once
/* NOTE(review): the header names of the two bare #include directives below are
 * missing in this copy of the file; restore them from the repository. */
#include
#include

/* Export/import decoration for the public functions; expands to nothing outside Windows. */
#ifdef _WIN32
#ifdef AFL_MUTATOR_EXPORTS
#define AFL_MUTATOR_API __declspec(dllexport)
#else
#define AFL_MUTATOR_API __declspec(dllimport)
#endif
#else //_WIN32
#define AFL_MUTATOR_API
#endif

#define MUTATOR_NAME "afl"

AFL_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length);
AFL_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state);
AFL_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length);
AFL_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags);
AFL_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state);
/* This mutator has no free_state of its own; the name aliases the shared default. */
#define afl_free_state default_free_state
AFL_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state);
AFL_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state);
/* Aliases a shared helper; judging by its name it reports an unknown/infinite total -- confirm. */
#define afl_get_total_iteration_count return_unknown_or_infinite_total_iterations
AFL_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes);
AFL_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length);
AFL_MUTATOR_API int FUNCNAME(help)(char **help_str);

/* init is only provided when each mutator is built as its own library. */
#ifndef ALL_MUTATORS_IN_ONE
AFL_MUTATOR_API void init(mutator_t * m);
#endif

================================================
FILE: mutators/arithmetic_mutator/CMakeLists.txt
================================================
cmake_minimum_required (VERSION 2.8.8)
project (arithmetic_mutator)

include_directories (${PROJECT_SOURCE_DIR}/../mutators/)

set(ARITHMETIC_SRC ${PROJECT_SOURCE_DIR}/arithmetic_mutator.c)
source_group("Library Sources" FILES ${ARITHMETIC_SRC})

add_library(arithmetic_mutator SHARED ${ARITHMETIC_SRC} $ $)
target_link_libraries(arithmetic_mutator utils)
target_compile_definitions(arithmetic_mutator PUBLIC ARITHMETIC_MUTATOR_EXPORTS)
target_compile_definitions(arithmetic_mutator PUBLIC MUTATORS_NO_IMPORT)
target_compile_definitions(arithmetic_mutator PUBLIC UTILS_NO_IMPORT)
target_compile_definitions(arithmetic_mutator PUBLIC JANSSON_NO_IMPORT)

if (WIN32) # utils.dll needs Shlwapi
  target_link_libraries(arithmetic_mutator Shlwapi)
endif (WIN32)

================================================
FILE: mutators/arithmetic_mutator/arithmetic_mutator.c
================================================
#include "arithmetic_mutator.h"
/* NOTE(review): the header names of the bare #include directives below are
 * missing in this copy of the file; restore them from the repository. */
#include
#include
#include
#include
#include
#include
#include
#include

/* Per-instance state of the arithmetic mutator. */
struct arithmetic_state
{
	uint64_t num_bytes;  // "num_bytes" option; 0 means no single stage was requested
	char * input;        // the original input that every mutation starts from
	size_t input_length; // size of input in bytes
	int iteration;       // incremented once per mutate_inner call
	mutate_info_t info;  // stage bookkeeping and mutex handed to mutate_one()
};
typedef struct arithmetic_state arithmetic_state_t;

/* Per-stage mutation functions passed to mutate_one(): 1-, 2- and 4-byte arithmetic,
 * in the same order as the bytes_per_stage table used by setup_options. */
static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = {
	one_byte_arithmetics,
	two_byte_arithmetics,
	four_byte_arithmetics,
};

/* Function table for this mutator; init() copies it to the caller.
 * arithmetic_free_state and arithmetic_get_total_iteration_count are macros from arithmetic_mutator.h. */
mutator_t arithmetic_mutator = {
	FUNCNAME(create),
	FUNCNAME(cleanup),
	FUNCNAME(mutate),
	FUNCNAME(mutate_extended),
	FUNCNAME(get_state),
	arithmetic_free_state,
	FUNCNAME(set_state),
	FUNCNAME(get_current_iteration),
	arithmetic_get_total_iteration_count,
	FUNCNAME(get_input_info),
	FUNCNAME(set_input),
	FUNCNAME(help)
};

/**
 * This function fills in m with all of the function pointers for this mutator.
* @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE ARITHMETIC_MUTATOR_API void init(mutator_t * m) { memcpy(m, &arithmetic_mutator, sizeof(mutator_t)); } #endif static arithmetic_state_t * setup_options(char * options) { arithmetic_state_t * state; int bytes_per_stage[] = { 1, 2, 4 }; int i; state = (arithmetic_state_t *)malloc(sizeof(arithmetic_state_t)); if (!state) return NULL; memset(state, 0, sizeof(arithmetic_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 0); PARSE_OPTION_INT(state, options, num_bytes, "num_bytes", FUNCNAME(cleanup)); if (state->num_bytes) { state->info.stage = -1; for (i = 0; i < ARRAY_SIZE(bytes_per_stage) && state->info.stage < 0; i++) { if (bytes_per_stage[i] == state->num_bytes) state->info.stage = i; } if (state->info.stage < 0) { FUNCNAME(cleanup)(state); return NULL; } state->info.one_stage_only = 1; } return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. */ ARITHMETIC_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(arithmetic_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. 
* @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ ARITHMETIC_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((arithmetic_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(arithmetic_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { arithmetic_state_t * state = (arithmetic_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ ARITHMETIC_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return mutate_inner(mutator_state, buffer, buffer_length, 0); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. 
* @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ ARITHMETIC_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(arithmetic_state_t, state->info.mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. */ ARITHMETIC_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { arithmetic_state_t * state = (arithmetic_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. 
* @return 0 on success or non-zero on failure */ ARITHMETIC_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { arithmetic_state_t * current_state = (arithmetic_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ ARITHMETIC_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(arithmetic_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ ARITHMETIC_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(arithmetic_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. 
*/
ARITHMETIC_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes)
{
	// The body comes from the shared single-input macro, parameterized by the state type
	SINGLE_INPUT_GET_INFO(arithmetic_state_t);
}

/**
 * This function sets a help message for the mutator.
 * @param help_str - A pointer that will be updated to point to the new help string.
 * @return 0 on success and -1 on failure
 */
ARITHMETIC_MUTATOR_API int FUNCNAME(help)(char **help_str)
{
	GENERIC_MUTATOR_HELP(
		"arithmetic - afl-based arithmetic mutator\n"
		"Options:\n"
		" num_bytes The number of bytes to operate on; either 1, 2, or 4.\n"
		" The default option is to do all three of the\n"
		" options, one after another.\n"
		" skip_previous_stages Whether the mutation outputs should skip any output\n"
		" that would match the output of the bit_flip or\n"
		" previous rounds of the arithmetic mutator. Useful\n"
		" when using multiple mutators\n"
		"\n"
	);
}

================================================
FILE: mutators/arithmetic_mutator/arithmetic_mutator.h
================================================
#pragma once
/* NOTE(review): the header names of the two bare #include directives below are
 * missing in this copy of the file; restore them from the repository. */
#include
#include

/* Export/import decoration for the public functions; expands to nothing outside Windows. */
#ifdef _WIN32
#ifdef ARITHMETIC_MUTATOR_EXPORTS
#define ARITHMETIC_MUTATOR_API __declspec(dllexport)
#else
#define ARITHMETIC_MUTATOR_API __declspec(dllimport)
#endif
#else //_WIN32
#define ARITHMETIC_MUTATOR_API
#endif

#define MUTATOR_NAME "arithmetic"

ARITHMETIC_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length);
ARITHMETIC_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state);
ARITHMETIC_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length);
ARITHMETIC_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags);
ARITHMETIC_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state);
/* This mutator has no free_state of its own; the name aliases the shared default. */
#define arithmetic_free_state default_free_state
ARITHMETIC_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state);
ARITHMETIC_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state);
/* Aliases a shared helper; judging by its name it reports an unknown/infinite total -- confirm. */
#define arithmetic_get_total_iteration_count return_unknown_or_infinite_total_iterations
ARITHMETIC_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes);
ARITHMETIC_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length);
ARITHMETIC_MUTATOR_API int FUNCNAME(help)(char **help_str);

/* init is only provided when each mutator is built as its own library. */
#ifndef ALL_MUTATORS_IN_ONE
ARITHMETIC_MUTATOR_API void init(mutator_t * m);
#endif

================================================
FILE: mutators/bit_flip_mutator/CMakeLists.txt
================================================
cmake_minimum_required (VERSION 2.8.8)
project (bit_flip_mutator)

include_directories (${PROJECT_SOURCE_DIR}/../mutators/)

set(BIT_FLIP_SRC ${PROJECT_SOURCE_DIR}/bit_flip_mutator.c)
source_group("Library Sources" FILES ${BIT_FLIP_SRC})

add_library(bit_flip_mutator SHARED ${BIT_FLIP_SRC} $ $)
target_link_libraries(bit_flip_mutator utils)
target_compile_definitions(bit_flip_mutator PUBLIC BF_MUTATOR_EXPORTS)
target_compile_definitions(bit_flip_mutator PUBLIC MUTATORS_NO_IMPORT)
target_compile_definitions(bit_flip_mutator PUBLIC UTILS_NO_IMPORT)
target_compile_definitions(bit_flip_mutator PUBLIC JANSSON_NO_IMPORT)

if (WIN32) # utils.dll needs Shlwapi
  target_link_libraries(bit_flip_mutator Shlwapi)
endif (WIN32)

================================================
FILE: mutators/bit_flip_mutator/bit_flip_mutator.c
================================================
#include "bit_flip_mutator.h"
/* NOTE(review): the header names of the bare #include directives below are
 * missing in this copy of the file; restore them from the repository. */
#include
#include
#include
#include
#include
#include
#include
#include

/* Per-instance state of the bit_flip mutator. */
struct bit_flip_state
{
	uint64_t num_bits;   // "num_bits" option; 0 means no single stage was requested
	char * input;        // the original input that every mutation starts from
	size_t input_length; // size of input in bytes
	int iteration;       // incremented once per mutate_inner call
	mutate_info_t info;  // stage bookkeeping and mutex handed to mutate_one()
};
typedef struct bit_flip_state bit_flip_state_t;

/* Per-stage mutation functions passed to mutate_one(), in the same order as the
 * bits_per_stage table (1, 2, 4, 8, 16, 32 bits) used by setup_options. */
static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = {
	single_walking_bit,
	two_walking_bit,
	four_walking_bit,
	walking_byte,
	two_walking_byte,
	four_walking_byte,
};

/* Function table for this mutator; init() copies it to the caller.
 * NOTE(review): this table is named arithmetic_mutator although it belongs to the bit_flip
 * mutator. arithmetic_mutator.c defines a global with the same name, so the two would
 * collide if both files are linked into one binary (e.g. an ALL_MUTATORS_IN_ONE build) --
 * confirm whether this is intended before renaming, since the symbol is not static. */
mutator_t arithmetic_mutator = {
	FUNCNAME(create),
	FUNCNAME(cleanup),
	FUNCNAME(mutate),
	FUNCNAME(mutate_extended),
	FUNCNAME(get_state),
	bit_flip_free_state,
	FUNCNAME(set_state),
	FUNCNAME(get_current_iteration),
	FUNCNAME(get_total_iteration_count),
	FUNCNAME(get_input_info),
	FUNCNAME(set_input),
	FUNCNAME(help)
};

/**
 * This function fills in m with all of the function pointers for this mutator.
 * @param m - a pointer to a mutator_t structure
 * @return none
 */
#ifndef ALL_MUTATORS_IN_ONE
BF_MUTATOR_API void init(mutator_t * m)
{
	memcpy(m, &arithmetic_mutator, sizeof(mutator_t));
}
#endif

/**
 * Creates and initializes a bit_flip_state_t object from the passed in JSON options.
 * When the num_bits option is non-zero it must be one of 1, 2, 4, 8, 16 or 32; the mutator
 * is then pinned to that single stage. Any other non-zero value is rejected.
 * @param options - a JSON string of options for the bit_flip mutator; may be NULL or empty
 * @return the newly created bit_flip_state_t object or NULL on failure
 */
static bit_flip_state_t * setup_options(char * options)
{
	bit_flip_state_t * state;
	int bits_per_stage[] = { 1, 2, 4, 8, 16, 32 };
	int i;

	state = (bit_flip_state_t *)malloc(sizeof(bit_flip_state_t));
	if (!state)
		return NULL;
	memset(state, 0, sizeof(bit_flip_state_t));

	//Setup defaults
	if (reset_mutate_info(&state->info)) {
		free(state);
		return NULL;
	}

	// No options given: the defaults above are the final configuration
	if (!options || !strlen(options))
		return state;

	PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 0);
	PARSE_OPTION_INT(state, options, num_bits, "num_bits", FUNCNAME(cleanup));

	if (state->num_bits) {
		// Translate the requested width into its stage index; -1 marks "not found yet"
		state->info.stage = -1;
		for (i = 0; i < ARRAY_SIZE(bits_per_stage) && state->info.stage < 0; i++) {
			if (bits_per_stage[i] == state->num_bits)
				state->info.stage = i;
		}
		// The requested width is not one of the supported stages
		if (state->info.stage < 0) {
			FUNCNAME(cleanup)(state);
			return NULL;
		}
		state->info.one_stage_only = 1;
	}
	return state;
}

/**
 * This function will allocate and initialize the mutator state used in the other Mutator API
 * functions.
 * @param options - a json string that contains the mutator specific string of options.
 * @param state - Optionally, used to load a previously dumped state (with the get_state()
 * function), that defines the current iteration of the mutator.
 * @param input - used to produce new mutated inputs later when the mutate function is called
 * @param input_length - the size of the input buffer
 * @return a mutator specific structure or NULL on failure.
*/ BF_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(bit_flip_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ BF_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((bit_flip_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(bit_flip_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { bit_flip_state_t * state = (bit_flip_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. 
* @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error
 */
BF_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length)
{
	//The last argument is mutate_inner's is_thread_safe flag; 0 means no mutex is taken
	return mutate_inner(mutator_state, buffer, buffer_length, 0);
}

/**
 * This function will mutate the input given in the create function and return it in the buffer argument.
 * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h
 * for the list of available flags.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param buffer - a buffer that the mutated input will be written to
 * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as
 * the original input buffer.
 * @param flags - A set of mutate flags that modify how this mutator mutates the input.
 * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error
 */
BF_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags)
{
	//The whole body comes from the shared macro, which is given the state type and the mutex to use.
	//NOTE(review): presumably it interprets flags and forwards to mutate_inner - confirm against the
	//macro definition.
	SINGLE_INPUT_MUTATE_EXTENDED(bit_flip_state_t, state->info.mutate_mutex);
}

/**
 * This function will return the state of the mutator. The returned value can be used to restart the
 * mutator at a later time, by passing it to the create or set_state function. It is the caller's
 * responsibility to free the memory allocated here by calling the free_state function.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @return - a buffer that defines the current state of the mutator.
*/ BF_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { bit_flip_state_t * state = (bit_flip_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); ADD_INT(temp, state->num_bits, state_obj, "num_bits"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. * @return 0 on success or non-zero on failure */ BF_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { bit_flip_state_t * current_state = (bit_flip_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); GET_INT(temp_int, state, current_state->num_bits, "num_bits", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ BF_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(bit_flip_state_t); } /** * Returns the total number of mutations possible with this mutator and the current options. * @param mutator_state - a mutator specific structure previously created by the create function. * @return the number of possible mutations with this mutator, or -1 if infinite or the number * cannot be determined. 
*/ BF_MUTATOR_API int FUNCNAME(get_total_iteration_count)(void * mutator_state) { bit_flip_state_t * state = (bit_flip_state_t *)mutator_state; if (state->info.one_stage_only) { if(state->num_bits < 8) //1, 2, 4 return (state->input_length << 3) - (state->num_bits - 1); return state->input_length - ((state->num_bits >> 3) - 1); //8, 16, 32 } //Add up all rounds return ((state->input_length << 3) //1 + ((state->input_length << 3) - 1) //2 + ((state->input_length << 3) - 3) //4 + state->input_length //8 + (state->input_length - 1) //16 + (state->input_length - 3)); //32 } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. */ BF_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(bit_flip_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ BF_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(bit_flip_state_t); } /** * This function sets a help message for the mutator. 
* @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ BF_MUTATOR_API int FUNCNAME(help)(char **help_str) { GENERIC_MUTATOR_HELP( "bit_flip - afl-based bit flip mutator\n" "Options:\n" " num_bits The number of bits to operate on; either 1, 2, 4, 8,\n" " 16, or 32. The default option is to do all six of\n" " the options, one after another.\n" "\n" ); } ================================================ FILE: mutators/bit_flip_mutator/bit_flip_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef BF_MUTATOR_EXPORTS #define BF_MUTATOR_API __declspec(dllexport) #else #define BF_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define BF_MUTATOR_API #endif #define MUTATOR_NAME "bit_flip" BF_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); BF_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); BF_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); BF_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); BF_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define bit_flip_free_state default_free_state BF_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); BF_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); BF_MUTATOR_API int FUNCNAME(get_total_iteration_count)(void * mutator_state); BF_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); BF_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); BF_MUTATOR_API int FUNCNAME(help)(char **help_str); #ifndef ALL_MUTATORS_IN_ONE BF_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/dictionary_mutator/CMakeLists.txt 
================================================ cmake_minimum_required (VERSION 2.8.8) project (dictionary_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(DICTIONARY_SRC ${PROJECT_SOURCE_DIR}/dictionary_mutator.c) source_group("Library Sources" FILES ${DICTIONARY_SRC}) add_library(dictionary_mutator SHARED ${DICTIONARY_SRC} $ $) target_link_libraries(dictionary_mutator utils) target_compile_definitions(dictionary_mutator PUBLIC DICTIONARY_MUTATOR_EXPORTS) target_compile_definitions(dictionary_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(dictionary_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(dictionary_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(dictionary_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/dictionary_mutator/dictionary_mutator.c ================================================ #include "dictionary_mutator.h" #include #include #include #include #include #include #include #include struct dictionary_state { char * operation; char * input; size_t input_length; int iteration; mutate_info_t info; }; typedef struct dictionary_state dictionary_state_t; static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = { dictionary_overwrite, dictionary_insert, }; mutator_t dictionary_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), dictionary_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), dictionary_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; /** * This function fills in m with all of the function pointers for this mutator. 
* @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE DICTIONARY_MUTATOR_API void init(mutator_t * m) { memcpy(m, &dictionary_mutator, sizeof(mutator_t)); } #endif static dictionary_state_t * setup_options(char * options) { dictionary_state_t * state; char * operation_per_stage[] = { "overwrite", "insert" }; int i; state = (dictionary_state_t *)malloc(sizeof(dictionary_state_t)); if (!state) return NULL; memset(state, 0, sizeof(dictionary_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 1, 0); PARSE_OPTION_STRING(state, options, operation, "operation", FUNCNAME(cleanup)); if (state->operation) { state->info.stage = -1; for (i = 0; i < ARRAY_SIZE(operation_per_stage) && state->info.stage < 0; i++) { if (!strcmp(operation_per_stage[i], state->operation)) state->info.stage = i; } if (state->info.stage < 0) { FUNCNAME(cleanup)(state); return NULL; } state->info.one_stage_only = 1; } return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. */ DICTIONARY_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(dictionary_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. 
* @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ DICTIONARY_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((dictionary_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(dictionary_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { dictionary_state_t * state = (dictionary_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ DICTIONARY_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return mutate_inner(mutator_state, buffer, buffer_length, 0); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. 
* @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ DICTIONARY_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(dictionary_state_t, state->info.mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. */ DICTIONARY_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { dictionary_state_t * state = (dictionary_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. 
* @return 0 on success or non-zero on failure */ DICTIONARY_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { dictionary_state_t * current_state = (dictionary_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ DICTIONARY_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(dictionary_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ DICTIONARY_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(dictionary_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. 
*/ DICTIONARY_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(dictionary_state_t); } /** * This function sets a help message for the mutator. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ DICTIONARY_MUTATOR_API int FUNCNAME(help)(char **help_str) { GENERIC_MUTATOR_HELP( "dictionary - afl-based dictionary mutator\n" "Options:\n" " dictionary A file or directory containing dictionary to use while\n" " mangling input\n" " operation The operation to perform with each dictionary item.\n" " Either, overwrite or insert. Default option is both.\n" " random_state0 The first half of the seed to afl's random number\n" " generator\n" " random_state1 The second half of the seed to afl's random number\n" " generator\n" "\n" ); } ================================================ FILE: mutators/dictionary_mutator/dictionary_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef DICTIONARY_MUTATOR_EXPORTS #define DICTIONARY_MUTATOR_API __declspec(dllexport) #else #define DICTIONARY_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define DICTIONARY_MUTATOR_API #endif #define MUTATOR_NAME "dictionary" DICTIONARY_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); DICTIONARY_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); DICTIONARY_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); DICTIONARY_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); DICTIONARY_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define dictionary_free_state default_free_state DICTIONARY_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); DICTIONARY_MUTATOR_API int 
FUNCNAME(get_current_iteration)(void * mutator_state); #define dictionary_get_total_iteration_count return_unknown_or_infinite_total_iterations DICTIONARY_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); DICTIONARY_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); DICTIONARY_MUTATOR_API int FUNCNAME(help)(char **help_str); #ifndef ALL_MUTATORS_IN_ONE DICTIONARY_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/havoc_mutator/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (havoc_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(HAVOC_SRC ${PROJECT_SOURCE_DIR}/havoc_mutator.c) source_group("Library Sources" FILES ${HAVOC_SRC}) add_library(havoc_mutator SHARED ${HAVOC_SRC} $ $) target_link_libraries(havoc_mutator utils) target_compile_definitions(havoc_mutator PUBLIC HAVOC_MUTATOR_EXPORTS) target_compile_definitions(havoc_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(havoc_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(havoc_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(havoc_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/havoc_mutator/havoc_mutator.c ================================================ #include "havoc_mutator.h" #include #include #include #include #include #include #include #include #include struct havoc_state { char * input; size_t input_length; int iteration; mutate_info_t info; }; typedef struct havoc_state havoc_state_t; static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = { havoc, }; mutator_t havoc_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), havoc_free_state, FUNCNAME(set_state), 
FUNCNAME(get_current_iteration), havoc_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; /** * This function fills in m with all of the function pointers for this mutator. * @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE HAVOC_MUTATOR_API void init(mutator_t * m) { memcpy(m, &havoc_mutator, sizeof(mutator_t)); } #endif static havoc_state_t * setup_options(char * options) { havoc_state_t * state = (havoc_state_t *)malloc(sizeof(havoc_state_t)); if (!state) return NULL; memset(state, 0, sizeof(havoc_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 0); return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. */ HAVOC_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(havoc_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. 
*/ HAVOC_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((havoc_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(havoc_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { havoc_state_t * state = (havoc_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; if (state->info.stage_cur > MAX(HAVOC_MIN, HAVOC_CYCLES * (state->info.perf_score / state->info.havoc_div) / 100)) { state->info.stage = 0; state->info.stage_cur = 0; state->info.queue_cycle++; } state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ HAVOC_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return mutate_inner(mutator_state, buffer, buffer_length, 0); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. 
* @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ HAVOC_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(havoc_state_t, state->info.mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. */ HAVOC_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { havoc_state_t * state = (havoc_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. 
* @return 0 on success or non-zero on failure */ HAVOC_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { havoc_state_t * current_state = (havoc_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ HAVOC_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(havoc_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ HAVOC_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(havoc_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. 
*/ HAVOC_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(havoc_state_t); } /** * This function sets a help message for the mutator. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ HAVOC_MUTATOR_API int FUNCNAME(help)(char **help_str) { GENERIC_MUTATOR_HELP( "havoc - afl-based havoc mutator\n" "Options:\n" " dictionary A file or directory containing dictionary to use while\n" " mangling input\n" " havoc_div A divisor for determining the number of rounds that\n" " the havoc stage should run (typically 1, 2, 5, or 10)\n" " perf_score A performance score used to determine how long a havoc\n" " round lasts. Typically 100, higher results in a\n" " larger number of mutations in these stages before\n" " moving on.\n" " queue_cycle The queue round counter. Used in determining how to\n" " mutate input. Generally this shouldn't need to be set\n" " random_state0 The first half of the seed to afl's random number\n" " generator\n" " random_state1 The second half of the seed to afl's random number\n" " generator\n" "\n" ); } ================================================ FILE: mutators/havoc_mutator/havoc_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef HAVOC_MUTATOR_EXPORTS #define HAVOC_MUTATOR_API __declspec(dllexport) #else #define HAVOC_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define HAVOC_MUTATOR_API #endif #define MUTATOR_NAME "havoc" HAVOC_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); HAVOC_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); HAVOC_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); HAVOC_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); 
HAVOC_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define havoc_free_state default_free_state HAVOC_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); HAVOC_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define havoc_get_total_iteration_count return_unknown_or_infinite_total_iterations HAVOC_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); HAVOC_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); HAVOC_MUTATOR_API int FUNCNAME(help)(char **help_str); #ifndef ALL_MUTATORS_IN_ONE HAVOC_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/honggfuzz_mutator/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (honggfuzz_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(HONGGFUZZ_SRC ${PROJECT_SOURCE_DIR}/honggfuzz_mutator.c) source_group("Library Sources" FILES ${HONGGFUZZ_SRC}) add_library(honggfuzz_mutator SHARED ${HONGGFUZZ_SRC} $ $) target_link_libraries(honggfuzz_mutator utils) target_compile_definitions(honggfuzz_mutator PUBLIC HONGGFUZZ_MUTATOR_EXPORTS) target_compile_definitions(honggfuzz_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(honggfuzz_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(honggfuzz_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(honggfuzz_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/honggfuzz_mutator/honggfuzz_mutator.c ================================================ #include "honggfuzz_mutator.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef struct { char* s; size_t len; } string_t; struct honggfuzz_state { int mutations_per_run; 
char * dictionary_file; uint64_t dictionary_count; string_t ** dictq; char * input; size_t input_length; //Protects the fields below, i.e. the iteration count, mutate buffer information, and random state mutex_t mutate_mutex; int iteration; uint8_t * mutated_buffer; uint64_t mutated_buffer_length; uint64_t max_mutated_buffer_length; uint64_t random_state[2]; }; typedef struct honggfuzz_state honggfuzz_state_t; mutator_t honggfuzz_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), honggfuzz_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), honggfuzz_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; //////////////////////////////////////////////////////////////////////////////////////////// //// Honggfuzz mutator methods ///////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// /* * The code in this section (Honggfuzz mutator methods) was taken * from honggfuzz and falls under the following license: * * honggfuzz - run->dynamicFilefer mangling routines * ----------------------------------------- * * Author: * Robert Swiecki * * Copyright 2010-2015 by Google Inc. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * The code in this section has been modified from the original to suit the * purposes of this project. 
*/ //The following functions are taken from honggfuzz, see: //https://github.com/google/honggfuzz/blob/master/mangle.c //https://github.com/google/honggfuzz/blob/master/libcommon/util.c /* * xoroshiro128plus by David Blackman and Sebastiano Vigna */ static inline uint64_t util_RotL(const uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } static inline uint64_t util_InternalRnd64(honggfuzz_state_t * state) { const uint64_t s0 = state->random_state[0]; uint64_t s1 = state->random_state[1]; const uint64_t result = s0 + s1; s1 ^= s0; state->random_state[0] = util_RotL(s0, 55) ^ s1 ^ (s1 << 14); state->random_state[1] = util_RotL(s1, 36); return result; } uint64_t util_rnd64(honggfuzz_state_t * state) { return util_InternalRnd64(state); } uint64_t util_rndGet(honggfuzz_state_t * state, uint64_t min, uint64_t max) { assert(min <= max); if (max == UINT64_MAX) { return util_rnd64(state); } return ((util_rnd64(state) % (max - min + 1)) + min); } void util_rndBuf(honggfuzz_state_t * state, uint8_t* buf, uint64_t sz) { if (sz == 0) { return; } for (uint64_t i = 0; i < sz; i++) { buf[i] = (uint8_t)util_InternalRnd64(state); } } static inline void mangle_Overwrite(honggfuzz_state_t * state, const uint8_t* src, uint64_t off, uint64_t sz) { uint64_t maxToCopy = state->mutated_buffer_length - off; if (sz > maxToCopy) { sz = maxToCopy; } memmove(&state->mutated_buffer[off], src, (size_t)sz); } static inline void mangle_Move(honggfuzz_state_t * state, uint64_t off_from, uint64_t off_to, uint64_t len) { if (off_from >= state->mutated_buffer_length) { return; } if (off_to >= state->mutated_buffer_length) { return; } int64_t len_from = (int64_t)state->mutated_buffer_length - off_from - 1; int64_t len_to = (int64_t)state->mutated_buffer_length - off_to - 1; if ((int64_t)len > len_from) { len = len_from; } if ((int64_t)len > len_to) { len = len_to; } memmove(&state->mutated_buffer[off_to], &state->mutated_buffer[off_from], (size_t)len); } static void 
/**
 * Grows the mutated data by up to len bytes (limited by the buffer capacity)
 * and shifts the data found at off towards the end to open a gap.
 */
static void mangle_Inflate(honggfuzz_state_t * state, uint64_t off, uint64_t len)
{
	uint64_t room;

	if (state->mutated_buffer_length >= state->max_mutated_buffer_length) {
		return;
	}

	room = state->max_mutated_buffer_length - state->mutated_buffer_length;
	if (len > room) {
		len = room;
	}

	state->mutated_buffer_length += len;
	mangle_Move(state, off, off + len, state->mutated_buffer_length);
}

/** Moves a random amount of bytes between two random offsets. */
static void mangle_MemMove(honggfuzz_state_t * state)
{
	const uint64_t src = util_rndGet(state, 0, state->mutated_buffer_length - 1);
	const uint64_t dst = util_rndGet(state, 0, state->mutated_buffer_length - 1);
	const uint64_t count = util_rndGet(state, 0, state->mutated_buffer_length);

	mangle_Move(state, src, dst, count);
}

/** Replaces one random byte with a random value. */
static void mangle_Byte(honggfuzz_state_t * state)
{
	const uint64_t pos = util_rndGet(state, 0, state->mutated_buffer_length - 1);

	state->mutated_buffer[pos] = (uint8_t)util_rnd64(state);
}

/** Overwrites 2, 3 or 4 bytes at a random offset with random data. */
static void mangle_Bytes(honggfuzz_state_t * state)
{
	const uint64_t pos = util_rndGet(state, 0, state->mutated_buffer_length - 1);
	uint32_t rnd_word = (uint32_t)util_rnd64(state);
	const uint64_t count = util_rndGet(state, 2, 4);

	mangle_Overwrite(state, (uint8_t*)&rnd_word, pos, count);
}

/** Flips one random bit of one random byte. */
static void mangle_Bit(honggfuzz_state_t * state)
{
	const uint64_t pos = util_rndGet(state, 0, state->mutated_buffer_length - 1);
	const uint64_t bit = util_rndGet(state, 0, 7);

	state->mutated_buffer[pos] ^= (uint8_t)(1U << bit);
}

/**
 * Inserts a random dictionary entry at a random offset. Without a dictionary
 * a single bit is flipped instead.
 */
static void mangle_DictionaryInsert(honggfuzz_state_t * state)
{
	string_t* entry;
	uint64_t idx, pos;

	if (state->dictionary_count == 0) {
		mangle_Bit(state);
		return;
	}

	idx = util_rndGet(state, 0, state->dictionary_count - 1);
	entry = state->dictq[idx];
	pos = util_rndGet(state, 0, state->mutated_buffer_length - 1);

	mangle_Inflate(state, pos, entry->len);
	mangle_Move(state, pos, pos + entry->len, entry->len);
	mangle_Overwrite(state, (uint8_t*)entry->s, pos, entry->len);
}

/**
 * Overwrites the data at a random offset with a random dictionary entry.
 * Without a dictionary a single bit is flipped instead.
 */
static void mangle_Dictionary(honggfuzz_state_t * state)
{
	string_t* entry;
	uint64_t idx, pos;

	if (state->dictionary_count == 0) {
		mangle_Bit(state);
		return;
	}

	idx = util_rndGet(state, 0, state->dictionary_count - 1);
	entry = state->dictq[idx];
	pos = util_rndGet(state, 0, state->mutated_buffer_length - 1);

	mangle_Overwrite(state, (uint8_t*)entry->s, pos, entry->len);
}
mangle_Bit(state); return; } uint64_t choice = util_rndGet(state, 0, state->dictionary_count - 1); string_t* str = state->dictq[choice]; uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); mangle_Overwrite(state, (uint8_t*)str->s, off, str->len); } static void mangle_Magic(honggfuzz_state_t * state) { struct magic_values { //ugh. Visual studio insists on strings being null terminated uint8_t val[9];//so we need to use 9 bytes instead of 8 for the value uint64_t size; }; static const struct magic_values mangleMagicVals[] = { // 1B - No endianness { "\x00\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x01\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x02\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x03\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x04\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x05\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x06\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x07\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x08\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x09\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0A\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0B\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0C\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0D\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0E\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x0F\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x10\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x20\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x40\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x7E\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x7F\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\x81\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\xC0\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\xFE\x00\x00\x00\x00\x00\x00\x00", 1 }, { "\xFF\x00\x00\x00\x00\x00\x00\x00", 1 }, // 2B - NE { "\x00\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x01\x01\x00\x00\x00\x00\x00\x00", 2 }, { "\x80\x80\x00\x00\x00\x00\x00\x00", 2 }, { "\xFF\xFF\x00\x00\x00\x00\x00\x00", 2 }, // 2B - BE { "\x00\x01\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x02\x00\x00\x00\x00\x00\x00", 2 }, { 
"\x00\x03\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x04\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x05\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x06\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x07\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x08\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x09\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0A\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0B\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0C\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0D\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0E\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x0F\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x10\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x20\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x40\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x7E\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x7F\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x80\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x81\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\xC0\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\xFE\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\xFF\x00\x00\x00\x00\x00\x00", 2 }, { "\x7E\xFF\x00\x00\x00\x00\x00\x00", 2 }, { "\x7F\xFF\x00\x00\x00\x00\x00\x00", 2 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x80\x01\x00\x00\x00\x00\x00\x00", 2 }, { "\xFF\xFE\x00\x00\x00\x00\x00\x00", 2 }, // 2B - LE { "\x00\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x01\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x02\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x03\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x04\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x05\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x06\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x07\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x08\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x09\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0A\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0B\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0C\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0D\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0E\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x0F\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x10\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x20\x00\x00\x00\x00\x00\x00\x00", 2 }, { 
"\x40\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x7E\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x7F\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\x81\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\xC0\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\xFE\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\xFF\x00\x00\x00\x00\x00\x00\x00", 2 }, { "\xFF\x7E\x00\x00\x00\x00\x00\x00", 2 }, { "\xFF\x7F\x00\x00\x00\x00\x00\x00", 2 }, { "\x00\x80\x00\x00\x00\x00\x00\x00", 2 }, { "\x01\x80\x00\x00\x00\x00\x00\x00", 2 }, { "\xFE\xFF\x00\x00\x00\x00\x00\x00", 2 }, // 4B - NE { "\x00\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x01\x01\x01\x01\x00\x00\x00\x00", 4 }, { "\x80\x80\x80\x80\x00\x00\x00\x00", 4 }, { "\xFF\xFF\xFF\xFF\x00\x00\x00\x00", 4 }, // 4B - BE { "\x00\x00\x00\x01\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x02\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x03\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x04\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x05\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x06\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x07\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x08\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x09\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0A\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0B\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0C\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0D\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0E\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x0F\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x10\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x20\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x40\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x7E\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x7F\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x80\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x81\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\xC0\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\xFE\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\xFF\x00\x00\x00\x00", 4 }, { "\x7E\xFF\xFF\xFF\x00\x00\x00\x00", 4 }, { "\x7F\xFF\xFF\xFF\x00\x00\x00\x00", 4 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x80\x00\x00\x01\x00\x00\x00\x00", 4 }, { 
"\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 4 }, // 4B - LE { "\x00\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x01\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x02\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x03\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x04\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x05\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x06\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x07\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x08\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x09\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0A\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0B\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0C\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0D\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0E\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x0F\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x10\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x20\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x40\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x7E\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x7F\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\x81\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\xC0\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\xFE\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\xFF\x00\x00\x00\x00\x00\x00\x00", 4 }, { "\xFF\xFF\xFF\x7E\x00\x00\x00\x00", 4 }, { "\xFF\xFF\xFF\x7F\x00\x00\x00\x00", 4 }, { "\x00\x00\x00\x80\x00\x00\x00\x00", 4 }, { "\x01\x00\x00\x80\x00\x00\x00\x00", 4 }, { "\xFE\xFF\xFF\xFF\x00\x00\x00\x00", 4 }, // 8B - NE { "\x00\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x01\x01\x01\x01\x01\x01\x01\x01", 8 }, { "\x80\x80\x80\x80\x80\x80\x80\x80", 8 }, { "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8 }, // 8B - BE { "\x00\x00\x00\x00\x00\x00\x00\x01", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x02", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x03", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x04", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x05", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x06", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x07", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x08", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x09", 8 }, { 
"\x00\x00\x00\x00\x00\x00\x00\x0A", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x0B", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x0C", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x0D", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x0E", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x0F", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x10", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x20", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x40", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x7E", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x7F", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x80", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x81", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\xC0", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\xFE", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\xFF", 8 }, { "\x7E\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8 }, { "\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x80\x00\x00\x00\x00\x00\x00\x01", 8 }, { "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE", 8 }, // 8B - LE { "\x00\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x01\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x02\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x03\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x04\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x05\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x06\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x07\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x08\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x09\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0A\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0B\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0C\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0D\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0E\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x0F\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x10\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x20\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x40\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x7E\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x7F\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x80\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\x81\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\xC0\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\xFE\x00\x00\x00\x00\x00\x00\x00", 8 }, { 
"\xFF\x00\x00\x00\x00\x00\x00\x00", 8 }, { "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7E", 8 }, { "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 8 }, { "\x00\x00\x00\x00\x00\x00\x00\x80", 8 }, { "\x01\x00\x00\x00\x00\x00\x00\x80", 8 }, { "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8 }, }; uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint64_t choice = util_rndGet(state, 0, ARRAY_SIZE(mangleMagicVals) - 1); mangle_Overwrite(state, mangleMagicVals[choice].val, off, mangleMagicVals[choice].size); } static void mangle_MemSet(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint64_t sz = util_rndGet(state, 1, state->mutated_buffer_length - off); int val = (int)util_rndGet(state, 0, UINT8_MAX); memset(&state->mutated_buffer[off], val, (size_t)sz); } static void mangle_Random(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint64_t len = util_rndGet(state, 1, state->mutated_buffer_length - off); util_rndBuf(state, &state->mutated_buffer[off], len); } static void mangle_AddSub(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); /* 1,2,4,8 */ uint64_t varLen = 1ULL << util_rndGet(state, 0, 3); if ((state->mutated_buffer_length - off) < varLen) { varLen = 1; } int delta = (int)util_rndGet(state, 0, 8192); delta -= 4096; assert(varLen == 1 || varLen == 2 || varLen == 4 || varLen == 8); switch (varLen) { case 1: { state->mutated_buffer[off] += delta; break; } case 2: { int16_t val; memcpy(&val, &state->mutated_buffer[off], sizeof(val)); if (util_rnd64(state) & 0x1) { val += delta; } else { /* Foreign endianess */ val = SWAP16(val); val += delta; val = SWAP16(val); } mangle_Overwrite(state, (uint8_t*)&val, off, varLen); break; } case 4: { int32_t val; memcpy(&val, &state->mutated_buffer[off], sizeof(val)); if (util_rnd64(state) & 0x1) { val += delta; } else { /* Foreign endianess */ val = SWAP32(val); val += delta; val = 
SWAP32(val); } mangle_Overwrite(state, (uint8_t*)&val, off, varLen); break; } case 8: { int64_t val; memcpy(&val, &state->mutated_buffer[off], sizeof(val)); if (util_rnd64(state) & 0x1) { val += delta; } else { /* Foreign endianess */ val = SWAP64(val); val += delta; val = SWAP64(val); } mangle_Overwrite(state, (uint8_t*)&val, off, varLen); break; } } } static void mangle_IncByte(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); state->mutated_buffer[off] += (uint8_t)1UL; } static void mangle_DecByte(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); state->mutated_buffer[off] -= (uint8_t)1UL; } static void mangle_NegByte(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); state->mutated_buffer[off] = ~(state->mutated_buffer[off]); } static void mangle_CloneByte(honggfuzz_state_t * state) { uint64_t off1 = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint64_t off2 = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint8_t tmp = state->mutated_buffer[off1]; state->mutated_buffer[off1] = state->mutated_buffer[off2]; state->mutated_buffer[off2] = tmp; } static void mangle_Resize(honggfuzz_state_t * state) { state->mutated_buffer_length = util_rndGet(state, 1, state->max_mutated_buffer_length); } static void mangle_Expand(honggfuzz_state_t * state) { uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1); uint64_t len = util_rndGet(state, 1, state->mutated_buffer_length - off); mangle_Inflate(state, off, len); mangle_Move(state, off, off + len, state->mutated_buffer_length); } static void mangle_Shrink(honggfuzz_state_t * state) { if (state->mutated_buffer_length <= 1U) { return; } uint64_t len = util_rndGet(state, 1, state->mutated_buffer_length - 1); uint64_t off = util_rndGet(state, 0, len); state->mutated_buffer_length -= len; mangle_Move(state, off + len, off, 
/** Grows the data at a random offset and fills the chosen range with random bytes. */
static void mangle_InsertRnd(honggfuzz_state_t * state)
{
	uint64_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1);
	uint64_t len = util_rndGet(state, 1, state->mutated_buffer_length - off);

	mangle_Inflate(state, off, len);
	mangle_Move(state, off, off + len, state->mutated_buffer_length);
	util_rndBuf(state, &state->mutated_buffer[off], len);
}

/** Overwrites the data at a random offset with the decimal text of a random signed 64-bit number. */
static void mangle_ASCIIVal(honggfuzz_state_t * state)
{
	char buf[32];
	snprintf(buf, sizeof(buf), "%" PRId64, (int64_t)util_rnd64(state));
	size_t off = util_rndGet(state, 0, state->mutated_buffer_length - 1);

	mangle_Overwrite(state, (uint8_t*)buf, off, strlen(buf));
}

/**
 * Applies between 1 and mutations_per_run randomly chosen mangle operations to
 * the mutated buffer. Empty data is first turned into a single NUL byte, as
 * the mangle operations need at least one byte to work on.
 * @param state - the mutator state; mutated_buffer must have room for at least one byte
 */
static void mangle_mangleContent(honggfuzz_state_t* state)
{
	//A negative count would be converted to a huge unsigned upper bound below
	//and keep this function busy practically forever, so treat it like zero.
	if (state->mutations_per_run <= 0) {
		return;
	}

	/* Minimum support file size for mangling is 1 */
	if (state->mutated_buffer_length == 0UL) {
		state->mutated_buffer_length = 1UL;
		state->mutated_buffer[0] = '\0';
	}

	//The order of this table is part of the mutator's reproducible behavior:
	//a saved random state selects operations by index.
	static void(*const mangleFuncs[])(honggfuzz_state_t * state) = {
		mangle_Resize,
		mangle_Byte,
		mangle_Bit,
		mangle_Bytes,
		mangle_Magic,
		mangle_IncByte,
		mangle_DecByte,
		mangle_NegByte,
		mangle_AddSub,
		mangle_Dictionary,
		mangle_DictionaryInsert,
		mangle_MemMove,
		mangle_MemSet,
		mangle_Random,
		mangle_CloneByte,
		mangle_Expand,
		mangle_Shrink,
		mangle_InsertRnd,
		mangle_ASCIIVal,
	};

	uint64_t changesCnt = util_rndGet(state, 1, state->mutations_per_run);
	for (uint64_t x = 0; x < changesCnt; x++) {
		uint64_t choice = util_rndGet(state, 0, ARRAY_SIZE(mangleFuncs) - 1);
		mangleFuncs[choice](state);
	}
}

/**
 * Decodes the C style escape sequences of a string in place. Decoding stops at
 * the end of the string or at the first unescaped double quote. Supported
 * escapes are \a, \r, \n, \t, \0 and \xHH; any other escaped character is
 * copied literally.
 * @param s - the NUL terminated string to decode; it is modified in place and
 * NUL terminated after the decoded data
 * @return the number of decoded bytes (which may include NUL bytes)
 */
static size_t util_decodeCString(char* s)
{
	size_t o = 0;

	for (size_t i = 0; s[i] != '\0' && s[i] != '"'; i++, o++) {
		if (s[i] != '\\') {
			s[o] = s[i];
			continue;
		}

		i++;
		if (s[i] == '\0') {
			//A backslash at the very end has nothing to escape. Keep it as
			//a literal character and stop here: continuing the loop would
			//step over the terminator and read past the end of the string.
			s[o] = '\\';
			o++;
			break;
		}

		switch (s[i]) {
			case 'a':
				s[o] = '\a';
				break;
			case 'r':
				s[o] = '\r';
				break;
			case 'n':
				s[o] = '\n';
				break;
			case 't':
				s[o] = '\t';
				break;
			case '0':
				s[o] = '\0';
				break;
			case 'x': {
				if (s[i + 1] && s[i + 2]) {
					char hex[] = { s[i + 1], s[i + 2], 0 };
					s[o] = (char) strtoul(hex, NULL, 16);
					i += 2;
				} else {
					s[o] = s[i];
				}
				break;
			}
			default:
				s[o] = s[i];
				break;
		}
	}
	s[o] = '\0';
	return o;
}
{ s[o] = s[i]; } break; } default: s[o] = s[i]; break; } break; } default: { s[o] = s[i]; break; } } } s[o] = '\0'; return o; } static int input_parseDictionary(honggfuzz_state_t * state) { char * contents; char lineptr[2100]; int start, pos = 0, length, ret = 0; size_t len; length = read_file(state->dictionary_file, &contents); if (length < 0) { printf("Couldn't open '%s'", state->dictionary_file); return 1; } for (;;) { //Find the end of the line len = 0; memset(lineptr, 0, sizeof(lineptr)); for (start = pos; pos < length; pos++) { if (contents[pos] == '\n') { len = pos - start; memcpy(lineptr, &contents[start], len); pos++; break; } } if (len == 0) { if(start == pos) //end of the file with no left over content break; len = pos - start; memcpy(lineptr, &contents[start], len); } //Remove the \r\n if (len > 1 && lineptr[len - 1] == '\n') { lineptr[len - 1] = '\0'; len--; } if (len > 1 && lineptr[len - 1] == '\r') { lineptr[len - 1] = '\0'; len--; } //if the line is empty, skip it if (lineptr[0] == '#' || lineptr[0] == '\r' || lineptr[0] == '\n' || lineptr[0] == '\0') { continue; } //Parse the dictionary line char bufn[1025]; char bufv[1025]; if (sscanf(lineptr, "\"%1024s", bufv) != 1 && sscanf(lineptr, "%1024[^=]=\"%1024s", bufn, bufv) != 2) { printf("Incorrect dictionary entry: '%s'.\n", lineptr); ret = 1; break; } char* s = strdup(bufv); string_t* str = (string_t*)malloc(sizeof(string_t)); str->len = util_decodeCString(s); str->s = s; state->dictq = (string_t **)realloc(state->dictq, (state->dictionary_count + 1) * sizeof(string_t)); state->dictq[state->dictionary_count] = str; state->dictionary_count++; } return ret; } //////////////////////////////////////////////////////////////////////////////////////////// //// API methods /////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// #ifndef ALL_MUTATORS_IN_ONE /** * This function filled in the supplied 
mutator_t with all of the function * pointers for this mutator. * @param m - a pointer to a mutator_t structure * @return none */ HONGGFUZZ_MUTATOR_API void init(mutator_t * m) { memcpy(m, &honggfuzz_mutator, sizeof(mutator_t)); } #endif /** * This function creates and initializes a honggfuzz_state_t object based on the passed in JSON options. * @return the newly created honggfuzz_state_t object or NULL on failure */ static honggfuzz_state_t * setup_options(char * options) { honggfuzz_state_t * state; state = (honggfuzz_state_t *)malloc(sizeof(honggfuzz_state_t)); if (!state) return NULL; memset(state, 0, sizeof(honggfuzz_state_t)); //Setup defaults state->mutations_per_run = 6; state->random_state[0] = (((uint64_t)rand()) << 32) | rand(); state->random_state[1] = (((uint64_t)rand()) << 32) | rand(); state->mutate_mutex = create_mutex(); if (!state->mutate_mutex) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_OPTION_INT(state, options, mutations_per_run, "mutations_per_run", FUNCNAME(cleanup)); PARSE_OPTION_UINT64T_TEMP(state, options, random_state[0], "random_state0", FUNCNAME(cleanup), temp1); PARSE_OPTION_UINT64T_TEMP(state, options, random_state[1], "random_state1", FUNCNAME(cleanup), temp2); PARSE_OPTION_STRING(state, options, dictionary_file, "dictionary", FUNCNAME(cleanup)); if (state->dictionary_file && input_parseDictionary(state)) { FUNCNAME(cleanup)(state); return NULL; } return state; } /** * This function will allocate and initialize the mutator state. The mutator state should be * freed by calling the cleanup function. * @param options - a json string that contains the honggfuzz specific options. * @param state - optionally, a previously dumped state (with the get_state() function) to load * @param input - The input that this mutator will later be mutating * @param input_length - the size of the input parameter * @return a mutator specific structure or NULL on failure. 
The returned value should * not be used for anything other than passing to the various Mutator API functions. */ HONGGFUZZ_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { honggfuzz_state_t * honggfuzz_state = setup_options(options); if (!honggfuzz_state) return NULL; honggfuzz_state->input = (char *)malloc(input_length); if (!honggfuzz_state->input || !input_length) { FUNCNAME(cleanup)(honggfuzz_state); return NULL; } memcpy(honggfuzz_state->input, input, input_length); honggfuzz_state->input_length = input_length; if (state && FUNCNAME(set_state)(honggfuzz_state, state)) { FUNCNAME(cleanup)(honggfuzz_state); return NULL; } return honggfuzz_state; } /** * This function clears out the dictionary related information inside a honggfuzz_state object * @param honggfuzz_state - a previously created honggfuzz specific state structure */ static void clear_dictionary(honggfuzz_state_t * honggfuzz_state) { uint64_t i; for (i = 0; i < honggfuzz_state->dictionary_count; i++) { free(honggfuzz_state->dictq[i]->s); free(honggfuzz_state->dictq[i]); } free(honggfuzz_state->dictionary_file); free(honggfuzz_state->dictq); honggfuzz_state->dictq = NULL; honggfuzz_state->dictionary_count = 0; honggfuzz_state->dictionary_file = NULL; } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ HONGGFUZZ_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { honggfuzz_state_t * honggfuzz_state = (honggfuzz_state_t *)mutator_state; clear_dictionary(honggfuzz_state); destroy_mutex(honggfuzz_state->mutate_mutex); free(honggfuzz_state->input); honggfuzz_state->input = NULL; free(honggfuzz_state); } /** * This function will mutate the input given in the create function and return it in the buffer argument. 
* @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ HONGGFUZZ_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { honggfuzz_state_t * honggfuzz_state = (honggfuzz_state_t *)mutator_state; //Can't mutate an empty buffer if (buffer_length == 0) return -1; //Setup the mutated buffer honggfuzz_state->mutated_buffer = (uint8_t *)buffer; honggfuzz_state->mutated_buffer_length = MIN(buffer_length, honggfuzz_state->input_length); memcpy(honggfuzz_state->mutated_buffer, honggfuzz_state->input, (size_t)honggfuzz_state->mutated_buffer_length); honggfuzz_state->max_mutated_buffer_length = buffer_length; //Now mutate the buffer honggfuzz_state->iteration++; mangle_mangleContent(honggfuzz_state); return (int)honggfuzz_state->mutated_buffer_length; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. 
* @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ HONGGFUZZ_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(honggfuzz_state_t, state->mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. This will be a mutator specific JSON string. */ HONGGFUZZ_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { honggfuzz_state_t * honggfuzz_state = (honggfuzz_state_t *)mutator_state; json_t *obj, *temp, *dictionary_file, *dictionary_list, *dictionary_obj; uint64_t i; char * ret; obj = json_object(); ADD_INT(temp, honggfuzz_state->iteration, obj, "iteration"); ADD_UINT64T(temp, honggfuzz_state->random_state[0], obj, "random_state0"); ADD_UINT64T(temp, honggfuzz_state->random_state[1], obj, "random_state1"); if (honggfuzz_state->dictionary_file) { dictionary_file = json_string(honggfuzz_state->dictionary_file); if(!dictionary_file) return NULL; json_object_set_new(obj, "dictionary_file", dictionary_file); //Add the dictionary list to the json object dictionary_list = json_array(); if (!dictionary_list) return NULL; for (i = 0; i < honggfuzz_state->dictionary_count; i++) { dictionary_obj = json_object(); if (!dictionary_obj) return NULL; ADD_MEM(temp, honggfuzz_state->dictq[i]->s, honggfuzz_state->dictq[i]->len, dictionary_obj, "s"); ADD_UINT64T(temp, honggfuzz_state->dictq[i]->len, dictionary_obj, "len"); json_array_append_new(dictionary_list, dictionary_obj); } json_object_set_new(obj, "dictionary", 
dictionary_list); } ret = json_dumps(obj, 0); json_decref(obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. * @return 0 on success or non-zero on failure */ HONGGFUZZ_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { honggfuzz_state_t * honggfuzz_state = (honggfuzz_state_t *)mutator_state; int result, inner_result, temp_int; uint64_t temp_uint64t; char * temp_str; json_t * dictionary_obj; string_t * dictionary_item; if (!state) return 1; GET_INT(temp_int, state, honggfuzz_state->iteration, "iteration", result); GET_UINT64T(temp_uint64t, state, honggfuzz_state->random_state[0], "random_state0", result); GET_UINT64T(temp_uint64t, state, honggfuzz_state->random_state[1], "random_state1", result); clear_dictionary(honggfuzz_state); temp_str = get_string_options(state, "dictionary_file", &result); if (result > 0) { honggfuzz_state->dictionary_file = temp_str; FOREACH_OBJECT_JSON_ARRAY_ITEM_BEGIN(state, dictionary, "dictionary", dictionary_obj, result) //Create the new dictionary item dictionary_item = (string_t *)malloc(sizeof(string_t)); GET_ITEM(dictionary_obj, dictionary_item->s, temp_str, get_mem_options_from_json, "s", inner_result); GET_ITEM(dictionary_obj, dictionary_item->len, temp_int, get_int_options_from_json, "len", inner_result); //Add the dictionary item to the dictionary linked list honggfuzz_state->dictq = (string_t **)realloc(honggfuzz_state->dictq, (honggfuzz_state->dictionary_count + 1) * sizeof(string_t)); honggfuzz_state->dictq[honggfuzz_state->dictionary_count] = dictionary_item; honggfuzz_state->dictionary_count++; FOREACH_OBJECT_JSON_ARRAY_ITEM_END(dictionary); if (result < 0) return 1; } return 0; } /** * This function will return the current 
iteration count of the mutator, i.e.
 * how many mutations have been generated with it.
 * The body is supplied entirely by the GENERIC_MUTATOR_GET_ITERATION macro, which is
 * defined outside this file and instantiated here for honggfuzz_state_t.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @return value - the number of previously generated mutations
 */
HONGGFUZZ_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state)
{
	GENERIC_MUTATOR_GET_ITERATION(honggfuzz_state_t);
}

/**
 * Obtains information about the inputs that were given to the mutator when it was created
 * The body is supplied entirely by the SINGLE_INPUT_GET_INFO macro (defined outside this file).
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator
 * when it was created. This parameter is optional and can be NULL, if this information is not needed
 * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this
 * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed.
 */
HONGGFUZZ_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes)
{
	SINGLE_INPUT_GET_INFO(honggfuzz_state_t);
}

/**
 * This function will set the input(saved in the mutators state) to something new.
 * This can be used to reinitialize a mutator with new data, without reallocating the entire state struct.
 * The body is supplied entirely by the GENERIC_MUTATOR_SET_INPUT macro (defined outside this file).
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called
 * @param input_length - the size in bytes of the input buffer.
 * @return 0 on success and -1 on failure
 */
HONGGFUZZ_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length)
{
	GENERIC_MUTATOR_SET_INPUT(honggfuzz_state_t);
}

/**
 * This function sets a help message for the mutator.
 * @param help_str - A pointer that will be updated to point to the new help string.
* @return 0 on success and -1 on failure */ HONGGFUZZ_MUTATOR_API int FUNCNAME(help)(char ** help_str) { GENERIC_MUTATOR_HELP( "honggfuzz - honggfuzz-based mutator\n" "Options:\n" " dictionary_file A file containing dictionary words to use while\n" " mangling input\n" " mutations_per_run The number of different mangle functions to apply per\n" " single round of mutating the input\n" " random_state0 The first half of the seed to honggfuzz's random\n" " number generator\n" " random_state1 The second half of the seed to honggfuzz's random\n" " number generator\n" "\n" ); } ================================================ FILE: mutators/honggfuzz_mutator/honggfuzz_mutator.h ================================================ #pragma once #include #include #include //grab the definition of MIN/MAX #ifdef _WIN32 #ifdef HONGGFUZZ_MUTATOR_EXPORTS #define HONGGFUZZ_MUTATOR_API __declspec(dllexport) #else #define HONGGFUZZ_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define HONGGFUZZ_MUTATOR_API #endif #define MUTATOR_NAME "honggfuzz" HONGGFUZZ_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); HONGGFUZZ_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); HONGGFUZZ_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); HONGGFUZZ_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); HONGGFUZZ_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define honggfuzz_free_state default_free_state HONGGFUZZ_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); HONGGFUZZ_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define honggfuzz_get_total_iteration_count return_unknown_or_infinite_total_iterations HONGGFUZZ_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); HONGGFUZZ_MUTATOR_API int FUNCNAME(set_input)(void 
* mutator_state, char * new_input, size_t input_length); HONGGFUZZ_MUTATOR_API int FUNCNAME(help)(char ** help_str); #ifndef ALL_MUTATORS_IN_ONE HONGGFUZZ_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/interesting_value_mutator/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (interesting_value_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(INTERESTING_VALUE_SRC ${PROJECT_SOURCE_DIR}/interesting_value_mutator.c) source_group("Library Sources" FILES ${INTERESTING_VALUE_SRC}) add_library(interesting_value_mutator SHARED ${INTERESTING_VALUE_SRC} $ $) target_link_libraries(interesting_value_mutator utils) target_compile_definitions(interesting_value_mutator PUBLIC INTERESTING_VALUE_MUTATOR_EXPORTS) target_compile_definitions(interesting_value_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(interesting_value_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(interesting_value_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(interesting_value_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/interesting_value_mutator/interesting_value_mutator.c ================================================ #include "interesting_value_mutator.h" #include #include #include #include #include #include #include #include struct interesting_value_state { uint64_t num_bytes; char * input; size_t input_length; int iteration; mutate_info_t info; }; typedef struct interesting_value_state interesting_value_state_t; static int(*const mutate_funcs[])(mutate_info_t *, mutate_buffer_t *) = { interesting_one_byte, interesting_two_byte, interesting_four_byte, }; mutator_t interesting_value_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), interesting_value_free_state, 
FUNCNAME(set_state), FUNCNAME(get_current_iteration), interesting_value_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; /** * This function fills in m with all of the function pointers for this mutator. * @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE INTERESTING_VALUE_MUTATOR_API void init(mutator_t * m) { memcpy(m, &interesting_value_mutator, sizeof(mutator_t)); } #endif static interesting_value_state_t * setup_options(char * options) { interesting_value_state_t * state; int bytes_per_stage[] = { 1, 2, 4 }; int i; state = (interesting_value_state_t *)malloc(sizeof(interesting_value_state_t)); if (!state) return NULL; memset(state, 0, sizeof(interesting_value_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 0); PARSE_OPTION_INT(state, options, num_bytes, "num_bytes", FUNCNAME(cleanup)); if (state->num_bytes) { state->info.stage = -1; for (i = 0; i < ARRAY_SIZE(bytes_per_stage) && state->info.stage < 0; i++) { if (bytes_per_stage[i] == state->num_bytes) state->info.stage = i; } if (state->info.stage < 0) { FUNCNAME(cleanup)(state); return NULL; } state->info.one_stage_only = 1; } return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. 
*/ INTERESTING_VALUE_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(interesting_value_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ INTERESTING_VALUE_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((interesting_value_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(interesting_value_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { interesting_value_state_t * state = (interesting_value_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. 
* @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error
 */
INTERESTING_VALUE_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length)
{
	/* is_thread_safe = 0: mutate_inner skips the mutate mutex on this path */
	return mutate_inner(mutator_state, buffer, buffer_length, 0);
}

/**
 * This function will mutate the input given in the create function and return it in the buffer argument.
 * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h
 * for the list of available flags.
 * The body is supplied entirely by the SINGLE_INPUT_MUTATE_EXTENDED macro (defined outside this
 * file), which is handed this mutator's state type and its mutate mutex.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param buffer - a buffer that the mutated input will be written to
 * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as
 * the original input buffer.
 * @param flags - A set of mutate flags that modify how this mutator mutates the input.
 * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error
 */
INTERESTING_VALUE_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags)
{
	SINGLE_INPUT_MUTATE_EXTENDED(interesting_value_state_t, state->info.mutate_mutex);
}

/**
 * This function will return the state of the mutator. The returned value can be used to restart the
 * mutator at a later time, by passing it to the create or set_state function. It is the caller's
 * responsibility to free the memory allocated here by calling the free_state function.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @return - a buffer that defines the current state of the mutator.
*/ INTERESTING_VALUE_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { interesting_value_state_t * state = (interesting_value_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. * @return 0 on success or non-zero on failure */ INTERESTING_VALUE_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { interesting_value_state_t * current_state = (interesting_value_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ INTERESTING_VALUE_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(interesting_value_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. 
* @return 0 on success and -1 on failure */ INTERESTING_VALUE_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(interesting_value_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. */ INTERESTING_VALUE_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(interesting_value_state_t); } /** * This function sets a help message for the mutator. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ INTERESTING_VALUE_MUTATOR_API int FUNCNAME(help)(char **help_str) { GENERIC_MUTATOR_HELP( "interesting_value - afl-based interesting value mutator\n" "Options:\n" " num_bytes The number of bytes to operate on; either 1, 2, or 4.\n" " The default option is to do all three of the\n" " options, one after another.\n" " skip_previous_stages Whether the mutation outputs should skip any output\n" " that would match the output of the bit_flip or\n" " arithmetic mutator. 
Useful when using multiple\n" " mutators\n" "\n" ); } ================================================ FILE: mutators/interesting_value_mutator/interesting_value_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef INTERESTING_VALUE_MUTATOR_EXPORTS #define INTERESTING_VALUE_MUTATOR_API __declspec(dllexport) #else #define INTERESTING_VALUE_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define INTERESTING_VALUE_MUTATOR_API #endif #define MUTATOR_NAME "interesting_value" INTERESTING_VALUE_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); INTERESTING_VALUE_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); INTERESTING_VALUE_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); INTERESTING_VALUE_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); INTERESTING_VALUE_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define interesting_value_free_state default_free_state INTERESTING_VALUE_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); INTERESTING_VALUE_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define interesting_value_get_total_iteration_count return_unknown_or_infinite_total_iterations INTERESTING_VALUE_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); INTERESTING_VALUE_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); INTERESTING_VALUE_MUTATOR_API int FUNCNAME(help)(char **help_str); #ifndef ALL_MUTATORS_IN_ONE INTERESTING_VALUE_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/multipart_mutator/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project 
(multipart_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(MULTIPART_SRC ${PROJECT_SOURCE_DIR}/multipart_mutator.c) source_group("Library Sources" FILES ${MULTIPART_SRC}) add_library(multipart_mutator SHARED ${MULTIPART_SRC} $ $) target_link_libraries(multipart_mutator utils) target_compile_definitions(multipart_mutator PUBLIC MULTIPART_MUTATOR_EXPORTS) target_compile_definitions(multipart_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(multipart_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(multipart_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(multipart_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/multipart_mutator/multipart_mutator.c ================================================ #include "multipart_mutator.h" #include #ifdef _WIN32 #include #else #include #include #endif #include #include #include #include #include #include #include typedef struct { char ** mutator_names; char * mutator_directory; mutator_t ** mutators; void ** mutator_states; size_t mutator_count; } multipart_state_t; /////////////////////////////////////////////////////////////////////////////////////////// // Helper functions /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// static int get_json_items(char * json_text, char * attribute_name, char *** output_array, int *all_use_same) { json_error_t error; json_t * root, *json_array, *item; char ** dumped_array = NULL; int i, num_items; *output_array = NULL; *all_use_same = 0; if (!json_text) return 0; root = json_loads(json_text, 0, &error); if (!root) return -1; if (attribute_name) { json_array = json_object_get(root, attribute_name); if (!json_array) { //Couldn't find the attribute json_decref(root); return 0; } } else json_array = root; //if options isn't an array, then we want all 
the mutators to use the same options if(!json_is_array(json_array)) { dumped_array = malloc(sizeof(char *)); dumped_array[0] = json_dumps(json_array, 0); *output_array = dumped_array; *all_use_same = 1;; json_decref(root); return 1; } //options is an empty array num_items = json_array_size(json_array); if (!num_items) { json_decref(root); return 0; } dumped_array = calloc(num_items, sizeof(char *)); for (i = 0; i < num_items; i++) { item = json_array_get(json_array, i); if (!json_is_null(item)) dumped_array[i] = json_dumps(item, 0); } json_decref(root); *output_array = dumped_array; return num_items; } static void free_mutator_arrays(char ** inputs, size_t * input_lengths, size_t inputs_count, char ** options, size_t num_options, char ** states, size_t num_states) { size_t i; for (i = 0; i < inputs_count; i++) free(inputs[i]); for (i = 0; i < num_options; i++) free(options[i]); for (i = 0; i < num_states; i++) free(states[i]); free(inputs); free(options); free(states); free(input_lengths); } static int setup_mutators(multipart_state_t * multipart_state, char * mutator_options, char * mutator_states, char * mutator_inputs) { size_t inputs_count, i; char **inputs = NULL, **options = NULL, **states = NULL, *option, *state; int num_options, num_states, all_use_same_options, all_use_same_states; size_t * input_lengths; DEBUG_MSG("Setting up mutators"); if (decode_mem_array(mutator_inputs, &inputs, &input_lengths, &inputs_count)) { FATAL_MSG("Error parsing input data, is it in multipart format?"); return 1; } if (!inputs_count) { //No inputs were found ERROR_MSG("No inputs found"); free_mutator_arrays(inputs, input_lengths, inputs_count, NULL, 0, NULL, 0); return 1; } num_options = get_json_items(mutator_options, "options", &options, &all_use_same_options); num_states = get_json_items(mutator_states, NULL, &states, &all_use_same_states); if (inputs_count != multipart_state->mutator_count || (num_options != 0 && !all_use_same_options && num_options != inputs_count) || 
(num_states != 0 && !all_use_same_states && num_states != inputs_count)) { free_mutator_arrays(inputs, input_lengths, inputs_count, options, num_options, states, num_states); return 1; } multipart_state->mutators = calloc(inputs_count, sizeof(mutator_t *)); multipart_state->mutator_states = calloc(inputs_count, sizeof(void *)); if(!multipart_state->mutators || !multipart_state->mutator_states) { free(multipart_state->mutators); free(multipart_state->mutator_states); multipart_state->mutators = NULL; multipart_state->mutator_states = NULL; free_mutator_arrays(inputs, input_lengths, inputs_count, options, num_options, states, num_states); return 1; } for (i = 0; i < inputs_count; i++) { DEBUG_MSG("Setting up mutator %d", i); //Create the mutator and get its state multipart_state->mutators[i] = mutator_factory_directory(multipart_state->mutator_directory, multipart_state->mutator_names[i]); if (multipart_state->mutators[i]) { option = NULL; if (all_use_same_options) option = options[0]; else if(num_options != 0) option = options[i]; state = NULL; if (all_use_same_states) state = states[0]; else if (num_states != 0) state = states[i]; multipart_state->mutator_states[i] = multipart_state->mutators[i]->create(option, state, inputs[i], input_lengths[i]); } if (!multipart_state->mutator_states[i] || !multipart_state->mutators[i]) { printf("Unknown mutator %s, bad mutator options, or bad saved state for mutator %lu\n", multipart_state->mutator_names[i], i); free(multipart_state->mutators[i]); //free the one that failed, if it did multipart_state->mutators[i] = NULL; free_mutator_arrays(inputs, input_lengths, inputs_count, options, num_options, states, num_states); return 1; } } free_mutator_arrays(inputs, input_lengths, inputs_count, options, num_options, states, num_states); return 0; } /////////////////////////////////////////////////////////////////////////////////////////// // Mutator Functions ////////////////////////////////////////////////////////////////////// 
/////////////////////////////////////////////////////////////////////////////////////////// mutator_t multipart_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), multipart_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), FUNCNAME(get_total_iteration_count), FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; /** * This function fills in m with all of the function pointers for this mutator. * @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE MULTIPART_MUTATOR_API void init(mutator_t * m) { memcpy(m, &multipart_mutator, sizeof(mutator_t)); } #endif /** * This function tries to determine the location of the currently executing library * to use as the default mutator directory. * @return either a string with the directory that contains the currently executing * library path, or NULL if it can't be determined. */ static char * get_default_mutator_directory() { #ifdef _WIN32 HANDLE handle; char filename[MAX_PATH]; //Find the path of the current library if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCSTR)&get_default_mutator_directory, (HMODULE *)&handle)) { //Trim the filename off, and look for other mutators in the same directory memset(filename, 0, sizeof(filename)); GetModuleFileName(handle, filename, sizeof(filename)); PathRemoveFileSpec(filename); return strdup(filename); } #else //!_WIN32 FILE * fp; int notdone = 1, found = 0; char path[256]; char * buffer = NULL; size_t length = 0; #ifdef __APPLE__ //Run vmmap on our process and locate this mutator's library on disk char command[64]; snprintf(command, sizeof(command), "vmmap %d", getpid()); fp = popen(command, "r"); #else //Parse /proc/self/maps to try to locate this mutator's library on disk fp = fopen("/proc/self/maps", "r"); #endif if(fp) { while(notdone > 0 && !found) { notdone = getline(&buffer, &length, fp); 
if(notdone > 0) { memset(path, 0, sizeof(path)); #ifdef __APPLE__ if(strncmp(buffer, "__TEXT", 6)) //if the line didn't start with __TEXT continue; //then skip it notdone = sscanf(buffer, "%*s %*x-%*x [ %*s %*s %*s %*s %*s %*s %255s\n", path); #else notdone = sscanf(buffer, "%*x-%*x %*c%*c%*c%*c %*x %*x:%*x %*u %255s\n", path); #endif if(strstr(path, MUTATOR_NAME "_mutator")) { found = 1; } } } free(buffer); fclose(fp); if(found) return strdup(dirname(path)); } #endif return NULL; //Couldn't figure out a reasonable default, search the normal library paths instead } static multipart_state_t * setup_options(char * options, char * input, size_t input_length) { multipart_state_t * state; if (!options || !strlen(options)) //The multipart needs options return NULL; //so error out if they weren't provided state = (multipart_state_t *)malloc(sizeof(multipart_state_t)); if (!state) return NULL; memset(state, 0, sizeof(multipart_state_t)); PARSE_OPTION_STRING(state, options, mutator_directory, "mutator_directory", FUNCNAME(cleanup)); PARSE_OPTION_ARRAY(state, options, mutator_names, mutator_count, "mutators", FUNCNAME(cleanup)); if(!state->mutator_directory) state->mutator_directory = get_default_mutator_directory(); if (state->mutator_count == 0) { FUNCNAME(cleanup)(state); return NULL; } return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - the input used to produce new mutated inputs later when the mutate function is called * This parameter must be a string containing a JSON array of JSON mem items of the individual inputs * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. 
*/ MULTIPART_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { DEBUG_MSG("Creating multipart mutator"); multipart_state_t * new_state; new_state = setup_options(options, input, input_length); if (!new_state) return NULL; if (setup_mutators(new_state, options, state, input)) { FUNCNAME(cleanup)(new_state); return NULL; } return new_state; } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ MULTIPART_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { multipart_state_t * state = (multipart_state_t *)mutator_state; size_t i; for (i = 0; i < state->mutator_count; i++) { free(state->mutator_names[i]); if (state->mutators && state->mutators[i]) { state->mutators[i]->cleanup(state->mutator_states[i]); free(state->mutators[i]); } } free(state->mutator_directory); free(state->mutators); free(state->mutator_states); free(state); } /** * The multipart mutator does not implement the mutate function, and thus this function always * returns an error (-1). * @return - -1 to indicate an error */ MULTIPART_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return -1; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. The multipart mutator does not support mutating all of the inputs given * during create at once, so the MUTATE_MULTIPLE_INPUTS flag must be set. * @param mutator_state - a mutator specific structure previously created by the create function. 
* @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ MULTIPART_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { multipart_state_t * state = (multipart_state_t *)mutator_state; unsigned short input_part = flags & MUTATE_MULTIPLE_INPUTS_MASK; uint64_t inner_flags; if (!(flags & MUTATE_MULTIPLE_INPUTS) || input_part < 0 || input_part >= state->mutator_count) return -1; inner_flags = flags & ~(MUTATE_MULTIPLE_INPUTS | MUTATE_MULTIPLE_INPUTS_MASK); return state->mutators[input_part]->mutate_extended(state->mutator_states[input_part], buffer, buffer_length, inner_flags); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. 
*/ MULTIPART_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { multipart_state_t * state = (multipart_state_t *)mutator_state; json_t *states_array, *temp; json_error_t error; char * ret, *single_state; size_t i; states_array = json_array(); for (i = 0; i < state->mutator_count; i++) { single_state = state->mutators[i]->get_state(state->mutator_states[i]); temp = json_loads(single_state, 0, &error); state->mutators[i]->free_state(single_state); json_array_append_new(states_array, temp); } ret = json_dumps(states_array, 0); json_decref(states_array); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. * @return 0 on success or non-zero on failure */ MULTIPART_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { multipart_state_t * current_state = (multipart_state_t *)mutator_state; json_t *states_array, *temp; json_error_t error; char *single_state; size_t i; if (!state) return 1; states_array = json_loads(state, 0, &error); if (!states_array || !json_is_array(states_array) || json_array_size(states_array) != current_state->mutator_count) { if(states_array) json_decref(states_array); return 1; } for (i = 0; i < current_state->mutator_count; i++) { temp = json_array_get(states_array, i); single_state = json_dumps(temp, 0); current_state->mutators[i]->set_state(current_state->mutator_states[i], single_state); free(single_state); } json_decref(states_array); return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. 
* @return value - the number of previously generated mutations */ MULTIPART_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { multipart_state_t * state = (multipart_state_t *)mutator_state; int lowest = -1, temp; size_t i; for (i = 0; i < state->mutator_count; i++) { temp = state->mutators[i]->get_current_iteration(state->mutator_states[i]); if (lowest == -1 || lowest > temp) lowest = temp; } return lowest; } /** * Returns the total number of mutations possible with this mutator and the current options. * For the multipart mutator, it will determine the number of mutations possible from all of * the mutators and return the lowest value * @param mutator_state - a mutator specific structure previously created by the create function. * @return the number of possible mutations with this mutator, or -1 if infinite or the number * cannot be determined. */ MULTIPART_MUTATOR_API int FUNCNAME(get_total_iteration_count)(void * mutator_state) { multipart_state_t * state = (multipart_state_t *)mutator_state; int lowest = -1, temp; size_t i; for (i = 0; i < state->mutator_count; i++) { temp = state->mutators[i]->get_total_iteration_count(state->mutator_states[i]); if (lowest == -1 || (temp != -1 && lowest > temp)) lowest = temp; } return lowest; } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. 
*/ MULTIPART_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { multipart_state_t * state = (multipart_state_t *)mutator_state; size_t i; size_t * sizes; if (num_inputs) *num_inputs = state->mutator_count; if (input_sizes) { *input_sizes = malloc(sizeof(size_t) * state->mutator_count); for (i = 0; i < state->mutator_count; i++) { state->mutators[i]->get_input_info(state->mutator_states[i], NULL, &sizes); (*input_sizes)[i] = sizes[0]; free(sizes); } } } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ MULTIPART_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { multipart_state_t * state = (multipart_state_t *)mutator_state; size_t inputs_count, i; char **inputs = NULL; size_t * input_lengths; int ret = 0; if (decode_mem_array(new_input, &inputs, &input_lengths, &inputs_count)) return -1; if (inputs_count != state->mutator_count) { free_mutator_arrays(inputs, input_lengths, inputs_count, NULL, 0, NULL, 0); return -1; } for (i = 0; ret == 0 && i < state->mutator_count; i++) ret = state->mutators[i]->set_input(state->mutator_states[i], inputs[i], input_lengths[i]); free_mutator_arrays(inputs, input_lengths, inputs_count, NULL, 0, NULL, 0); return ret; } /** * This function sets a help message for the mutator. * @param help_str - A pointer that will be updated to point to the new help string. 
* @return 0 on success and -1 on failure
 */
MULTIPART_MUTATOR_API int FUNCNAME(help)(char **help_str)
{
	//GENERIC_MUTATOR_HELP is defined outside this file section; presumably it stores
	//the text below in *help_str and supplies this function's return value.
	//TODO confirm against the macro definition.
	GENERIC_MUTATOR_HELP(
		"multipart - a mutator to manage multiple mutators\n"
		"Required Options:\n"
		" mutators An array of mutator names or library filenames that\n"
		" the multipart mutator should use to mutate the input.\n"
		"Optional Options:\n"
		" mutator_directory The directory to look for other mutator libraries in\n"
		" options An array of mutator options to pass to each mutator used\n"
		"\n"
	);
}

================================================
FILE: mutators/multipart_mutator/multipart_mutator.h
================================================
#pragma once
#include
#include

//Export the API when building the library on Windows and import it otherwise;
//the macro expands to nothing on other platforms.
#ifdef _WIN32
#ifdef MULTIPART_MUTATOR_EXPORTS
#define MULTIPART_MUTATOR_API __declspec(dllexport)
#else
#define MULTIPART_MUTATOR_API __declspec(dllimport)
#endif
#else //_WIN32
#define MULTIPART_MUTATOR_API
#endif

#define MUTATOR_NAME "multipart"

//Creation and destruction of the multipart mutator state
MULTIPART_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length);
MULTIPART_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state);
//Mutation entry points
MULTIPART_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length);
MULTIPART_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags);
//Saving and restoring state; state buffers are released with the default free_state
MULTIPART_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state);
#define multipart_free_state default_free_state
MULTIPART_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state);
//Iteration and input queries
MULTIPART_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state);
MULTIPART_MUTATOR_API int FUNCNAME(get_total_iteration_count)(void * mutator_state);
MULTIPART_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes);
MULTIPART_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length);
MULTIPART_MUTATOR_API int FUNCNAME(help)(char **help_str);
#ifndef ALL_MUTATORS_IN_ONE MULTIPART_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/mutator_tester/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (mutator_tester) set(MUTATOR_TESTER_SRC ${PROJECT_SOURCE_DIR}/mutator_tester.c) add_executable(mutator_tester ${MUTATOR_TESTER_SRC} $) target_link_libraries(mutator_tester utils) source_group("Executable Sources" FILES ${MUTATOR_TESTER_SRC}) target_compile_definitions(mutator_tester PUBLIC UTILS_NO_IMPORT) target_compile_definitions(mutator_tester PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(mutator_tester Shlwapi) else (WIN32) target_link_libraries(mutator_tester dl) target_link_libraries(mutator_tester pthread) endif (WIN32) ================================================ FILE: mutators/mutator_tester/mutator_tester.c ================================================ #include "mutator_tester.h" #include #include #include #include #include #include #include #include #include #ifdef _WIN32 #include #else #include #include #endif //The list of test functions static test_info_t test_info[NUM_TESTS] = { { test_all, "Run all tests!" }, //test_all MUST be the first entry in the test_info array { test_mutate, "Test the mutate() function, this will print each iteration of mutation" }, { test_state, "Test the get_state() and set_state() functions." }, { test_thread_mutate, "Test the thread safe mutate function. Only non-repeating mutators will pass this test" }, { test_run_forever, "Test the mutate() function by mutating the given buffer endlessly." }, { test_mutate_parts, "Test the mutate_input_part() function." 
}, { test_mutate_once, "Call the mutate() function once and print the output" }, }; static test_function test_all_tests[] = { test_mutate, test_state, test_thread_mutate, test_mutate_parts, test_mutate_once }; /** This function sets up the mutator for testing. This test program is designed * To aid in the debugging of a mutator DLL. The dll is loaded like it would be in the * full blown fuzzer and a series of tests are run against it to find common errors. * @return - if the process fails, it will return 1, if a test fails, the error code * returned will be its test number + 100. For example if test 1 fails main will return 101 * 0 is returned on success! */ int main(int argc, char *argv[]) { //Args char *test_type_to_convert = NULL, *mutator_path = NULL, *mutator_options = NULL, *seed_file = NULL; //args char *seed_buffer = NULL; char *test_string = NULL; char *help = NULL; int ret; unsigned long test_num; size_t seed_length; mutator_t * mutator; void * mutator_state; if (argc == 3 && !strcmp(argv[1], "help")) { help = mutator_help(argv[2]); if(help) puts(help); free(help); return 0; } else if (argc < 3) { print_usage(argv[0]); return 0; } test_type_to_convert = argv[1]; mutator_path = argv[2]; if(argc > 3) mutator_options = argv[3]; srand(time(NULL)); //Convert the test type to int test_num = strtoul(test_type_to_convert, &test_string, 10); if (test_string == test_type_to_convert || test_num >= NUM_TESTS || *test_string != '\0') { //Check for empty str, and overflow printf("Invalid test number!"); return 1; } if (argc < 5) { seed_length = 8; seed_buffer = (char *)malloc(seed_length); memset(seed_buffer, 0, seed_length); } else { //Load the seed buffer from a file seed_file = argv[4]; seed_length = read_file(seed_file, &seed_buffer); if (seed_length <= 0) { printf("Could not read seed file or empty seed file: %s\n", seed_file); return 1; } } //Load the DLL mutator = mutator_factory(mutator_path); if (mutator == NULL) { printf("Load mutator returned a NULL 
pointer\n"); return 1; } //Setup the mutator mutator_state = setup_mutator(mutator, mutator_options, seed_buffer, seed_length); if (!mutator_state) { printf("setup_mutator() failed\n"); return 1; } //Everything is setup, now do the tests. ret = test_info[test_num].func(mutator, mutator_state, mutator_options, seed_buffer, seed_length); if (ret) ret = 100 + test_num; mutator->cleanup(mutator_state); free(mutator); free(seed_buffer); return ret; } /** * This function initinalizes the mutator. It calls its create function to setup * the mutators state struct, and returns it. This struct is required for all other * mutator spicific function calls. * * @param mutator - a mutator_t struct with the API function pointers for the mutator to setup * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return mutator_state - the state struct for a spicific mutator */ void * setup_mutator(mutator_t * mutator, char * mutator_options, char * seed_buffer, size_t seed_length) { void * mutator_state; mutator_state = mutator->create(mutator_options, NULL, seed_buffer, seed_length); if (!mutator_state) { printf("Bad mutator options or saved state\n"); return NULL; } return mutator_state; } /** * This function prints the usage statment for the program. * * @param argv - The array of command line arguments * @return none */ void print_usage(char *executable_name) { int i; printf("\nUsage:\n"); printf("\n%s help \"/path/to/mutator/directory\"\n", executable_name); printf("\tPrint mutator help.\n"); printf("\n%s test_type \"/path/to/mutator.dll\" [\"JSON Mutator Options String\" [path/to/input/data]]\n", executable_name); printf("\tRun a mutator test. Valid Test Types:\n"); for (i = 0; i < NUM_TESTS; i++) printf("\t\t %d - %s\n", i, test_info[i].usage_info); } /** * This function runs all other tests in the test_info struct. 
* * @param mutator - the mutator struct representing the mutator to be tested, returned by load_mutator * @param mutator_state - the state struct for the mutator being tested. Currently unused for this test. * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return int - the results of the tests. 0 for success and nonzero for fail */ int test_all(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { int test_num, ret = 0; void * single_test_mutator_state; for (test_num = 0; test_num < sizeof(test_all_tests)/sizeof(test_all_tests[0]) && !ret; test_num++) { single_test_mutator_state = setup_mutator(mutator, mutator_options, seed_buffer, seed_length); if (!single_test_mutator_state) { printf("setup_mutator() failed\n"); return 1; } printf("+---------+\n"); printf("| TEST %2d |\n", test_num); printf("+---------+\n\n"); ret = test_all_tests[test_num](mutator, single_test_mutator_state, mutator_options, seed_buffer, seed_length); mutator->cleanup(single_test_mutator_state); } return ret; } /** * This function tests several testcases around the mutators mutate() function. * This allows the user to see if the data is being mutated in the expected manner. * It also ensures that each iteration of mutation is being tracked appropriately. * * @param mutator - the mutator struct representing the mutator to be tested, returned by load_mutator * @param mutator_state - the state struct for the mutator being tested, This state should * be at the starting state for the mutator (iteration 0) * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return int - the results of the tests. 
0 for success and 1 for fail */ int test_mutate(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { int total_iterations, mut_iter, i, limit; char * mutate_buffer = (char *)malloc(2 * seed_length); int ret; total_iterations = mutator->get_total_iteration_count(mutator_state); printf("The mutator reported %d required iterations.\n\n", total_iterations); printf("=== Original Data ===\n"); print_hex(seed_buffer, seed_length); printf("\n\n\n"); limit = total_iterations; if (total_iterations == -1) limit = 64; for (i = 0; i <= limit; i++) { printf("=== Iteration %d ===\n", i); mut_iter = mutator->get_current_iteration(mutator_state); if (i != mut_iter) { printf("ERROR: The mutator reports that it is on iteration %d but the real iteration is %d\n", mut_iter, i); return 1; } ret = mutator->mutate(mutator_state, mutate_buffer, 2 * seed_length); printf("mutated buffer, %3d bytes:\n", ret); if (ret != -1 && ret != 0) { print_hex(mutate_buffer, ret); } printf("\n\n"); if (ret == 0 && i == total_iterations) { printf("The mutator reported that everything has been mutated on iteration %d of %d\n", i, total_iterations); break; } else if (ret == 0 && total_iterations == -1) { //undeterminable number of outputs, it's not really a bug printf("The mutator reported that everything has been mutated on iteration %d\n", i); break; } else if (ret == -1) { printf("ERROR: the mutator reported an error!\n"); return 1; } else if (i == limit && total_iterations != -1) { printf("ERROR: The expected number of mutations were performed (%d), but the mutator did not return 0\n", total_iterations); } } if (total_iterations != -1 && ret != 0) { for (i = 1; i < 100 && ret != 0; i++) ret = mutator->mutate(mutator_state, mutate_buffer, 2 * seed_length); if (ret == 0 && total_iterations != -1) printf("ERROR: it took %d extra iterations for the mutator to return 0", i-1); else printf("ERROR: the mutator did not return 0 even after %d extra 
iterations", i-1); return 1; } return 0; } /** * This function tests several testcases around the mutators get_state() and set_state functions. * This allows the user to check if the state of a mutator is being correctly saved and restored. * * @param mutator - the mutator struct returned by load_mutator * @param mutator_state - the state struct for a spicific mutator * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return int - the results of the tests. 0 for success and 1 for fail */ int test_state(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { size_t total_iterations, i; char * mutate_buffer = (char *)malloc(2 * seed_length); char * new_mutate_buffer = (char *)malloc(2 * seed_length); char * old_saved_state_buffer; char * new_saved_state_buffer; json_t * old_JSON_state; json_t * new_JSON_state; int ret, old_iter, new_iter, old_mutate_length, new_mutate_length; void * new_mutator_state; if (!mutate_buffer || !new_mutate_buffer) { printf("Malloc failed\n"); free(mutate_buffer); free(new_mutate_buffer); return 1; } total_iterations = mutator->get_total_iteration_count(mutator_state); if (total_iterations == -1) { total_iterations = 64; } printf("Mutating the data %zi times\n", total_iterations / 2 ); for (i = 0; i <= total_iterations / 2; i++) { ret = mutator->mutate(mutator_state, mutate_buffer, 2 * seed_length); if (ret <= 0) { if (ret < 0) printf("ERROR: The mutate() function returned an error or finished pre-maturely. 
Run test 1 for more info\n"); else printf("Mutator finished mutations early\n"); break; } } printf("Mutation stopped on iteration %zi\n", i); printf("Saving the mutators state...\n"); old_saved_state_buffer = (char *)mutator->get_state(mutator_state); printf("Here is the OLD JSON string:\n%s\n", old_saved_state_buffer); //Setup a new mutator to restore the state into new_mutator_state = setup_mutator(mutator, mutator_options, seed_buffer, seed_length); if (!new_mutator_state) { printf("setup_mutator() failed\n"); free(mutate_buffer); free(new_mutate_buffer); mutator->free_state(old_saved_state_buffer); return 1; } //set the state from the old -> new printf("Restoring the mutators state...\n"); ret = mutator->set_state(new_mutator_state, old_saved_state_buffer); if (ret) { printf("set_state() returned error code %i\n", ret); } new_saved_state_buffer = (char *)mutator->get_state(new_mutator_state); printf("Here is the NEW JSON string:\n%s\n", new_saved_state_buffer); //Compare JSON states to see if they were saved and restored correctly old_JSON_state = json_string(old_saved_state_buffer); mutator->free_state(old_saved_state_buffer); if (old_JSON_state == NULL) { printf("Failed to convert old JSON string to JSON object\n"); free(mutate_buffer); free(new_mutate_buffer); mutator->free_state(new_saved_state_buffer); mutator->cleanup(new_mutator_state); return 1; } new_JSON_state = json_string(new_saved_state_buffer); mutator->free_state(new_saved_state_buffer); if (new_JSON_state == NULL) { printf("Failed to convert new JSON string to JSON object\n"); json_decref(old_JSON_state); free(mutate_buffer); free(new_mutate_buffer); mutator->cleanup(new_mutator_state); return 1; } if (!json_equal(old_JSON_state, new_JSON_state)) { printf("The mutator failed to restore state properly\n"); json_decref(old_JSON_state); json_decref(new_JSON_state); free(mutate_buffer); free(new_mutate_buffer); mutator->cleanup(new_mutator_state); return 1; } json_decref(old_JSON_state); 
json_decref(new_JSON_state); printf("The saved states are equal, this is expected\n"); //Get the iteration count and call mutate once, just to make sure that they work old_iter = mutator->get_current_iteration(mutator_state); new_iter = mutator->get_current_iteration(new_mutator_state); old_mutate_length = mutator->mutate(mutator_state, mutate_buffer, 2 * seed_length); new_mutate_length = mutator->mutate(new_mutator_state, new_mutate_buffer, 2 * seed_length); if (old_iter == new_iter && old_mutate_length == new_mutate_length && old_mutate_length >= 0 && !memcmp(mutate_buffer, new_mutate_buffer, old_mutate_length)) { printf("Success! The mutator has restored its state\n"); ret = 0; } else { printf("The mutator failed to mutate properly after restoring the state\n" "Original mutator iteration count %d New mutator iteration count %d\n" "Original mutator output length %d new mutator output length %d\n", old_iter, new_iter, old_mutate_length, new_mutate_length); printf("old (%d bytes): ", old_mutate_length); if(old_mutate_length > 0) print_hex(mutate_buffer, old_mutate_length); printf("\nnew (%d bytes): ", new_mutate_length); if(new_mutate_length > 0) print_hex(new_mutate_buffer, new_mutate_length); printf("\n"); ret = 1; } free(mutate_buffer); free(new_mutate_buffer); mutator->cleanup(new_mutator_state); return ret; } #define RACER_IS_THREAD_SAFE #define NUM_RACER_THREADS 10 #define NUM_RACER_SAVED_BUFFERS 256 #define NUM_RACER_ROUNDS 50 static int racer_buffers_count; static int racer_saved_buffer_lengths[NUM_RACER_SAVED_BUFFERS]; static char * racer_saved_buffers[NUM_RACER_SAVED_BUFFERS]; static char * racer_seed_buffer; static size_t racer_seed_length; static mutator_t * racer_mutator; #ifdef _WIN32 DWORD WINAPI mutate_racer(LPVOID mutator_state) #else void * mutate_racer(void * mutator_state) #endif { int index = 0; size_t mutate_buffer_length = 2 * racer_seed_length; while (index < NUM_RACER_SAVED_BUFFERS) { #ifdef _WIN32 index = 
InterlockedIncrement(&racer_buffers_count) - 1; #else index = __sync_fetch_and_add(&racer_buffers_count, 1) - 1; #endif if (index >= NUM_RACER_SAVED_BUFFERS) break; racer_saved_buffers[index] = malloc(mutate_buffer_length); memcpy(racer_saved_buffers[index], racer_seed_buffer, racer_seed_length); #ifdef RACER_IS_THREAD_SAFE racer_saved_buffer_lengths[index] = racer_mutator->mutate_extended(mutator_state, racer_saved_buffers[index], mutate_buffer_length, MUTATE_THREAD_SAFE); #else racer_saved_buffer_lengths[index] = racer_mutator->mutate(mutator_state, racer_saved_buffers[index], mutate_buffer_length); #endif if (racer_saved_buffer_lengths[index] <= 0) break; } #ifdef _WIN32 return index; #else return NULL; #endif } /** * This function tests the thread safe mutate function. * * @param mutator - the mutator struct returned by load_mutator * @param mutator_state - the state struct for a spicific mutator * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return int - the results of the tests. 
0 for success and 1 for fail */ int test_thread_mutate(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { int round, i, j, found_duplicate = 0; racer_seed_buffer = seed_buffer; racer_seed_length = seed_length; racer_mutator = mutator; for (round = 0; !found_duplicate && round < NUM_RACER_ROUNDS; round++) { memset(racer_saved_buffers, 0, sizeof(racer_saved_buffers)); memset(racer_saved_buffer_lengths, 0, sizeof(racer_saved_buffer_lengths)); racer_buffers_count = 0; mutator_state = setup_mutator(mutator, mutator_options, seed_buffer, seed_length); if (!mutator_state) { printf("setup_mutator() failed\n"); return 1; } //Run the racer threads #ifdef _WIN32 HANDLE threads[NUM_RACER_THREADS]; for(i = 0; i < NUM_RACER_THREADS; i++) threads[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)mutate_racer, mutator_state, 0, NULL); WaitForMultipleObjects(NUM_RACER_THREADS, threads, TRUE, INFINITE); for (i = 0; i < NUM_RACER_THREADS; i++) CloseHandle(threads[i]); #else pthread_t threads[NUM_RACER_THREADS]; for(i = 0; i < NUM_RACER_THREADS; i++) pthread_create(&threads[i], NULL, mutate_racer, mutator_state); for (i = 0; i < NUM_RACER_THREADS; i++) pthread_join(threads[i], NULL); #endif for (i = 0; !found_duplicate && i < NUM_RACER_SAVED_BUFFERS; i++) { if (racer_saved_buffer_lengths[i] <= 0 || racer_saved_buffers[i] == NULL) continue; for (j = i + 1; !found_duplicate && j < NUM_RACER_SAVED_BUFFERS; j++) { if (racer_saved_buffer_lengths[j] <= 0 || racer_saved_buffers[j] == NULL) continue; if (racer_saved_buffer_lengths[i] == racer_saved_buffer_lengths[j] && !memcmp(racer_saved_buffers[i], racer_saved_buffers[j], racer_saved_buffer_lengths[i])) { printf("Found duplicate in round %d: %d and %d\n", round, i, j); print_hex(racer_saved_buffers[i], racer_saved_buffer_lengths[i]); printf("\n"); print_hex(racer_saved_buffers[j], racer_saved_buffer_lengths[j]); printf("\n\n"); found_duplicate = 1; } } } for (i = 0; i < 
NUM_RACER_SAVED_BUFFERS; i++) free(racer_saved_buffers[i]); mutator->cleanup(mutator_state); } return found_duplicate; } /** * This function tests the provided mutator by mutating a buffer endlessly * * @param mutator - a mutator_t struct to test * @param mutator_state - the state struct for a spicific mutator * @param mutator_options - a JSON string that contains the mutator options * @param seed_buffer - The data buffer used to seed the mutator * @param seed_length - The length of the seed_buffer in bytes * @return int - the results of the tests. 0 for success and 1 for fail */ int test_run_forever(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { size_t i; int ret; char * mutate_buffer = (char *)malloc(2 * seed_length); if (!mutate_buffer) { printf("Malloc failed\n"); return 1; } ret = 1; for (i = 0; ret != 0 && ret != -1; i++) { ret = mutator->mutate(mutator_state, mutate_buffer, 2 * seed_length); if (ret == -1) { printf("%4lu: The mutate() function returned an error.\n", i); } else if(ret == 0) { printf("%4lu: The mutate() function returned 0 (i.e. 
there are no more mutations).\n", i); } else if(ret > 0) { printf("%4lu: ", i); print_hex(mutate_buffer, ret); printf("\n"); } } free(mutate_buffer); return ret; } int test_mutate_parts(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { size_t * input_sizes; int i, j, num_bytes, num_inputs, total_iteration_count; char * mutate_buffer; mutate_buffer = (char *)malloc(2 * seed_length); if (!mutate_buffer) { printf("Malloc failed\n"); return 1; } total_iteration_count = mutator->get_total_iteration_count(mutator_state); mutator->get_input_info(mutator_state, &num_inputs, &input_sizes); printf("mutator started with %d inputs and has %d iterations total\n", num_inputs, total_iteration_count); for (i = 0; i < num_inputs; i++) printf("Input %d was %lu bytes\n", i, input_sizes[i]); free(input_sizes); for (i = 0; i < 10; i++) { num_bytes = 1; for (j = 0; j < num_inputs && num_bytes > 0; j++) { num_bytes = mutator->mutate_extended(mutator_state, mutate_buffer, 2 * seed_length, MUTATE_MULTIPLE_INPUTS | j); if (num_bytes > 0) { printf("%4d %4d: ", i, j); print_hex(mutate_buffer, num_bytes); printf("\n"); } } } free(mutate_buffer); return 0; } int test_mutate_once(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length) { size_t max_length = 5 * 1024 * 1024; //Allow for very large mutations char * mutate_buffer; int mutate_length; mutate_buffer = (char *)malloc(max_length); memset(mutate_buffer, 0, max_length); mutate_length = mutator->mutate(mutator_state, mutate_buffer, max_length); if(mutate_length < 0) return -1; write(1, mutate_buffer, mutate_length); free(mutate_buffer); return 0; } ================================================ FILE: mutators/mutator_tester/mutator_tester.h ================================================ #pragma once #include //Helper functions void * setup_mutator(mutator_t * mutator, char * mutator_options, char * seed_buffer, size_t 
seed_length); void print_usage(char * executable_name); //Test functions #define NUM_TESTS 7 int test_all(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_mutate(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_state(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_thread_mutate(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_run_forever(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_mutate_parts(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); int test_mutate_once(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); //Test types typedef int(*test_function)(mutator_t * mutator, void * mutator_state, char * mutator_options, char * seed_buffer, size_t seed_length); typedef struct test_info { test_function func; const char * usage_info; } test_info_t; ================================================ FILE: mutators/mutators/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (mutators) set(MUTATORS_SRC ${PROJECT_SOURCE_DIR}/mutators.c ${PROJECT_SOURCE_DIR}/afl_helpers.c ) source_group("Library Sources" FILES ${MUTATORS_SRC}) add_library(mutators SHARED ${MUTATORS_SRC} $) target_link_libraries(mutators utils) target_compile_definitions(mutators PUBLIC MUTATORS_EXPORTS) target_compile_definitions(mutators PUBLIC UTILS_NO_IMPORT) target_compile_definitions(mutators PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(mutators Shlwapi) endif (WIN32) add_library(mutators_object OBJECT ${MUTATORS_SRC}) if (NOT WIN32) 
set_target_properties(mutators_object PROPERTIES COMPILE_FLAGS "-fPIC") endif (NOT WIN32) target_compile_definitions(mutators_object PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(mutators_object PUBLIC UTILS_NO_IMPORT) target_compile_definitions(mutators_object PUBLIC JANSSON_NO_IMPORT) add_library(mutators_static STATIC ${MUTATORS_SRC}) target_compile_definitions(mutators_static PUBLIC MUTATORS_NO_IMPORT) target_link_libraries(mutators_static utils_static) target_link_libraries(mutators_static jansson_static) ================================================ FILE: mutators/mutators/afl_config.h ================================================ /* american fuzzy lop - vaguely configurable bits ---------------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_CONFIG_H #define _HAVE_CONFIG_H #include "afl_types.h" /* Version string: */ #define VERSION "2.52b" /****************************************************** * * * Settings that may be of interest to power users: * * * ******************************************************/ /* Comment out to disable terminal colors (note that this makes afl-analyze a lot less nice): */ //#define USE_COLOR /* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */ #define FANCY_BOXES /* Default timeout for fuzzed code (milliseconds). 
This is the upper bound, also used for detecting hangs; the actual value is auto-scaled: */ #define EXEC_TIMEOUT 1000 /* Timeout rounding factor when auto-scaling (milliseconds): */ #define EXEC_TM_ROUND 20 /* Default memory limit for child process (MB): */ #ifndef __x86_64__ # define MEM_LIMIT 25 #else # define MEM_LIMIT 50 #endif /* ^!__x86_64__ */ /* Default memory limit when running in QEMU mode (MB): */ #define MEM_LIMIT_QEMU 200 /* Number of calibration cycles per every new test case (and for test cases that show variable behavior): */ #define CAL_CYCLES 8 #define CAL_CYCLES_LONG 40 /* Number of subsequent timeouts before abandoning an input file: */ #define TMOUT_LIMIT 250 /* Maximum number of unique hangs or crashes to record: */ #define KEEP_UNIQUE_HANG 500 #define KEEP_UNIQUE_CRASH 5000 /* Baseline number of random tweaks during a single 'havoc' stage: */ #define HAVOC_CYCLES 256 #define HAVOC_CYCLES_INIT 1024 /* Maximum multiplier for the above (should be a power of two, beware of 32-bit int overflows): */ #define HAVOC_MAX_MULT 16 /* Absolute minimum number of havoc cycles (after all adjustments): */ #define HAVOC_MIN 16 /* Maximum stacking for havoc-stage tweaks. The actual value is calculated like this: n = random between 1 and HAVOC_STACK_POW2 stacking = 2^n In other words, the default (n = 7) produces 2, 4, 8, 16, 32, 64, or 128 stacked tweaks: */ #define HAVOC_STACK_POW2 7 /* Caps on block sizes for cloning and deletion operations. 
Each of these ranges has a 33% probability of getting picked, except for the first two cycles where smaller blocks are favored: */ #define HAVOC_BLK_SMALL 32 #define HAVOC_BLK_MEDIUM 128 #define HAVOC_BLK_LARGE 1500 /* Extra-large blocks, selected very rarely (<5% of the time): */ #define HAVOC_BLK_XL 32768 /* Probabilities of skipping non-favored entries in the queue, expressed as percentages: */ #define SKIP_TO_NEW_PROB 99 /* ...when there are new, pending favorites */ #define SKIP_NFAV_OLD_PROB 95 /* ...no new favs, cur entry already fuzzed */ #define SKIP_NFAV_NEW_PROB 75 /* ...no new favs, cur entry not fuzzed yet */ /* Splicing cycle count: */ #define SPLICE_CYCLES 15 /* Nominal per-splice havoc cycle length: */ #define SPLICE_HAVOC 32 /* Maximum offset for integer addition / subtraction stages: */ #define ARITH_MAX 35 /* Limits for the test case trimmer. The absolute minimum chunk size; and the starting and ending divisors for chopping up the input file: */ #define TRIM_MIN_BYTES 4 #define TRIM_START_STEPS 16 #define TRIM_END_STEPS 1024 /* Maximum size of input file, in bytes (keep under 100MB): */ #define MAX_FILE (1 * 1024 * 1024) /* The same, for the test case minimizer: */ #define TMIN_MAX_FILE (10 * 1024 * 1024) /* Block normalization steps for afl-tmin: */ #define TMIN_SET_MIN_SIZE 4 #define TMIN_SET_STEPS 128 /* Maximum dictionary token size (-x), in bytes: */ #define MAX_DICT_FILE 128 /* Length limits for auto-detected dictionary tokens: */ #define MIN_AUTO_EXTRA 3 #define MAX_AUTO_EXTRA 32 /* Maximum number of user-specified dictionary tokens to use in deterministic steps; past this point, the "extras/user" step will be still carried out, but with proportionally lower odds: */ #define MAX_DET_EXTRAS 200 /* Maximum number of auto-extracted dictionary tokens to actually use in fuzzing (first value), and to keep in memory as candidates. The latter should be much higher than the former. 
*/ #define USE_AUTO_EXTRAS 50 #define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 10) /* Scaling factor for the effector map used to skip some of the more expensive deterministic steps. The actual divisor is set to 2^EFF_MAP_SCALE2 bytes: */ #define EFF_MAP_SCALE2 3 /* Minimum input file length at which the effector logic kicks in: */ #define EFF_MIN_LEN 128 /* Maximum effector density past which everything is just fuzzed unconditionally (%): */ #define EFF_MAX_PERC 90 /* UI refresh frequency (Hz): */ #define UI_TARGET_HZ 5 /* Fuzzer stats file and plot update intervals (sec): */ #define STATS_UPDATE_SEC 60 #define PLOT_UPDATE_SEC 5 /* Smoothing divisor for CPU load and exec speed stats (1 - no smoothing). */ #define AVG_SMOOTHING 16 /* Sync interval (every n havoc cycles): */ #define SYNC_INTERVAL 5 /* Output directory reuse grace period (minutes): */ #define OUTPUT_GRACE 25 /* Uncomment to use simple file names (id_NNNNNN): */ // #define SIMPLE_FILES /* List of interesting values to use in fuzzing. 
*/ #define INTERESTING_8 \ -128, /* Overflow signed 8-bit when decremented */ \ -1, /* */ \ 0, /* */ \ 1, /* */ \ 16, /* One-off with common buffer size */ \ 32, /* One-off with common buffer size */ \ 64, /* One-off with common buffer size */ \ 100, /* One-off with common buffer size */ \ 127 /* Overflow signed 8-bit when incremented */ #define INTERESTING_16 \ -32768, /* Overflow signed 16-bit when decremented */ \ -129, /* Overflow signed 8-bit */ \ 128, /* Overflow signed 8-bit */ \ 255, /* Overflow unsig 8-bit when incremented */ \ 256, /* Overflow unsig 8-bit */ \ 512, /* One-off with common buffer size */ \ 1000, /* One-off with common buffer size */ \ 1024, /* One-off with common buffer size */ \ 4096, /* One-off with common buffer size */ \ 32767 /* Overflow signed 16-bit when incremented */ #define INTERESTING_32 \ -2147483648LL, /* Overflow signed 32-bit when decremented */ \ -100663046, /* Large negative number (endian-agnostic) */ \ -32769, /* Overflow signed 16-bit */ \ 32768, /* Overflow signed 16-bit */ \ 65535, /* Overflow unsig 16-bit when incremented */ \ 65536, /* Overflow unsig 16 bit */ \ 100663045, /* Large positive number (endian-agnostic) */ \ 2147483647 /* Overflow signed 32-bit when incremented */ /*********************************************************** * * * Really exotic stuff you probably don't want to touch: * * * ***********************************************************/ /* Call count interval between reseeding the libc PRNG from /dev/urandom: */ #define RESEED_RNG 10000 /* Maximum line length passed from GCC to 'as' and used for parsing configuration files: */ #define MAX_LINE 8192 /* Environment variable used to pass SHM ID to the called program. */ #define SHM_ENV_VAR "__AFL_SHM_ID" /* Other less interesting, internal-only variables. 
*/ #define CLANG_ENV_VAR "__AFL_CLANG_MODE" #define AS_LOOP_ENV_VAR "__AFL_AS_LOOPCHECK" #define PERSIST_ENV_VAR "__AFL_PERSISTENT" #define DEFER_ENV_VAR "__AFL_DEFER_FORKSRV" /* In-code signatures for deferred and persistent mode. */ #define PERSIST_SIG "##SIG_AFL_PERSISTENT##" #define DEFER_SIG "##SIG_AFL_DEFER_FORKSRV##" /* Distinctive bitmap signature used to indicate failed execution: */ #define EXEC_FAIL_SIG 0xfee1dead /* Distinctive exit code used to indicate MSAN trip condition: */ #define MSAN_ERROR 86 /* Designated file descriptors for forkserver commands (the application will use FORKSRV_FD and FORKSRV_FD + 1): */ #define FORKSRV_FD 198 /* Fork server init timeout multiplier: we'll wait the user-selected timeout plus this much for the fork server to spin up. */ #define FORK_WAIT_MULT 10 /* Calibration timeout adjustments, to be a bit more generous when resuming fuzzing sessions or trying to calibrate already-added internal finds. The first value is a percentage, the other is in milliseconds: */ #define CAL_TMOUT_PERC 125 #define CAL_TMOUT_ADD 50 /* Number of chances to calibrate a case before giving up: */ #define CAL_CHANCES 3 /* Map size for the traced binary (2^MAP_SIZE_POW2). Must be greater than 2; you probably want to keep it under 18 or so for performance reasons (adjusting AFL_INST_RATIO when compiling is probably a better way to solve problems with complex programs). You need to recompile the target binary after changing this - otherwise, SEGVs may ensue. */ #define MAP_SIZE_POW2 16 #define MAP_SIZE (1 << MAP_SIZE_POW2) /* Maximum allocator request size (keep well under INT_MAX): */ #define MAX_ALLOC 0x40000000 /* A made-up hashing seed: */ #define HASH_CONST 0xa5b35705 /* Constants for afl-gotcpu to control busy loop timing: */ #define CTEST_TARGET_MS 5000 #define CTEST_CORE_TRG_MS 1000 #define CTEST_BUSY_CYCLES (10 * 1000 * 1000) /* Uncomment this to use inferior block-coverage-based instrumentation. 
Note that you need to recompile the target binary for this to have any effect: */ // #define COVERAGE_ONLY /* Uncomment this to ignore hit counts and output just one bit per tuple. As with the previous setting, you will need to recompile the target binary: */ // #define SKIP_COUNTS /* Uncomment this to use instrumentation data to record newly discovered paths, but do not use them as seeds for fuzzing. This is useful for conveniently measuring coverage that could be attained by a "dumb" fuzzing algorithm: */ // #define IGNORE_FINDS #endif /* ! _HAVE_CONFIG_H */ ================================================ FILE: mutators/mutators/afl_debug.h ================================================ /* american fuzzy lop - debug / error handling macros -------------------------------------------------- Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This file has been modified from the original to suit the purposes of this project. 
*/ #ifndef _HAVE_DEBUG_H #define _HAVE_DEBUG_H #include #include "afl_types.h" #include "afl_config.h" /******************* * Terminal colors * *******************/ #ifdef USE_COLOR # define cBLK "\x1b[0;30m" # define cRED "\x1b[0;31m" # define cGRN "\x1b[0;32m" # define cBRN "\x1b[0;33m" # define cBLU "\x1b[0;34m" # define cMGN "\x1b[0;35m" # define cCYA "\x1b[0;36m" # define cLGR "\x1b[0;37m" # define cGRA "\x1b[1;90m" # define cLRD "\x1b[1;91m" # define cLGN "\x1b[1;92m" # define cYEL "\x1b[1;93m" # define cLBL "\x1b[1;94m" # define cPIN "\x1b[1;95m" # define cLCY "\x1b[1;96m" # define cBRI "\x1b[1;97m" # define cRST "\x1b[0m" # define bgBLK "\x1b[40m" # define bgRED "\x1b[41m" # define bgGRN "\x1b[42m" # define bgBRN "\x1b[43m" # define bgBLU "\x1b[44m" # define bgMGN "\x1b[45m" # define bgCYA "\x1b[46m" # define bgLGR "\x1b[47m" # define bgGRA "\x1b[100m" # define bgLRD "\x1b[101m" # define bgLGN "\x1b[102m" # define bgYEL "\x1b[103m" # define bgLBL "\x1b[104m" # define bgPIN "\x1b[105m" # define bgLCY "\x1b[106m" # define bgBRI "\x1b[107m" #else # define cBLK "" # define cRED "" # define cGRN "" # define cBRN "" # define cBLU "" # define cMGN "" # define cCYA "" # define cLGR "" # define cGRA "" # define cLRD "" # define cLGN "" # define cYEL "" # define cLBL "" # define cPIN "" # define cLCY "" # define cBRI "" # define cRST "" # define bgBLK "" # define bgRED "" # define bgGRN "" # define bgBRN "" # define bgBLU "" # define bgMGN "" # define bgCYA "" # define bgLGR "" # define bgGRA "" # define bgLRD "" # define bgLGN "" # define bgYEL "" # define bgLBL "" # define bgPIN "" # define bgLCY "" # define bgBRI "" #endif /* ^USE_COLOR */ /************************* * Box drawing sequences * *************************/ #ifdef FANCY_BOXES # define SET_G1 "\x1b)0" /* Set G1 for box drawing */ # define RESET_G1 "\x1b)B" /* Reset G1 to ASCII */ # define bSTART "\x0e" /* Enter G1 drawing mode */ # define bSTOP "\x0f" /* Leave G1 drawing mode */ # define bH "q" /* 
Horizontal line */ # define bV "x" /* Vertical line */ # define bLT "l" /* Left top corner */ # define bRT "k" /* Right top corner */ # define bLB "m" /* Left bottom corner */ # define bRB "j" /* Right bottom corner */ # define bX "n" /* Cross */ # define bVR "t" /* Vertical, branch right */ # define bVL "u" /* Vertical, branch left */ # define bHT "v" /* Horizontal, branch top */ # define bHB "w" /* Horizontal, branch bottom */ #else # define SET_G1 "" # define RESET_G1 "" # define bSTART "" # define bSTOP "" # define bH "-" # define bV "|" # define bLT "+" # define bRT "+" # define bLB "+" # define bRB "+" # define bX "+" # define bVR "+" # define bVL "+" # define bHT "+" # define bHB "+" #endif /* ^FANCY_BOXES */ /*********************** * Misc terminal codes * ***********************/ #define TERM_HOME "\x1b[H" #define TERM_CLEAR TERM_HOME "\x1b[2J" #define cEOL "\x1b[0K" #define CURSOR_HIDE "\x1b[?25l" #define CURSOR_SHOW "\x1b[?25h" /************************ * Debug & error macros * ************************/ /* Just print stuff to the appropriate stream. */ #ifdef MESSAGES_TO_STDOUT #ifdef _WIN32 # define SAYF(x, ...) printf(x, __VA_ARGS__) #else # define SAYF(x...) printf(x) #endif #else #ifdef _WIN32 # define SAYF(x, ...) fprintf(stderr, x, __VA_ARGS__) #else # define SAYF(x...) fprintf(stderr, x) #endif #endif /* ^MESSAGES_TO_STDOUT */ /* Show a prefixed warning. */ #define WARNF(x, ...) do { \ SAYF(cYEL "[!] " cBRI "WARNING: " cRST x, __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "doing something" message. */ #define ACTF(x, ...) do { \ SAYF(cLBL "[*] " cRST x, __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed "success" message. */ #define OKF(x, ...) do { \ SAYF(cLGN "[+] " cRST x, __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Show a prefixed fatal error message (not used in afl). */ #define BADF(x, ...) 
do { \ SAYF(cLRD "\n[-] " cRST x, __VA_ARGS__); \ SAYF(cRST "\n"); \ } while (0) /* Die with a verbose non-OS fatal error message. */ #define FATAL(x, ...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ cBRI x, __VA_ARGS__); \ SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ exit(1); \ } while (0) /* Die by calling abort() to provide a core dump. */ #define ABORT(x, ...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ cBRI x, __VA_ARGS__); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ abort(); \ } while (0) /* Die while also including the output of perror(). */ #define PFATAL(x, ...) do { \ fflush(stdout); \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] SYSTEM ERROR : " \ cBRI x, __VA_ARGS__); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \ __FUNCTION__, __FILE__, __LINE__); \ SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \ exit(1); \ } while (0) /* Die with FAULT() or PFAULT() depending on the value of res (used to interpret different failure modes for read(), write(), etc). */ #define RPFATAL(res, x, ...) do { \ if (res < 0) PFATAL(x, __VA_ARGS__); else FATAL(x, __VA_ARGS__); \ } while (0) /* Error-checking versions of read() and write() that call RPFATAL() as appropriate. */ #define ck_write(fd, buf, len, fn) do { \ u32 _len = (len); \ s32 _res = write(fd, buf, _len); \ if (_res != _len) RPFATAL(_res, "Short write to %s", fn); \ } while (0) #define ck_read(fd, buf, len, fn) do { \ u32 _len = (len); \ s32 _res = read(fd, buf, _len); \ if (_res != _len) RPFATAL(_res, "Short read from %s", fn); \ } while (0) #endif /* ! 
_HAVE_DEBUG_H */ ================================================ FILE: mutators/mutators/afl_helpers.c ================================================ #include "mutators.h" #include "afl_helpers.h" #include "afl_config.h" #include "afl_debug.h" #include "afl_types.h" #include #include #include #include #include #include #ifdef _WIN32 #include #include #else #include #include #include #include #endif static inline uint64_t rotl(const uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } /** * xoroshiro128plus by David Blackman and Sebastiano Vigna * @param info - an mutate_info_t that holds the current random generator state * @return - a random uint64_t */ static inline uint64_t rnd64(mutate_info_t * info) { const uint64_t s0 = info->random_state[0]; uint64_t s1 = info->random_state[1]; const uint64_t result = s0 + s1; s1 ^= s0; info->random_state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); info->random_state[1] = rotl(s1, 36); return result; } /* Generate a random number (from 0 to limit - 1). This may have slight bias. 
*/ MUTATORS_API u32 UR(mutate_info_t * info, u32 limit) { return rnd64(info) % limit; } //Mutates a buffer, running through each of the passed in mutate functions, updating the mutate_info_t //with the current progress through the mutation functions MUTATORS_API int mutate_one(mutate_info_t * info, mutate_buffer_t * buf, int(*const*mutate_funcs)(mutate_info_t *, mutate_buffer_t *), size_t num_funcs) { int length = MUTATOR_DONE; while ((length == MUTATOR_DONE || length == MUTATOR_TRY_AGAIN) && info->stage < num_funcs) { length = mutate_funcs[info->stage](info, buf); if (length == MUTATOR_TRY_AGAIN) info->stage_cur++; else if (length == MUTATOR_DONE) { info->stage++; info->stage_cur = 0; if (info->one_stage_only) { //if we're only doing one stage, set the stage to the end info->stage = num_funcs; //so the next call to mutate_one doesn't return a mutated buffer break; } } } info->stage_cur++; if (length == MUTATOR_DONE && info->stage == num_funcs) //If we've reached info->stage_cur = 0; //the end of the mutators cycle, reset the stage to 0 return length; } static void clear_splice_files(mutate_info_t * info) { size_t i; if (info->splice_files) { for (i = 0; i < info->splice_files_count; i++) { free(info->splice_files[i]->s); free(info->splice_files[i]); } free(info->splice_files); info->splice_files = NULL; info->splice_files_count = 0; } } /** * Loads the splice files into the given afl state * @param info - the mutate_info_t to load the splice files for * @return - 0 on success, nonzero on failure */ MUTATORS_API int load_splice_files(mutate_info_t * info, char ** splice_filenames, size_t splice_filenames_count) { char * contents; int length; size_t i; string_t * splice_file; clear_splice_files(info); for (i = 0; i < splice_filenames_count; i++) { length = read_file(splice_filenames[i], &contents); if (length < 0) { printf("Could not read file %s\n", splice_filenames[i]); clear_splice_files(info); return 1; } splice_file = (string_t *)malloc(sizeof(string_t)); 
info->splice_files = (string_t **)realloc(info->splice_files, sizeof(string_t *) * (info->splice_files_count + 1)); if (!info->splice_files || !splice_file) { printf("Memory error while allocating splice files\n"); free(contents); free(splice_file); clear_splice_files(info); return 1; } splice_file->len = length; splice_file->s = (u8 *)contents; info->splice_files[info->splice_files_count] = splice_file; info->splice_files_count++; } return 0; } static void clear_dictionary_files(mutate_info_t * info) { size_t i; if (info->dictq) { for (i = 0; i < info->dictionary_count; i++) { free(info->dictq[i]->s); free(info->dictq[i]); } free(info->dictq); info->dictq = NULL; info->dictionary_count = 0; } } MUTATORS_API void cleanup_mutate_info(mutate_info_t * info) { //Free any dictionary/splice files that were loaded clear_dictionary_files(info); clear_splice_files(info); destroy_mutex(info->mutate_mutex); info->mutate_mutex = NULL; } /** * Cleans up the old mutate_info_t struct and reinitializes it back to defaults * @param info - the mutate_info_t struct to reset * @return - 0 on success, nonzero on failure */ MUTATORS_API int reset_mutate_info(mutate_info_t * info) { cleanup_mutate_info(info); //Setup the default options info->random_state[0] = (((uint64_t)rand()) << 32) | rand(); info->random_state[1] = (((uint64_t)rand()) << 32) | rand(); info->queue_cycle = 1; info->havoc_div = 1; info->perf_score = 100; info->mutate_mutex = create_mutex(); return info->mutate_mutex == NULL; //1 if the mutex creation failed, 0 otherwise } MUTATORS_API int add_mutate_info_to_json(json_t * obj, mutate_info_t * info) { json_t *temp, *temp2, *dictionary_list, *dictionary_item; uint64_t i; ADD_UINT64T(temp, info->random_state[0], obj, "random_state0"); ADD_UINT64T(temp, info->random_state[1], obj, "random_state1"); ADD_INT(temp, info->stage_cur, obj, "stage_cur"); ADD_INT(temp, info->stage, obj, "stage"); ADD_INT(temp, info->should_skip_previous, obj, "should_skip_previous"); ADD_INT(temp, 
info->one_stage_only, obj, "one_stage_only"); ADD_INT(temp, info->queue_cycle, obj, "queue_cycle"); ADD_INT(temp, info->havoc_div, obj, "havoc_div"); ADD_INT(temp, info->perf_score, obj, "perf_score"); dictionary_list = json_array(); if (!dictionary_list) return 0; for(i = 0; i < info->dictionary_count; i++) { dictionary_item = json_object(); temp = json_mem((const char *)info->dictq[i]->s, info->dictq[i]->len); temp2 = json_integer(info->dictq[i]->len); if (!temp || !temp2 || !dictionary_item) { if(dictionary_item) json_decref(dictionary_list); if (temp) json_decref(temp); if (temp2) json_decref(temp2); return 0; } if (json_object_set_new(dictionary_item, "s", temp)) { json_decref(dictionary_list); json_decref(dictionary_item); json_decref(temp2); return 0; } if (json_object_set_new(dictionary_item, "len", temp2)) { json_decref(dictionary_list); json_decref(dictionary_item); return 0; } json_array_append_new(dictionary_list, dictionary_item); } if (json_object_set_new(obj, "dictionary", dictionary_list)) json_decref(dictionary_list); return 1; } MUTATORS_API int get_mutate_info_from_json(char * state, mutate_info_t * info) { int temp_int, result, inner_result; uint64_t temp_uint64t; char * tempstr; json_t *dictionary_obj; clear_splice_files(info); clear_dictionary_files(info); GET_UINT64T(temp_uint64t, state, info->random_state[0], "random_state0", result); GET_UINT64T(temp_uint64t, state, info->random_state[1], "random_state1", result); GET_INT(temp_int, state, info->stage_cur, "stage_cur", result); GET_INT(temp_int, state, info->stage, "stage", result); GET_INT(temp_int, state, info->should_skip_previous, "should_skip_previous", result); GET_INT(temp_int, state, info->one_stage_only, "one_stage_only", result); GET_INT(temp_int, state, info->queue_cycle, "queue_cycle", result); GET_INT(temp_int, state, info->havoc_div, "havoc_div", result); GET_INT(temp_int, state, info->perf_score, "perf_score", result); FOREACH_OBJECT_JSON_ARRAY_ITEM_BEGIN(state, modules, 
"dictionary", dictionary_obj, result) GET_ITEM(dictionary_obj, temp_uint64t, temp_uint64t, get_uint64t_options_from_json, "len", inner_result); tempstr = get_mem_options_from_json(dictionary_obj, "s", &inner_result); if (inner_result <= 0) { FOREACH_OBJECT_JSON_ARRAY_ITEM_FREE(modules); return 1; } info->dictq = (string_t **)realloc(info->dictq, (info->dictionary_count + 1) * sizeof(string_t *)); if (!info->dictq) { free(tempstr); FOREACH_OBJECT_JSON_ARRAY_ITEM_FREE(modules); return 1; } info->dictq[info->dictionary_count] = (string_t *)malloc(sizeof(string_t)); if (!info->dictq[info->dictionary_count]) { free(tempstr); FOREACH_OBJECT_JSON_ARRAY_ITEM_FREE(modules); return 1; } info->dictq[info->dictionary_count]->len = temp_uint64t; info->dictq[info->dictionary_count]->s = (u8*)tempstr; info->dictionary_count++; FOREACH_OBJECT_JSON_ARRAY_ITEM_END(modules); if (result < 0) return 1; return 0; } //////////////////////////////////////////////////////////////////////////////////////////// //// AFL Mutation Functions //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// /* The code in this section (AFL Mutation Functions) is taken from and/or based on AFL and falls under the following license: Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
The code in this section has been modified from the original to suit the purposes of this project. */ /* Interesting values, as per config.h */ static s8 interesting_8[] = { INTERESTING_8 }; static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 }; static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 }; /* Helper to choose random block len for block operations in fuzz_one(). Doesn't return zero, provided that max_len is > 0. */ static u32 choose_block_len(mutate_info_t * info, u32 limit) { u32 min_value, max_value; u32 rlim = MIN(info->queue_cycle, 3); switch (UR(info, rlim)) { case 0: min_value = 1; max_value = HAVOC_BLK_SMALL; break; case 1: min_value = HAVOC_BLK_SMALL; max_value = HAVOC_BLK_MEDIUM; break; default: if (UR(info, 10)) { min_value = HAVOC_BLK_MEDIUM; max_value = HAVOC_BLK_LARGE; } else { min_value = HAVOC_BLK_LARGE; max_value = HAVOC_BLK_XL; } } if (min_value >= limit) min_value = 1; return min_value + UR(info, MIN(max_value, limit) - min_value + 1); } /* Helper function to compare buffers; returns first and last differing offset. We use this to find reasonable locations for splicing two files. */ static void locate_diffs(u8* ptr1, u8* ptr2, u32 len, s32* first, s32* last) { s32 f_loc = -1; s32 l_loc = -1; u32 pos; for (pos = 0; pos < len; pos++) { if (*(ptr1++) != *(ptr2++)) { if (f_loc == -1) f_loc = pos; l_loc = pos; } } *first = f_loc; *last = l_loc; return; } /* Helper function to see if a particular change (xor_val = old ^ new) could be a product of deterministic bit flips with the lengths and stepovers attempted by afl-fuzz. This is used to avoid dupes in some of the deterministic fuzzing operations that follow bit flips. We also return 1 if xor_val is zero, which implies that the old and attempted new values are identical and the exec would be a waste of time. */ static u8 could_be_bitflip(u32 xor_val) { u32 sh = 0; if (!xor_val) return 1; /* Shift left until first bit set. 
*/ while (!(xor_val & 1)) { sh++; xor_val >>= 1; } /* 1-, 2-, and 4-bit patterns are OK anywhere. */ if (xor_val == 1 || xor_val == 3 || xor_val == 15) return 1; /* 8-, 16-, and 32-bit patterns are OK only if shift factor is divisible by 8, since that's the stepover for these ops. */ if (sh & 7) return 0; if (xor_val == 0xff || xor_val == 0xffff || xor_val == 0xffffffff) return 1; return 0; } /* Helper function to see if a particular value is reachable through arithmetic operations. Used for similar purposes. */ static u8 could_be_arith(u32 old_val, u32 new_val, u8 blen) { u32 i, ov = 0, nv = 0, diffs = 0; if (old_val == new_val) return 1; /* See if one-byte adjustments to any byte could produce this result. */ for (i = 0; i < blen; i++) { u8 a = old_val >> (8 * i), b = new_val >> (8 * i); if (a != b) { diffs++; ov = a; nv = b; } } /* If only one byte differs and the values are within range, return 1. */ if (diffs == 1) { if ((u8)(ov - nv) <= ARITH_MAX || (u8)(nv - ov) <= ARITH_MAX) return 1; } if (blen == 1) return 0; /* See if two-byte adjustments to any byte would produce this result. */ diffs = 0; for (i = 0; i < blen / 2U; i++) { u16 a = old_val >> (16 * i), b = new_val >> (16 * i); if (a != b) { diffs++; ov = a; nv = b; } } /* If only one word differs and the values are within range, return 1. */ if (diffs == 1) { if ((u16)(ov - nv) <= ARITH_MAX || (u16)(nv - ov) <= ARITH_MAX) return 1; ov = SWAP16(ov); nv = SWAP16(nv); if ((u16)(ov - nv) <= ARITH_MAX || (u16)(nv - ov) <= ARITH_MAX) return 1; } /* Finally, let's do the same thing for dwords. */ if (blen == 4) { if ((u32)(old_val - new_val) <= ARITH_MAX || (u32)(new_val - old_val) <= ARITH_MAX) return 1; new_val = SWAP32(new_val); old_val = SWAP32(old_val); if ((u32)(old_val - new_val) <= ARITH_MAX || (u32)(new_val - old_val) <= ARITH_MAX) return 1; } return 0; } /* Describe integer as memory size. 
*/ #define CHK_FORMAT(_divisor, _limit_mult, _fmt, _cast) do { \ if (val < (_divisor) * (_limit_mult)) { \ snprintf((char *)tmp[cur], sizeof(tmp[cur]), _fmt, ((_cast)val) / (_divisor)); \ return tmp[cur]; \ } \ } while (0) static u8* DMS(u64 val) { static u8 tmp[12][16]; static u8 cur; cur = (cur + 1) % 12; /* 0-9999 */ CHK_FORMAT(1, 10000, "%llu B", u64); /* 10.0k - 99.9k */ CHK_FORMAT(1024, 99.95, "%0.01f kB", double); /* 100k - 999k */ CHK_FORMAT(1024, 1000, "%llu kB", u64); /* 1.00M - 9.99M */ CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double); /* 10.0M - 99.9M */ CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double); /* 100M - 999M */ CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64); /* 1.00G - 9.99G */ CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double); /* 10.0G - 99.9G */ CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double); /* 100G - 999G */ CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64); /* 1.00T - 9.99G */ CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double); /* 10.0T - 99.9T */ CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double); #undef CHK_FORMAT /* 100T+ */ strncpy((char *)tmp[cur], "infty", sizeof(tmp[cur])); return tmp[cur]; } /* Last but not least, a similar helper to see if insertion of an interesting integer is redundant given the insertions done for shorter blen. The last param (check_le) is set if the caller already executed LE insertion for current blen and wants to see if BE variant passed in new_val is unique. */ static u8 could_be_interest(u32 old_val, u32 new_val, u8 blen, u8 check_le) { u32 i, j; if (old_val == new_val) return 1; /* See if one-byte insertions from interesting_8 over old_val could produce new_val. 
*/ for (i = 0; i < blen; i++) { for (j = 0; j < sizeof(interesting_8); j++) { u32 tval = (old_val & ~(0xff << (i * 8))) | (((u8)interesting_8[j]) << (i * 8)); if (new_val == tval) return 1; } } /* Bail out unless we're also asked to examine two-byte LE insertions as a preparation for BE attempts. */ if (blen == 2 && !check_le) return 0; /* See if two-byte insertions over old_val could give us new_val. */ for (i = 0; i < blen - 1; i++) { for (j = 0; j < sizeof(interesting_16) / 2; j++) { u32 tval = (old_val & ~(0xffff << (i * 8))) | (((u16)interesting_16[j]) << (i * 8)); if (new_val == tval) return 1; /* Continue here only if blen > 2. */ if (blen > 2) { tval = (old_val & ~(0xffff << (i * 8))) | (SWAP16(interesting_16[j]) << (i * 8)); if (new_val == tval) return 1; } } } if (blen == 4 && check_le) { /* See if four-byte insertions could produce the same result (LE only). */ for (j = 0; j < sizeof(interesting_32) / 4; j++) if (new_val == (u32)interesting_32[j]) return 1; } return 0; } /* Read the dictionary from a file */ static int load_dictionary_file(mutate_info_t * info, char * fname, u32* min_len, u32* max_len, u32 dict_level) { FILE* fp; char buf[MAX_LINE]; u8 *lptr; u32 cur_line = 0; char* hexdigits = "0123456789abcdef"; string_t ** temp_dictq; fp = fopen(fname, "r"); if (!fp) { printf("Unable to open dictionary file '%s'", fname); return 1; } while ((lptr = (u8 *)fgets(buf, MAX_LINE, fp))) { u8 *rptr, *wptr, *new_item; u32 klen = 0; cur_line++; // Trim on left and right. while (isspace(*lptr)) lptr++; rptr = lptr + strlen((char *)lptr) - 1; while (rptr >= lptr && isspace(*rptr)) rptr--; rptr++; *rptr = 0; // Skip empty lines and comments. if (!*lptr || *lptr == '#') continue; // All other lines must end with '"', which we can consume. rptr--; if (rptr < lptr || *rptr != '"') { printf("Malformed name=\"value\" pair in dictionary file %s on line %u.", fname, cur_line); fclose(fp); return 1; } *rptr = 0; // Skip alphanumerics and dashes (label). 
while (isalnum(*lptr) || *lptr == '_') lptr++; // If @number follows, parse that. if (*lptr == '@') { lptr++; if (atoi((char *)lptr) > dict_level) continue; while (isdigit(*lptr)) lptr++; } // Skip whitespace and = signs. while (isspace(*lptr) || *lptr == '=') lptr++; // Consume opening '"'. if (*lptr != '"') { printf("Malformed name=\"keyword\" pair in dictionary file %s on line %u.", fname, cur_line); fclose(fp); return 1; } lptr++; if (!*lptr) { printf("Empty keyword in dictionary file %s on line %u.", fname, cur_line); fclose(fp); return 1; } // Okay, let's allocate memory and copy data between "...", handling // \xNN escaping, \\, and \". wptr = new_item = (u8 *)malloc(rptr - lptr); if (!new_item) { printf("Failed allocating memory while parsing dictionary file %s, line %u.", fname, cur_line); fclose(fp); return 1; } while (*lptr) { if ((*lptr >= 1 && *lptr <= 31) || (*lptr >= 128 && *lptr <= 255)) { printf("Non-printable characters in dictionary file %s on line %u.", fname, cur_line); free(new_item); fclose(fp); return 1; } if (*lptr == '\\') { lptr++; if (*lptr == '\\' || *lptr == '"') { *(wptr++) = *(lptr++); klen++; } else { if (*lptr != 'x' || !isxdigit(lptr[1]) || !isxdigit(lptr[2])) { printf("Invalid escaping (not \\xNN) in dictionary file %s on line %u.", fname, cur_line); free(new_item); fclose(fp); return 1; } *(wptr++) = ((strchr(hexdigits, tolower(lptr[1])) - hexdigits) << 4) | (strchr(hexdigits, tolower(lptr[2])) - hexdigits); lptr += 3; klen++; } } else { *(wptr++) = *(lptr++); klen++; } } if (klen > MAX_DICT_FILE) { printf("Keyword too big in line %u (%s, limit is %s)", cur_line, DMS(klen), DMS(MAX_DICT_FILE)); free(new_item); fclose(fp); return 1; } if (*min_len > klen) *min_len = klen; if (*max_len < klen) *max_len = klen; temp_dictq = (string_t **)realloc(info->dictq, (info->dictionary_count + 1) * sizeof(string_t *)); if (!temp_dictq) { printf("Failed allocating memory while parsing dictionary file %s, line %u.", fname, cur_line); 
free(new_item); fclose(fp); return 1; } info->dictq = temp_dictq; info->dictq[info->dictionary_count] = (string_t *)malloc(sizeof(string_t)); if (!info->dictq[info->dictionary_count]) { printf("Failed allocating memory while parsing dictionary file %s, line %u.", fname, cur_line); free(new_item); fclose(fp); return 1; } info->dictq[info->dictionary_count]->s = new_item; info->dictq[info->dictionary_count]->len = klen; info->dictionary_count++; } fclose(fp); return 0; } /* Read the dictionary from the dictionary directory */ MUTATORS_API int load_dictionary(mutate_info_t * info, char * path) { u32 min_len = MAX_DICT_FILE, max_len = 0, dict_level = 0; char * x, * file_contents; char filename[MAX_PATH]; int length, ret; string_t ** temp_dictq; /* If the name ends with @, extract level and continue. */ if ((x = strchr(path, '@'))) { *x = 0; dict_level = atoi(x + 1); } ACTF("Loading extra dictionary from '%s' (level %u)...", path, dict_level); #ifdef _WIN32 WIN32_FIND_DATA fdata; HANDLE h; wchar_t * wide_pattern; if (path[strlen(path) - 1] == '\\') snprintf(filename, sizeof(filename), "%s*", path); else snprintf(filename, sizeof(filename), "%s\\*", path); wide_pattern = convert_char_array_to_wchar(filename, NULL); h = FindFirstFile(wide_pattern, &fdata); free(wide_pattern); if (h == INVALID_HANDLE_VALUE) { ret = load_dictionary_file(info, path, &min_len, &max_len, dict_level); if (ret) return ret; goto check_dictionary; } if (x) { printf("Dictionary levels not supported for directories."); FindClose(h); return 1; } do { if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) continue; snprintf(filename, sizeof(filename), "%s\\%s", path, fdata.cFileName); if (_access(filename, 0)) { printf("Unable to access dictionary file '%s'", filename); FindClose(h); return 1; } if ((fdata.nFileSizeHigh > 0) || (fdata.nFileSizeLow > MAX_DICT_FILE)) { printf("Dictionary item '%s' is too big (%s, limit is %s)", filename, DMS(fdata.nFileSizeLow), DMS(MAX_DICT_FILE)); FindClose(h); return 
1; } if (min_len > fdata.nFileSizeLow) min_len = fdata.nFileSizeLow; if (max_len < fdata.nFileSizeLow) max_len = fdata.nFileSizeLow; length = read_file(filename, &file_contents); if (length < 0) { printf("Unable to open dictionary file '%s'", filename); FindClose(h); return 1; } temp_dictq = (string_t **)realloc(info->dictq, (info->dictionary_count + 1) * sizeof(string_t *)); if (!temp_dictq) { printf("Failed allocating memory while parsing dictionary file %s.", filename); free(file_contents); FindClose(h); return 1; } info->dictq = temp_dictq; info->dictq[info->dictionary_count] = (string_t *)malloc(sizeof(string_t)); if (!info->dictq[info->dictionary_count]) { printf("Failed allocating memory while parsing dictionary file %s.", filename); free(file_contents); FindClose(h); return 1; } info->dictq[info->dictionary_count]->len = length; info->dictq[info->dictionary_count]->s = (u8*)file_contents; info->dictionary_count++; } while (FindNextFile(h, &fdata)); FindClose(h); #else DIR* d; struct dirent* de; struct stat st; int fd; d = opendir(path); if (!d) { ret = load_dictionary_file(info, path, &min_len, &max_len, dict_level); if (ret) return ret; goto check_dictionary; } if (x) { printf("Dictionary levels not supported for directories."); return 1; } while ((de = readdir(d))) { snprintf(filename, sizeof(filename), "%s/%s", path, de->d_name); if (lstat(filename, &st) || access(filename, R_OK)) { printf("Unable to access dictionary file '%s'", filename); return 1; } /* This also takes care of . and .. 
*/ if (!S_ISREG(st.st_mode) || !st.st_size) continue; if (st.st_size > MAX_DICT_FILE) { printf("Dictionary item '%s' is too big (%s, limit is %s)", filename, DMS(st.st_size), DMS(MAX_DICT_FILE)); return 1; } if (min_len > st.st_size) min_len = st.st_size; if (max_len < st.st_size) max_len = st.st_size; info->dictq = (string_t **)realloc(info->dictq, (info->dictionary_count + 1) * sizeof(string_t *)); info->dictq[info->dictionary_count] = (string_t *)malloc(sizeof(string_t)); length = read_file(filename, (char **)&info->dictq[info->dictionary_count]->s); if (length < 0) { printf("Unable to open dictionary file '%s'", filename); return 1; } info->dictq[info->dictionary_count]->len = length; info->dictionary_count++; } closedir(d); #endif check_dictionary: if (!info->dictionary_count) { printf("No usable dictionary files in '%s'", path); return 1; } OKF("Loaded %llu dictionary tokens, size range %s to %s.", info->dictionary_count, DMS(min_len), DMS(max_len)); if (max_len > 32) WARNF("Some tokens are relatively large (%s) - consider trimming.", DMS(max_len)); if (info->dictionary_count > MAX_DET_EXTRAS) WARNF("More than %u tokens - will use them probabilistically.", MAX_DET_EXTRAS); return 0; } MUTATORS_API int single_walking_bit(mutate_info_t * info, mutate_buffer_t * buf) { if (info->stage_cur >= buf->length << 3) return MUTATOR_DONE; FLIP_BIT(buf->buffer, info->stage_cur); return (int)buf->length; } MUTATORS_API int two_walking_bit(mutate_info_t * info, mutate_buffer_t * buf) { if (info->stage_cur >= (buf->length << 3) - 1) return MUTATOR_DONE; FLIP_BIT(buf->buffer, info->stage_cur); FLIP_BIT(buf->buffer, info->stage_cur + 1); return (int)buf->length; } MUTATORS_API int four_walking_bit(mutate_info_t * info, mutate_buffer_t * buf) { if (info->stage_cur >= (buf->length << 3) - 3) return MUTATOR_DONE; FLIP_BIT(buf->buffer, info->stage_cur); FLIP_BIT(buf->buffer, info->stage_cur + 1); FLIP_BIT(buf->buffer, info->stage_cur + 2); FLIP_BIT(buf->buffer, info->stage_cur + 
3);
  return (int)buf->length;
}

/*
 * Invert the byte at offset info->stage_cur.
 * Returns MUTATOR_DONE once stage_cur is past the last byte, otherwise the
 * buffer length.
 */
MUTATORS_API int walking_byte(mutate_info_t * info, mutate_buffer_t * buf)
{
  if (info->stage_cur >= buf->length)
    return MUTATOR_DONE;
  buf->buffer[info->stage_cur] ^= 0xFF;
  return (int)buf->length;
}

/*
 * Invert the two bytes starting at offset info->stage_cur.
 * Returns MUTATOR_DONE when the buffer is shorter than two bytes or no
 * two-byte window is left, otherwise the buffer length.
 */
MUTATORS_API int two_walking_byte(mutate_info_t * info, mutate_buffer_t * buf)
{
  // Check the length before the unsigned subtraction so it cannot wrap.
  if (buf->length < 2 || info->stage_cur >= buf->length - 1)
    return MUTATOR_DONE;
  *(u16*)(buf->buffer + info->stage_cur) ^= 0xFFFF;
  return (int)buf->length;
}

/*
 * Invert the four bytes starting at offset info->stage_cur.
 * Returns MUTATOR_DONE when the buffer is shorter than four bytes or no
 * four-byte window is left, otherwise the buffer length.
 */
MUTATORS_API int four_walking_byte(mutate_info_t * info, mutate_buffer_t * buf)
{
  // Check the length before the unsigned subtraction so it cannot wrap.
  if (buf->length < 4 || info->stage_cur >= buf->length - 3)
    return MUTATOR_DONE;
  *(u32*)(buf->buffer + info->stage_cur) ^= 0xFFFFFFFF;
  return (int)buf->length;
}

/*
 * Add or subtract 1..ARITH_MAX to/from one byte of the buffer.
 * info->stage_cur encodes (byte index, add/subtract round, delta).
 * Returns MUTATOR_DONE when all combinations are exhausted,
 * MUTATOR_TRY_AGAIN when this combination is skipped, otherwise the
 * buffer length.
 */
MUTATORS_API int one_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf)
{
  uint64_t index, round;
  u8 old_value, new_value, arith_value;

  if (info->stage_cur >= 2 * buf->length * ARITH_MAX)
    return MUTATOR_DONE;

  index = info->stage_cur / (2 * ARITH_MAX);
  round = (info->stage_cur / ARITH_MAX) % 2;
  arith_value = (u8)(info->stage_cur % (ARITH_MAX));
  old_value = buf->buffer[index];

  if (round == 0) //one byte addition
    new_value = old_value + (arith_value + 1);
  else //one byte subtraction
    new_value = old_value - (arith_value + 1);

  // Do arithmetic operations only if the result couldn't be a product of a bitflip.
if (info->should_skip_previous && could_be_bitflip(old_value ^ new_value)) return MUTATOR_TRY_AGAIN; buf->buffer[index] = new_value; return (int)buf->length; } MUTATORS_API int two_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index, round; u16 old_value, new_value, arith_value; if (info->stage_cur >= 4 * (buf->length - 1) * ARITH_MAX || buf->length < 2) return MUTATOR_DONE; index = info->stage_cur / (4 * ARITH_MAX); round = (info->stage_cur / ARITH_MAX) % 4; arith_value = (info->stage_cur % (ARITH_MAX)) + 1; old_value = *(u16*)(buf->buffer + index); if (round == 0) //little endian addition new_value = old_value + arith_value; else if (round == 1) //little endian subtraction new_value = old_value - arith_value; else if (round == 2) //big endian addition new_value = SWAP16(SWAP16(old_value) + arith_value); else //big endian subtraction new_value = SWAP16(SWAP16(old_value) - arith_value); // Try little endian addition and subtraction first, then big endian. Do it only // if the operation would affect more than one byte (hence the & 0xff overflow checks) // and if it couldn't be a product of a bitflip. 
if ((info->should_skip_previous && could_be_bitflip(old_value ^ new_value)) || (round == 0 && (old_value & 0xff) + arith_value <= 0xff) || (round == 1 && (old_value & 0xff) > arith_value) || (round == 2 && (old_value >> 8) + arith_value <= 0xff) || (round == 3 && (old_value >> 8) > arith_value)) return MUTATOR_TRY_AGAIN; *(u16*)(buf->buffer + index) = new_value; return (int)buf->length; } MUTATORS_API int four_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index, round; u32 old_value, new_value, arith_value; if (info->stage_cur >= 4 * (buf->length - 3) * ARITH_MAX || buf->length < 4) return MUTATOR_DONE; index = info->stage_cur / (4 * ARITH_MAX); round = (info->stage_cur / ARITH_MAX) % 4; arith_value = (info->stage_cur % (ARITH_MAX)) + 1; old_value = *(u32*)(buf->buffer + index); if (round == 0) //little endian addition new_value = old_value + arith_value; else if (round == 1) //little endian subtraction new_value = old_value - arith_value; else if (round == 2) //big endian addition new_value = SWAP32(SWAP32(old_value) + arith_value); else //big endian subtraction new_value = SWAP32(SWAP32(old_value) - arith_value); // Little endian first. Same deal as with 16-bit: we only want to // try if the operation would have effect on more than two bytes. 
if ((info->should_skip_previous && could_be_bitflip(old_value ^ new_value)) || (round == 0 && (old_value & 0xffff) + arith_value <= 0xffff) || (round == 1 && (old_value & 0xffff) > arith_value) || (round == 2 && (SWAP32(old_value) & 0xffff) + arith_value <= 0xffff) || (round == 3 && (SWAP32(old_value) & 0xffff) > arith_value)) return MUTATOR_TRY_AGAIN; *(u32*)(buf->buffer + index) = new_value; return (int)buf->length; } MUTATORS_API int interesting_one_byte(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index; u8 old_value, new_value; if (info->stage_cur >= buf->length * ARRAY_SIZE(interesting_8)) return MUTATOR_DONE; index = info->stage_cur / ARRAY_SIZE(interesting_8); old_value = buf->buffer[index]; new_value = interesting_8[info->stage_cur % ARRAY_SIZE(interesting_8)]; // Skip if the value could be a product of bitflips or arithmetics. if (info->should_skip_previous && (could_be_bitflip(old_value ^ new_value) || could_be_arith(old_value, new_value, 1))) return MUTATOR_TRY_AGAIN; buf->buffer[index] = new_value; return (int)buf->length; } MUTATORS_API int interesting_two_byte(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index, round; u16 old_value, new_value; if (info->stage_cur >= 2 * (buf->length - 1) * ARRAY_SIZE(interesting_16) || buf->length < 2) return MUTATOR_DONE; index = info->stage_cur / (2 * ARRAY_SIZE(interesting_16)); round = (info->stage_cur / ARRAY_SIZE(interesting_16)) % 2; old_value = *(u16*)(buf->buffer + index); new_value = interesting_16[info->stage_cur % ARRAY_SIZE(interesting_16)]; if (round) //second round, use reverse endian new_value = SWAP16(new_value); /* Skip if this could be a product of a bitflip, arithmetics, single-byte interesting value insertion, or if on the reverse endian round and the value is the same in both endians */ if ((info->should_skip_previous && (could_be_bitflip(old_value ^ new_value) || could_be_arith(old_value, new_value, 2) || could_be_interest(old_value, new_value, 2, round))) || (round == 1 
&& new_value == SWAP16(new_value))) return MUTATOR_TRY_AGAIN; *(u16*)(buf->buffer + index) = new_value; return (int)buf->length; } MUTATORS_API int interesting_four_byte(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index, round; u32 old_value, new_value; if (info->stage_cur >= 2 * (buf->length - 3) * ARRAY_SIZE(interesting_32) || buf->length < 4) return MUTATOR_DONE; index = info->stage_cur / (2 * ARRAY_SIZE(interesting_32)); round = (info->stage_cur / ARRAY_SIZE(interesting_32)) % 2; old_value = *(u32*)(buf->buffer + index); new_value = interesting_32[info->stage_cur % ARRAY_SIZE(interesting_32)]; if (round) //second round, use reverse endian new_value = SWAP32(new_value); /* Skip if this could be a product of a bitflip, arithmetics, single-byte interesting value insertion, or if on the reverse endian round and the value is the same in both endians */ if ((info->should_skip_previous && (could_be_bitflip(old_value ^ new_value) || could_be_arith(old_value, new_value, 4) || could_be_interest(old_value, new_value, 4, round))) || (round == 1 && new_value == SWAP32(new_value))) return MUTATOR_TRY_AGAIN; *(u32*)(buf->buffer + index) = new_value; return (int)buf->length; } MUTATORS_API int dictionary_overwrite(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index; string_t * dictionary_item; if (!info->dictionary_count || !info->dictq || info->stage_cur > buf->length * info->dictionary_count) return MUTATOR_DONE; index = info->stage_cur / info->dictionary_count; dictionary_item = info->dictq[info->stage_cur % info->dictionary_count]; // Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also // skip if there's no room to insert the payload or if the token is redundant. 
if ((info->dictionary_count > MAX_DET_EXTRAS && UR(info, info->dictionary_count) >= MAX_DET_EXTRAS) || dictionary_item->len > buf->max_length - index || !memcmp(dictionary_item->s, buf->buffer + index, dictionary_item->len)) return MUTATOR_TRY_AGAIN; memcpy(buf->buffer + index, dictionary_item->s, dictionary_item->len); buf->length = MAX(buf->length, index + dictionary_item->len); return (int)buf->length; } MUTATORS_API int dictionary_insert(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t index; string_t * dictionary_item; if (!info->dictionary_count || !info->dictq || info->stage_cur > buf->length * info->dictionary_count) return MUTATOR_DONE; index = info->stage_cur / info->dictionary_count; dictionary_item = info->dictq[info->stage_cur % info->dictionary_count]; // Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also // skip if there's no room to insert the payload or if the token is redundant. if ((info->dictionary_count > MAX_DET_EXTRAS && UR(info, info->dictionary_count) >= MAX_DET_EXTRAS) || dictionary_item->len > buf->max_length - index || buf->length + dictionary_item->len > buf->max_length || !memcmp(dictionary_item->s, buf->buffer + index, dictionary_item->len)) return MUTATOR_TRY_AGAIN; memmove(buf->buffer + index + dictionary_item->len, buf->buffer + index, buf->length - index); memcpy(buf->buffer + index, dictionary_item->s, dictionary_item->len); buf->length += dictionary_item->len; return (int)buf->length; } MUTATORS_API int havoc(mutate_info_t * info, mutate_buffer_t * buf) { uint64_t use_stacking, i; u32 pos, num32, del_from, del_len, insert_at, use_extra; u32 copy_from, copy_to, copy_len; u32 clone_from, clone_to, clone_len; u16 num16; u8 actually_clone; string_t * dictionary_item; use_stacking = 1ULL << (1 + UR(info, HAVOC_STACK_POW2)); for (i = 0; i < use_stacking; i++) { switch (UR(info, 15 + (info->dictionary_count ? 2 : 0))) { case 0: // Flip a single bit somewhere. Spooky! 
FLIP_BIT(buf->buffer, UR(info, buf->length << 3)); break; case 1: // Set byte to interesting value. buf->buffer[UR(info, buf->length)] = interesting_8[UR(info, sizeof(interesting_8))]; break; case 2: // Set word to interesting value, randomly choosing endian. if (buf->length < 2) break; if (UR(info, 2)) { *(u16*)(buf->buffer + UR(info, buf->length - 1)) = interesting_16[UR(info, sizeof(interesting_16) >> 1)]; } else { *(u16*)(buf->buffer + UR(info, buf->length - 1)) = SWAP16(interesting_16[UR(info, sizeof(interesting_16) >> 1)]); } break; case 3: // Set dword to interesting value, randomly choosing endian. if (buf->length < 4) break; if (UR(info, 2)) { *(u32*)(buf->buffer + UR(info, buf->length - 3)) = interesting_32[UR(info, sizeof(interesting_32) >> 2)]; } else { *(u32*)(buf->buffer + UR(info, buf->length - 3)) = SWAP32(interesting_32[UR(info, sizeof(interesting_32) >> 2)]); } break; case 4: // Randomly subtract from byte. buf->buffer[UR(info, buf->length)] -= 1 + UR(info, ARITH_MAX); break; case 5: // Randomly add to byte. buf->buffer[UR(info, buf->length)] += 1 + UR(info, ARITH_MAX); break; case 6: // Randomly subtract from word, random endian. if (buf->length < 2) break; pos = UR(info, buf->length - 1); num16 = 1 + UR(info, ARITH_MAX); if (UR(info, 2)) *(u16*)(buf->buffer + pos) -= num16; else *(u16*)(buf->buffer + pos) = SWAP16(SWAP16(*(u16*)(buf->buffer + pos)) - num16); break; case 7: // Randomly add to word, random endian. if (buf->length < 2) break; pos = UR(info, buf->length - 1); num16 = 1 + UR(info, ARITH_MAX); if (UR(info, 2)) *(u16*)(buf->buffer + pos) += num16; else *(u16*)(buf->buffer + pos) = SWAP16(SWAP16(*(u16*)(buf->buffer + pos)) + num16); break; case 8: // Randomly subtract from dword, random endian. 
if (buf->length < 4) break; pos = UR(info, buf->length - 3); num32 = 1 + UR(info, ARITH_MAX); if (UR(info, 2)) *(u32*)(buf->buffer + pos) -= num32; else *(u32*)(buf->buffer + pos) = SWAP32(SWAP32(*(u32*)(buf->buffer + pos)) - num32); break; case 9: // Randomly add to dword, random endian. if (buf->length < 4) break; pos = UR(info, buf->length - 3); num32 = 1 + UR(info, ARITH_MAX); if (UR(info, 2)) *(u32*)(buf->buffer + pos) += num32; else *(u32*)(buf->buffer + pos) = SWAP32(SWAP32(*(u32*)(buf->buffer + pos)) + num32); break; case 10: /* Just set a random byte to a random value. Because, why not. We use XOR with 1-255 to eliminate the possibility of a no-op. */ buf->buffer[UR(info, buf->length)] ^= 1 + UR(info, 255); break; case 11: case 12: /* Delete bytes. We're making this a bit more likely than insertion (the next option) in hopes of keeping files reasonably small. */ if (buf->length < 2) break; del_len = choose_block_len(info, buf->length - 1); del_from = UR(info, buf->length - del_len + 1); memmove(buf->buffer + del_from, buf->buffer + del_from + del_len, buf->length - del_from - del_len); buf->length -= del_len; break; case 13: //Clone bytes (75%) or insert a block of constant bytes (25%). if (buf->length + HAVOC_BLK_XL >= MAX_FILE) break; actually_clone = UR(info, 4); if (actually_clone) { clone_len = choose_block_len(info, buf->length); clone_len = MIN(clone_len, buf->max_length - buf->length); clone_from = UR(info, buf->length - clone_len + 1); } else { clone_len = choose_block_len(info, HAVOC_BLK_XL); clone_len = MIN(clone_len, buf->max_length - buf->length); clone_from = 0; } clone_to = UR(info, buf->length); memmove(buf->buffer + clone_to + clone_len, buf->buffer + clone_to, buf->length - clone_to); if (actually_clone) memmove(buf->buffer + clone_to, buf->buffer + clone_from, clone_len); else memset(buf->buffer + clone_to, UR(info, 2) ? 
UR(info, 256) : buf->buffer[UR(info, buf->length)], clone_len); buf->length += clone_len; break; case 14: // Overwrite bytes with a randomly selected chunk (75%) or fixed bytes (25%). if (buf->length < 2) break; copy_len = choose_block_len(info, buf->length - 1); copy_from = UR(info, buf->length - copy_len + 1); copy_to = UR(info, buf->length - copy_len + 1); if (!UR(info, 4)) memset(buf->buffer + copy_to, UR(info, 2) ? UR(info, 256) : buf->buffer[UR(info, buf->length)], copy_len); else if (copy_from != copy_to) memmove(buf->buffer + copy_to, buf->buffer + copy_from, copy_len); break; case 15: // Overwrite bytes with a dictionary item use_extra = UR(info, info->dictionary_count); dictionary_item = info->dictq[use_extra]; if (dictionary_item->len > buf->length) break; insert_at = UR(info, buf->length - dictionary_item->len + 1); memcpy(buf->buffer + insert_at, dictionary_item->s, dictionary_item->len); break; case 16: // Insert an extra. Do the same dice-rolling stuff as for the previous case. insert_at = UR(info, buf->length + 1); use_extra = UR(info, info->dictionary_count); dictionary_item = info->dictq[use_extra]; if (buf->length + dictionary_item->len >= buf->max_length) break; memmove(buf->buffer + insert_at + dictionary_item->len, buf->buffer + insert_at, buf->length - insert_at); memcpy(buf->buffer + insert_at, dictionary_item->s, dictionary_item->len); buf->length += dictionary_item->len; break; } } return (int)buf->length; } MUTATORS_API int splice_buffers(mutate_info_t * info, mutate_buffer_t * buf) { string_t * target = NULL; u32 attempts = 0, split_at; s32 f_diff = -1, l_diff = -1; // Splicing takes the current input file, randomly selects another input, and // splices them together at some offset, then relies on the havoc code to mutate that blob. 
if (info->splice_files_count == 0) return MUTATOR_DONE; //Pick a target to splice with while (target == NULL || ((f_diff < 0 || l_diff < 2 || f_diff == l_diff) && (attempts < 2 * info->splice_files_count))) { attempts++; target = info->splice_files[UR(info, info->splice_files_count)]; locate_diffs(buf->buffer, target->s, MIN(buf->length, target->len), &f_diff, &l_diff); } if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) return MUTATOR_TRY_AGAIN; // Split somewhere between the first and last differing byte. split_at = f_diff + UR(info, l_diff - f_diff); buf->length = target->len; memcpy(buf->buffer + split_at, target->s + split_at, target->len - split_at); return havoc(info, buf); } ================================================ FILE: mutators/mutators/afl_helpers.h ================================================ #pragma once #include "mutators.h" #include "afl_types.h" #include #include typedef struct { u8* s; size_t len; } string_t; typedef struct { uint8_t * buffer; size_t length; size_t max_length; } mutate_buffer_t; typedef struct { int should_skip_previous; int one_stage_only; int havoc_div; int perf_score; char * dictionary_file; uint64_t dictionary_count; string_t ** dictq; char ** splice_filenames; size_t splice_filenames_count; uint64_t splice_files_count; string_t ** splice_files; //Used to protects the fields below, as well as any non-thread safe fields in mutex_t mutate_mutex; //the mutator-specific state (such as the iteration) uint64_t random_state[2]; //the state of the random number generator uint64_t stage_cur; //The current iteration number for the current mutation stage int stage; //The current mutation stage, an index into the mutation functions passed to mutate_one int queue_cycle; } mutate_info_t; MUTATORS_API u32 UR(mutate_info_t * info, u32 limit); MUTATORS_API int load_dictionary(mutate_info_t * info, char * path); MUTATORS_API int load_splice_files(mutate_info_t * info, char ** splice_filenames, size_t splice_filenames_count); 
MUTATORS_API int reset_mutate_info(mutate_info_t * info); MUTATORS_API void cleanup_mutate_info(mutate_info_t * info); MUTATORS_API int add_mutate_info_to_json(json_t * obj, mutate_info_t * info); MUTATORS_API int get_mutate_info_from_json(char * state, mutate_info_t * info); MUTATORS_API int mutate_one(mutate_info_t * info, mutate_buffer_t * buf, int(*const*mutate_funcs)(mutate_info_t *, mutate_buffer_t *), size_t num_funcs); //Individual mutation functions MUTATORS_API int single_walking_bit(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int two_walking_bit(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int four_walking_bit(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int walking_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int two_walking_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int four_walking_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int one_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int two_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int four_byte_arithmetics(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int interesting_one_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int interesting_two_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int interesting_four_byte(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int dictionary_overwrite(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int dictionary_insert(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int havoc(mutate_info_t * info, mutate_buffer_t * buf); MUTATORS_API int splice_buffers(mutate_info_t * info, mutate_buffer_t * buf); //There are no more mutations possible for this mutation function #define MUTATOR_DONE 0 //This specific mutation can't be done, try again (i.e. 
we are trying a mutation //that was already done in an earlier round) #define MUTATOR_TRY_AGAIN -1 //A macro to parse the options used during afl fuzzing #define PARSE_MUTATE_INFO_OPTIONS(state, options, cleanup_func, dictionary_required, splice_required) \ PARSE_OPTION_UINT64T_TEMP(state, options, info.random_state[0], "random_state0", cleanup_func, random_state0); \ PARSE_OPTION_UINT64T_TEMP(state, options, info.random_state[1], "random_state1", cleanup_func, random_state1); \ PARSE_OPTION_INT_TEMP(state, options, info.stage, "stage", cleanup_func, stage); \ PARSE_OPTION_INT_TEMP(state, options, info.stage_cur, "stage_cur", cleanup_func, stage_cur); \ PARSE_OPTION_INT_TEMP(state, options, info.should_skip_previous, "skip_previous_stages", cleanup_func, should_skip_previous); \ PARSE_OPTION_INT_TEMP(state, options, info.queue_cycle, "queue_cycle", cleanup_func, queue_cycle); \ PARSE_OPTION_INT_TEMP(state, options, info.havoc_div, "havoc_div", cleanup_func, havoc_div); \ PARSE_OPTION_INT_TEMP(state, options, info.perf_score, "perf_score", cleanup_func, perf_score); \ PARSE_OPTION_STRING_TEMP(state, options, info.dictionary_file, "dictionary", cleanup_func, dictionary); \ PARSE_OPTION_ARRAY_TEMP(state, options, info.splice_filenames, info.splice_filenames_count, "splice_filenames", cleanup_func, ss); \ if ((dictionary_required && !state->info.dictionary_file) || \ (state->info.dictionary_file && load_dictionary(&state->info, state->info.dictionary_file))) \ { \ cleanup_func(state); \ return NULL; \ } \ if ((splice_required && !state->info.splice_filenames_count) || \ (state->info.splice_filenames_count && \ load_splice_files(&state->info, state->info.splice_filenames, state->info.splice_filenames_count))) \ { \ cleanup_func(state); \ return NULL; \ } \ ================================================ FILE: mutators/mutators/afl_types.h ================================================ /* american fuzzy lop - type definitions and minor macros 
------------------------------------------------------ Written and maintained by Michal Zalewski Copyright 2013, 2014, 2015 Google Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 This file has been modified from the original to suit the purposes of this project. */ #ifndef _HAVE_TYPES_H #define _HAVE_TYPES_H #include #include #ifdef __APPLE__ //MacOS #include #define MAX_PATH PATH_MAX //linux/apple have PATH_MAX, windows has MAX_PATH #elif !defined(_WIN32) //Not Windows #include #define MAX_PATH PATH_MAX //linux/apple have PATH_MAX, windows has MAX_PATH #endif typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; /* Ugh. There is an unintended compiler / glibc #include glitch caused by combining the u64 type an %llu in format strings, necessitating a workaround. In essence, the compiler is always looking for 'unsigned long long' for %llu. On 32-bit systems, the u64 type (aliased to uint64_t) is expanded to 'unsigned long long' in , so everything checks out. But on 64-bit systems, it is #ifdef'ed in the same file as 'unsigned long'. Now, it only happens in circumstances where the type happens to have the expected bit width, *but* the compiler does not know that... and complains about 'unsigned long' being unsafe to pass to %llu. */ #ifdef __x86_64__ typedef unsigned long long u64; #else typedef uint64_t u64; #endif /* ^__x86_64__ */ typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; #ifndef MIN # define MIN(_a,_b) ((_a) > (_b) ? (_b) : (_a)) # define MAX(_a,_b) ((_a) > (_b) ? 
(_a) : (_b)) #endif /* !MIN */ #ifdef _WIN32 #define SWAP16(_x) _byteswap_ushort(_x) #define SWAP32(_x) _byteswap_ulong(_x) #define SWAP64(_x) _byteswap_uint64(_x) #elif defined(__APPLE__) #define SWAP16(_x) __builtin_bswap16(_x) #define SWAP32(_x) __builtin_bswap32(_x) #define SWAP64(_x) __builtin_bswap64(_x) #else #define SWAP16(_x) bswap_16(_x) #define SWAP32(_x) bswap_32(_x) #define SWAP64(_x) bswap_64(_x) #endif #ifdef AFL_LLVM_PASS # define AFL_R(x) (random() % (x)) #else # define R(x) (random() % (x)) #endif /* ^AFL_LLVM_PASS */ #define STRINGIFY_INTERNAL(x) #x #define STRINGIFY(x) STRINGIFY_INTERNAL(x) #define MEM_BARRIER() \ asm volatile("" ::: "memory") #define likely(_x) __builtin_expect(!!(_x), 1) #define unlikely(_x) __builtin_expect(!!(_x), 0) #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*x)) #endif /* ! _HAVE_TYPES_H */ ================================================ FILE: mutators/mutators/mutators.c ================================================ #include "mutators.h" #include #include MUTATORS_API void default_free_state(char * state) { free(state); } MUTATORS_API int return_unknown_or_infinite_total_iterations(void * mutator_state) { return -1; //infinite } ================================================ FILE: mutators/mutators/mutators.h ================================================ #pragma once #ifdef _WIN32 #if defined(MUTATORS_EXPORTS) #define MUTATORS_API __declspec(dllexport) #elif defined(MUTATORS_NO_IMPORT) #define MUTATORS_API #elif defined(__cplusplus) #define MUTATORS_API extern "C" __declspec(dllimport) #else #define MUTATORS_API __declspec(dllimport) #endif #else #define MUTATORS_API #endif //If you're combining all of the mutators into one project, uncomment this to give them all //unique names //#define ALL_MUTATORS_IN_ONE #ifndef ALL_MUTATORS_IN_ONE #define FUNCNAME(name) name #else #define FUNCNAME(name) MUTATOR_NAME ## _ ## name #endif MUTATORS_API void default_free_state(char * state); MUTATORS_API int 
return_unknown_or_infinite_total_iterations(void * mutator_state); #define GENERIC_MUTATOR_CREATE(type_t, option_parser_func, cleanup_state_func) \ type_t * new_state = option_parser_func(options); \ if (!new_state) \ return NULL; \ new_state->input = (char *)malloc(input_length); \ if (!new_state->input || !input_length) \ { \ cleanup_state_func(new_state); \ return NULL; \ } \ memcpy(new_state->input, input, input_length); \ new_state->input_length = input_length; \ if(state && FUNCNAME(set_state)(new_state, state)) { \ cleanup_state_func(new_state); \ return NULL; \ } \ return new_state; #define GENERIC_MUTATOR_CLEANUP(type_t) \ type_t * cleanup_state = (type_t *)mutator_state; \ free(cleanup_state->input); \ free(cleanup_state); #define GENERIC_MUTATOR_GET_ITERATION(type_t) \ type_t * iteration_state = (type_t *)mutator_state; \ return iteration_state->iteration; #define GENERIC_MUTATOR_SET_INPUT(type_t) \ type_t * state = (type_t *)mutator_state; \ if (state->input) \ free(state->input); \ state->input = (char *)malloc(input_length); \ if (!state->input) \ return -1; \ state->input_length = input_length; \ memcpy(state->input, new_input, input_length); \ return 0; #define GENERIC_MUTATOR_HELP(msg) \ *help_str = strdup(msg); \ if (*help_str == NULL) \ return -1; \ return 0; \ #define SINGLE_INPUT_GET_INFO(type_t) \ type_t * state = (type_t *)mutator_state; \ if (num_inputs) \ *num_inputs = 1; \ if (input_sizes) { \ *input_sizes = malloc(sizeof(size_t)); \ *input_sizes[0] = state->input_length; \ } #define SINGLE_INPUT_MUTATE_EXTENDED(type_t, mutex) \ type_t * state = (type_t *)mutator_state; \ int ret; \ if ((flags & MUTATE_MULTIPLE_INPUTS) && (flags & MUTATE_MULTIPLE_INPUTS_MASK) != 0) \ return -1; \ if ((flags & MUTATE_THREAD_SAFE) && take_mutex(mutex)) \ return -1; \ ret = FUNCNAME(mutate)(state, buffer, buffer_length); \ if ((flags & MUTATE_THREAD_SAFE) && release_mutex(mutex)) \ return -1; \ return ret; #define FLIP_BIT(_ar, _b) do { \ u8* _arf = 
(u8*)(_ar); \ u64 _bf = (_b); \ _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \ } while (0) ================================================ FILE: mutators/ni_mutator/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (ni_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(NI_SRC ${PROJECT_SOURCE_DIR}/ni_mutator.c) source_group("Library Sources" FILES ${NI_SRC}) add_library(ni_mutator SHARED ${NI_SRC} $ $) target_link_libraries(ni_mutator utils) target_compile_definitions(ni_mutator PUBLIC NI_MUTATOR_EXPORTS) target_compile_definitions(ni_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(ni_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(ni_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(ni_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/ni_mutator/ni_mutator.c ================================================ #include "ni_mutator.h" #include #include #include #include #include #include #include #include //Uncomment this line to make the killerbeez ni mutator use the same random number generator //as ni and take the seed value from the $RANDSEED environment variable. This is useful when //comparing the output of this mutator against the ni executable to ensure their behavior //matches //#define NI_COMPARISON_TESTING typedef struct sample { char * content; int length; } sample_t; struct ni_state { char * input; size_t input_length; //Protects the fields below, i.e. 
the iteration count, mutate buffer information, and random state mutex_t mutate_mutex; int iteration; uint8_t * mutated_buffer; uint64_t mutated_buffer_length; uint64_t max_mutated_buffer_length; uint64_t random_state[2]; char ** sample_filenames; size_t num_samples; sample_t ** samples; }; typedef struct ni_state ni_state_t; mutator_t ni_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), ni_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), ni_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; //////////////////////////////////////////////////////////////////////////////////////////// //// Ni mutator methods //////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// #define RAND(state,x) ((x)?(rnd(state)%(x)):0) /* * xoroshiro128plus by David Blackman and Sebastiano Vigna */ static inline uint64_t util_RotL(const uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } static inline uint64_t rnd64(ni_state_t * state) { const uint64_t s0 = state->random_state[0]; uint64_t s1 = state->random_state[1]; const uint64_t result = s0 + s1; s1 ^= s0; state->random_state[0] = util_RotL(s0, 55) ^ s1 ^ (s1 << 14); state->random_state[1] = util_RotL(s1, 36); return result; } /** * This function generates a positive random number * @param state - a mutator specific structure previously created by the create function. * @return the randomly generated number */ static inline long long rnd(ni_state_t * state) { #ifdef NI_COMPARISON_TESTING //If testing to compare output against the ni binary, use random() return random(); //instead of our own random number generator #else long long r = rnd64(state); if (r < 0) r = -r; return r; #endif } /** * This function returns a fresh copy of the input buffer or a sample provided by the * mutator options. 
* @param state - a mutator specific structure previously created by the create function. * @param index - The index of the sample file to retrieve. To retrieve the input buffer, * specify -1 for the index. * @param len - A pointer to a size_t used to return the length of the retrieved buffer * @return A pointer to the copy of the input buffer or sample */ static char * get_sample(ni_state_t * state, int index, size_t *len) { char * content, * copy; size_t length; if(index < 0) { length = state->input_length; content = state->input; } else { length = state->samples[index]->length; content = state->samples[index]->content; } copy = malloc(length); memcpy(copy, content, length); *len = length; return copy; } /** * This function picks a random sample provided by the mutator options or the input buffer * and returns a copy to it. * @param state - a mutator specific structure previously created by the create function. * @param len - A pointer to a size_t used to return the length of the retrieved buffer * @return A pointer to the copy of the input buffer or sample */ static char * get_random_sample(ni_state_t * state, size_t *len) { int index = RAND(state, state->num_samples + 1); if(index == state->num_samples) return get_sample(state, -1, len); return get_sample(state, index, len); } /* * This code in this section below was taken from the ni mutator, available * at: https://github.com/aoh/ni . The ni project does not provide a * license for this code. * * It has been modified from the original to suit the purposes of this * project. */ #define AIMROUNDS 256 #define AIMAX 512 #define AIMLEN 1024 #define MIN(a, b) (((a) < (b)) ? 
a : b) #define BUFSIZE 4096 static char * random_block(ni_state_t * state, size_t orig_len, size_t * new_len) { size_t sample_len, start, len; char * sample, * block; sample = get_random_sample(state, &sample_len); if(sample_len < 3) { free(sample); return NULL; } start = RAND(state,sample_len-2); len = sample_len - start; if (len > 4 * orig_len) len = 4 * orig_len; len = RAND(state,len); len = MIN(len, sample_len - start); block = malloc(len); memcpy(block, sample + start, len); *new_len = len; free(sample); return block; } static void write_all(ni_state_t * state, const char *data, size_t n) { size_t num_bytes = MIN(n, state->max_mutated_buffer_length - state->mutated_buffer_length); if(num_bytes != 0) { memcpy(state->mutated_buffer + state->mutated_buffer_length, data, num_bytes); state->mutated_buffer_length += num_bytes; } } static void output_num(ni_state_t * state, char *buff, size_t buflen, long long n) { int negp = 0; if (n < 0) { n *= -1; negp = 1; } if (n == 0) { buff[0] = '0'; write_all(state, buff, 1); } else { size_t p = buflen - 1; while(n && p) { buff[p--] = n % 10 + '0'; n /= 10; } if (negp || !(rnd(state)&63)) buff[p--] = '-'; p++; write_all(state, buff + p, buflen - p); } } static int sufscore(const char *a, size_t al, const char *b, size_t bl) { int last = 256; int n = 0; while(al-- && bl-- && *a == *b && n < AIMAX) { if (*a != last) n += 32; last = *a++; b++; } return n; } /* note, could have a separate aimer for runs */ static void aim(ni_state_t * state, const char *from, size_t fend, const char *to, size_t tend, size_t *jump, size_t *land) { size_t j, l; int best_score = 0, score, rounds = 0; if (!fend) { *jump = 0; *land = tend ? 
RAND(state,tend) : 0; return; } *jump = RAND(state,fend); if (!tend) { *land = 0; return; } *land = RAND(state,tend); rounds = RAND(state,AIMROUNDS); score = 0; while(rounds--) { int maxs = AIMLEN; j = RAND(state,fend); l = RAND(state,tend); while(maxs-- && l < tend && from[j] != to[l]) { l++; } score = sufscore(from + j, fend - j, to + l, tend - l); if (score > best_score) { best_score = score; *jump = j; *land = l; } } } static int delim_of(char c) { int d = 0; switch(c) { //case '"': d = '"'; break; case '<': d = '>'; break; case '\n': d = '\n'; break; case '(': d = ')'; break; case '[': d = ']'; break; case '{': d = '}'; break; //case '\'': d = '\''; break; //case ' ': d = ' '; break; //case ',': d = ','; break; } return d; } static int drange_start(ni_state_t * state, const char *pos, size_t end, size_t *start, char *open, char *close) { int rounds = 32; while (rounds--) { size_t o = RAND(state, end); int n = AIMLEN; o = RAND(state, o+1); /* prefer beginning */ while(o < end && n--) { char c = pos[o], d; if (c & 128) return 1; d = delim_of(c); if (d) { *start = o; *open = c; *close = d; return 0; } o++; } } return 1; } static int drange_start_of(ni_state_t * state, const char *pos, size_t end, char del, size_t *start) { int rounds = 32; while (rounds--) { size_t o = RAND(state, end); int n = AIMLEN; while(o < end && n--) { char c = pos[o]; if (c & 128) { return 1; } else if (c == del) { *start = o; return 0; } else { o++; } } } return 1; } /* return 0 for failure, called after open */ static size_t drange_end(ni_state_t * state, const char *data, size_t end, size_t pos, char open, char close) { int depth = 1; while(pos < end) { char c = data[pos++]; if (c == close) { depth--; if (depth == 0) { size_t next; if (rnd(state) & 3) return pos; next = drange_end(state, data, end, pos, open, close); if (next) return next; return pos; } } else if (c == open) { depth++; } else if (c & 128) { return 0; } } return 0; } static int drange(ni_state_t * state, const char 
*data, size_t end, size_t *rs, size_t *rl) { size_t s, e; char o, c; if (drange_start(state, data, end, &s, &o, &c)) return 1; e = drange_end(state, data, end, s+1, o, c); if (e) { *rs = s; *rl = e - s; return 0; } return 1; } static int other_drange(ni_state_t * state, const char *data, size_t end, size_t fs, size_t *r2s, size_t *r2l) { char open = data[fs]; char close = delim_of(open); int tries = 10; size_t os = fs; while(tries--) { if (drange_start_of(state, data, end, open, &os)) return 1; if (os != fs) { size_t oe = drange_end(state, data, end, os+1, open, close); if (oe) { *r2s = os; *r2l = oe - os; return 0; } } } return 1; } static void seek_num(ni_state_t * state, const char *pos, size_t end, size_t *ns, size_t *ne) { size_t o = RAND(state, end); while(o < end && (pos[o] < '0' || pos[o] > '9')) { if (pos[o] & 128) return; o++; } if (o == end) return; *ns = o++; while(o < end && pos[o] >= '0' && pos[o] <= '9') { o++; } *ne = o; } static int read_num(const char *pos, size_t end, long long *res) { long long n = 0; size_t p = 0; while(p < end) { n = n * 10 + pos[p++] - '0'; if (n < 0) return 1; } *res = n; return 0; } static long long twiddle(ni_state_t * state, long long val) { do { switch(RAND(state,3)) { case 0: val = rnd(state); break; case 1: val ^= (1 << RAND(state,sizeof(long long)*8 - 1)); break; case 2: val += RAND(state,5) - 2; break; } } while (rnd(state) & 1); return(val); } static void mutate_area(ni_state_t * state, const char *data, size_t end) { char buff[BUFSIZE]; int choice; retry: choice = rnd(state) % 35; switch(choice) { case 0: { /* insert a random byte */ size_t pos = (end ? rnd(state) % end : 0); write_all(state, data, pos); buff[0] = rnd(state) & 255; write_all(state, buff, 1); write_all(state, data + pos, end - pos); break; } case 1: { /* drop a byte */ size_t pos = (end ? 
rnd(state) % end : 0); if (pos+1 >= end) goto retry; write_all(state, data, pos); write_all(state, data+pos+1, end-(pos+1)); break; } case 2: case 3: { /* jump in a sequence */ size_t s, e; if (!end) goto retry; s = rnd(state) % end; e = rnd(state) % end; if (s == e) goto retry; write_all(state, data, e); write_all(state, data+s, end-s); break; } case 4: case 5: { /* repeat */ size_t a, b, s, e, l; int n = 8; while (rnd(state) & 1 && n < 20000) n <<= 1; n = rnd(state) % n + 2; if (!end) goto retry; a = (end ? rnd(state) % end : 0); b = (end ? rnd(state) % end : 0); if (a == b) { goto retry; } else if (a > b) { s = b; e = a; } else { s = a; e = b; } l = e - s; write_all(state, data, s); if (l * n > 134217728) l = rnd(state) % 1024 + 2; while(n--) write_all(state, data+s, l); write_all(state, data+s, end-s); break; } case 6: { /* insert random data */ size_t pos = (end ? rnd(state) % end : 0); int n = rnd(state) % 1022 + 2; int p = 0; while (p < n) buff[p++] = rnd(state) & 255; write_all(state, data, pos); write_all(state, buff, p); write_all(state, data+pos, end-pos); break; } case 7: case 8: case 9: case 10: case 11: case 12: { /* aimed jump to self */ size_t j=0, l=0; if (end < 5) goto retry; while (j == l) aim(state, data, end, data, end, &j, &l); write_all(state, data, j); write_all(state, data+l, end-l); break; } case 13: case 14: case 15: case 16: case 17: case 18: case 19: case 20: case 21: { /* aimed random block fusion */ size_t j, l, dm, sm; char *buff, *block; size_t bend, block_len; if (end < 8) goto retry; block = random_block(state, end, &block_len); if (block_len < 8) goto retry; dm = end >> 1; sm = block_len >> 1; aim(state, data, dm, block, sm, &j, &l); write_all(state, data, j); data += j; end -= j; buff = block + l; bend = block_len - l; aim(state, buff, bend , data, end, &j, &l); write_all(state, buff, j); write_all(state, data + l, end - l); free(block); break; } case 22: case 23: { /* insert semirandom bytes */ size_t p = 0, n = 
RAND(state,BUFSIZE); size_t pos = (end ? rnd(state) % end : 0); n = RAND(state,n+1); n = RAND(state,n+1); n = RAND(state,n+1); n = RAND(state,n+1); n = (n > 1) ? n : 2; if (!end) goto retry; write_all(state, data, pos); while(n--) buff[p++] = data[RAND(state,end)]; write_all(state, buff, p); write_all(state, data + pos, end - pos); break; } case 24: { /* overwrite semirandom bytes */ size_t a, b, p = 0; if (end < 2) goto retry; a = RAND(state,end-2); b = a + 2 + ((rnd(state) & 1) ? RAND(state,MIN(BUFSIZE-2, end-a-2)) : RAND(state,32)); write_all(state, data, a); while(a + p < b) buff[p++] = data[RAND(state,end)]; write_all(state, buff, p); if (end > b) write_all(state, data + b, end - b); break; } case 25: case 26: case 27: case 28: { /* textual number mutation */ int n = RAND(state,AIMROUNDS); long long val; size_t ns, ne; ns = ne = 0; if (!end) goto retry; while(n-- && !ne) { seek_num(state, data, end, &ns, &ne); } if (!ne) goto retry; write_all(state, data, ns); if (read_num(data + ns, ne - ns, &val) == 0) output_num(state, buff, BUFSIZE, twiddle(state,val)); else output_num(state, buff, BUFSIZE, twiddle(state,0)); write_all(state, data + ne, end - ne); break; } case 29: case 30: case 31: case 32: case 33: case 34: { /* delimited swap */ size_t r1s, r1l, r2s, r2l; if (!end || drange(state, data, end, &r1s, &r1l) || other_drange(state, data, end, r1s, &r2s, &r2l)) goto retry; write_all(state, data, r1s); write_all(state, data + r2s, r2l); if (r2s > (r1s + r1l)) /* these can overlap */ write_all(state, data + r1s + r1l, r2s - (r1s + r1l)); write_all(state, data + r1s, r1l); write_all(state, data + r2s + r2l, end - (r2s + r2l)); break; } default: { printf("ni: bad mutation (choice=%d)\n", choice); exit(1); } } } static void ni_area(ni_state_t * state, const char *data, size_t end, int n) { if (n == 0) { write_all(state, data, end); return; } else if (n == 1 || end < 256) { mutate_area(state, data, end); } else if (!end) { return; } else { size_t r = 
RAND(state,end); int m = RAND(state,n / 2); ni_area(state, data, r, (n - m)); ni_area(state, data + r, end - r, m); } } /** * This function generates a new mutation from the input buffer and samples * @param state - a mutator specific structure previously created by the create function. */ static void ni(ni_state_t* state) { char *data; char *datap; size_t j, l, end, endp; int m, n = 0; data = get_sample(state, -1, &end); m = ((rnd(state) & 3) == 1) ? 1 : 2 + RAND(state,((unsigned int) state->input_length >> 12) + 8); if (RAND(state,30)) { ni_area(state, data, end, m); } else { /* small chance of global tail flip */ m--; if (m) { n = RAND(state,m); m =- n; } datap = get_random_sample(state, &endp); aim(state, data, end, datap, endp, &j, &l); ni_area(state, data, j, m); ni_area(state, datap + l, endp - l, n); free(datap); } free(data); } //////////////////////////////////////////////////////////////////////////////////////////// //// API methods /////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// #ifndef ALL_MUTATORS_IN_ONE /** * This function fills in the supplied mutator_t with all of the function * pointers for this mutator. * @param m - a pointer to a mutator_t structure */ NI_MUTATOR_API void init(mutator_t * m) { memcpy(m, &ni_mutator, sizeof(mutator_t)); } #endif /** * This function creates and initializes a ni_state_t object based on the passed in JSON options. 
* @return the newly created ni_state_t object or NULL on failure */ static ni_state_t * setup_options(char * options) { ni_state_t * state; size_t i; state = (ni_state_t *)malloc(sizeof(ni_state_t)); if (!state) return NULL; memset(state, 0, sizeof(ni_state_t)); //Setup defaults state->random_state[0] = (((uint64_t)rand()) << 32) | rand(); state->random_state[1] = (((uint64_t)rand()) << 32) | rand(); state->mutate_mutex = create_mutex(); if (!state->mutate_mutex) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_OPTION_UINT64T_TEMP(state, options, random_state[0], "random_state0", FUNCNAME(cleanup), temp1); PARSE_OPTION_UINT64T_TEMP(state, options, random_state[1], "random_state1", FUNCNAME(cleanup), temp2); PARSE_OPTION_ARRAY(state, options, sample_filenames, num_samples, "samples", FUNCNAME(cleanup)); if(state->num_samples) { state->samples = calloc(state->num_samples, sizeof(void *)); if(!state->samples) { FUNCNAME(cleanup)(state); return NULL; } for(i = 0; i < state->num_samples; i++) { state->samples[i] = malloc(sizeof(sample_t)); if(!state->samples[i]) { FUNCNAME(cleanup)(state); return NULL; } state->samples[i]->length = read_file(state->sample_filenames[i], &state->samples[i]->content); if(state->samples[i]->length < 0) { printf("Could not read file %s\n", state->sample_filenames[i]); FUNCNAME(cleanup)(state); return NULL; } } } return state; } /** * This function will allocate and initialize the mutator state. The mutator state should be * freed by calling the cleanup function. * @param options - a json string that contains the ni specific options. * @param state - optionally, a previously dumped state (with the get_state() function) to load * @param input - The input that this mutator will later be mutating * @param input_length - the size of the input parameter * @return a mutator specific structure or NULL on failure. 
The returned value should * not be used for anything other than passing to the various Mutator API functions. */ NI_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { ni_state_t * ni_state = setup_options(options); if (!ni_state) return NULL; ni_state->input = (char *)malloc(input_length); if (!ni_state->input || !input_length) { FUNCNAME(cleanup)(ni_state); return NULL; } memcpy(ni_state->input, input, input_length); ni_state->input_length = input_length; if (state && FUNCNAME(set_state)(ni_state, state)) { FUNCNAME(cleanup)(ni_state); return NULL; } return ni_state; } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ NI_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { size_t i; ni_state_t * ni_state = (ni_state_t *)mutator_state; destroy_mutex(ni_state->mutate_mutex); for(i = 0; i < ni_state->num_samples; i++) { if(ni_state->samples) { if(ni_state->samples[i]) free(ni_state->samples[i]->content); free(ni_state->samples[i]); } free(ni_state->sample_filenames[i]); } free(ni_state->sample_filenames); free(ni_state->samples); free(ni_state->input); free(ni_state); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. 
* @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ NI_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { ni_state_t * ni_state = (ni_state_t *)mutator_state; //Can't mutate an empty buffer if (buffer_length == 0) return -1; #ifdef NI_COMPARISON_TESTING //If we're trying to compare against the actual ni binary, we should set the random number generator //to a known state that ni can match. srandom(atoi(getenv("RANDSEED"))); #endif //Setup the mutated buffer ni_state->mutated_buffer = (uint8_t *)buffer; ni_state->mutated_buffer_length = 0; ni_state->max_mutated_buffer_length = buffer_length; //Now mutate the buffer ni_state->iteration++; ni(ni_state); return (int)ni_state->mutated_buffer_length; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ NI_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(ni_state_t, state->mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. 
It is the caller's
 * responsibility to free the memory allocated here by calling the free_state function.
 * The dumped object holds the iteration count and both words of the random state.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @return - a buffer that defines the current state of the mutator. This will be a mutator specific JSON string.
 */
NI_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state)
{
	ni_state_t * ni_state = (ni_state_t *)mutator_state;
	json_t *obj, *temp;
	char * ret;

	//NOTE(review): the result of json_object() is not checked here; whether the ADD_*
	//macros cope with a NULL object cannot be seen from this file - confirm
	obj = json_object();
	ADD_INT(temp, ni_state->iteration, obj, "iteration");
	ADD_UINT64T(temp, ni_state->random_state[0], obj, "random_state0");
	ADD_UINT64T(temp, ni_state->random_state[1], obj, "random_state1");

	//Serialize the object, then drop our reference to it
	ret = json_dumps(obj, 0);
	json_decref(obj);
	return ret;
}

/**
 * This function will set the current state of the mutator.
 * This can be used to restart a mutator once from a previous run.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param state - a previously dumped state buffer obtained by the get_state function.
 * @return 0 on success or non-zero on failure
 */
NI_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state)
{
	ni_state_t * ni_state = (ni_state_t *)mutator_state;
	int result, temp_int;
	uint64_t temp_uint64t;

	if (!state)
		return 1;

	//Restores the same three values that get_state dumps.
	//NOTE(review): how the GET_* macros report a missing or malformed key (presumably
	//through result and an early return) is defined outside this file - confirm
	GET_INT(temp_int, state, ni_state->iteration, "iteration", result);
	GET_UINT64T(temp_uint64t, state, ni_state->random_state[0], "random_state0", result);
	GET_UINT64T(temp_uint64t, state, ni_state->random_state[1], "random_state1", result);

	return 0;
}

/**
 * This function will return the current iteration count of the mutator, i.e.
 * how many mutations have been generated with it.
 * @param mutator_state - a mutator specific structure previously created by the create function.
* @return value - the number of previously generated mutations
 */
NI_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state)
{
	//The macro returns the iteration member of the state
	GENERIC_MUTATOR_GET_ITERATION(ni_state_t);
}

/**
 * Obtains information about the inputs that were given to the mutator when it was created
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator
 * when it was created. This parameter is optional and can be NULL, if this information is not needed
 * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this
 * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed.
 */
NI_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes)
{
	//This mutator has a single input: the macro reports a count of 1 and a malloc()ed
	//one element array holding the input length (presumably freed by the caller - confirm)
	SINGLE_INPUT_GET_INFO(ni_state_t);
}

/**
 * This function will set the input(saved in the mutators state) to something new.
 * This can be used to reinitialize a mutator with new data, without reallocating the entire state struct.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called
 * @param input_length - the size in bytes of the input buffer.
 * @return 0 on success and -1 on failure
 */
NI_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length)
{
	//The macro replaces the state's private copy of the input with a copy of new_input
	GENERIC_MUTATOR_SET_INPUT(ni_state_t);
}

/**
 * This function sets a help message for the mutator.
 * @param help_str - A pointer that will be updated to point to the new help string.
* @return 0 on success and -1 on failure
 */
NI_MUTATOR_API int FUNCNAME(help)(char ** help_str)
{
	//The macro strdup()s the message into *help_str
	GENERIC_MUTATOR_HELP(
		"ni - ni-based mutator\n"
		"Options:\n"
		" random_state0 The first half of the seed to honggfuzz's random\n"
		" number generator\n"
		" random_state1 The second half of the seed to honggfuzz's random\n"
		" number generator\n"
		" samples An array of files containing other samples to mutate\n"
		" with the given input\n"
		"\n"
	);
}

================================================
FILE: mutators/ni_mutator/ni_mutator.h
================================================
#pragma once
/* NOTE(review): the header names of the following #include directives are missing in this copy of the file */
#include
#include

/* Export the API from the DLL when building it on Windows, import it otherwise */
#ifdef _WIN32
#ifdef NI_MUTATOR_EXPORTS
#define NI_MUTATOR_API __declspec(dllexport)
#else
#define NI_MUTATOR_API __declspec(dllimport)
#endif
#else //_WIN32
#define NI_MUTATOR_API
#endif

#define MUTATOR_NAME "ni"

NI_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length);
NI_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state);
NI_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length);
NI_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags);
NI_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state);
/* The ni mutator uses the shared default implementation to free dumped states */
#define ni_free_state default_free_state
NI_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state);
NI_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state);
/* The ni mutator uses the shared "unknown or infinite" total iteration count implementation */
#define ni_get_total_iteration_count return_unknown_or_infinite_total_iterations
NI_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes);
NI_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length);
NI_MUTATOR_API int FUNCNAME(help)(char ** help_str);

#ifndef ALL_MUTATORS_IN_ONE
NI_MUTATOR_API void init(mutator_t * m);
#endif

================================================
FILE: mutators/nop_mutator/CMakeLists.txt
================================================ cmake_minimum_required (VERSION 2.8.8) project (nop_mutator) include_directories (${PROJECT_SOURCE_DIR}/../mutators/) set(NOP_SRC ${PROJECT_SOURCE_DIR}/nop_mutator.c) source_group("Library Sources" FILES ${NOP_SRC}) add_library(nop_mutator SHARED ${NOP_SRC} $ $) target_link_libraries(nop_mutator utils) target_compile_definitions(nop_mutator PUBLIC NOP_MUTATOR_EXPORTS) target_compile_definitions(nop_mutator PUBLIC MUTATORS_NO_IMPORT) target_compile_definitions(nop_mutator PUBLIC UTILS_NO_IMPORT) target_compile_definitions(nop_mutator PUBLIC JANSSON_NO_IMPORT) if (WIN32) # utils.dll needs Shlwapi target_link_libraries(nop_mutator Shlwapi) endif (WIN32) ================================================ FILE: mutators/nop_mutator/nop_mutator.c ================================================ #include "nop_mutator.h" #include #include #include #include #ifdef _WIN32 #include #endif struct nop_state { char * input; size_t input_length; int iteration; }; typedef struct nop_state nop_state_t; mutator_t nop_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), FUNCNAME(free_state), FUNCNAME(set_state), FUNCNAME(get_current_iteration), nop_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; #ifndef ALL_MUTATORS_IN_ONE NOP_MUTATOR_API void init(mutator_t * m) { memcpy(m, &nop_mutator, sizeof(mutator_t)); } #endif NOP_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { nop_state_t * nop_state; nop_state = (nop_state_t *)malloc(sizeof(nop_state_t)); if (!nop_state) return NULL; memset(nop_state, 0, sizeof(nop_state_t)); nop_state->input = (char *)malloc(input_length); if (!nop_state->input || !input_length) { FUNCNAME(cleanup)(nop_state); return NULL; } memcpy(nop_state->input, input, input_length); nop_state->input_length = input_length; return nop_state; } NOP_MUTATOR_API void 
FUNCNAME(cleanup)(void * mutator_state) { nop_state_t * nop_state = (nop_state_t *)mutator_state; free(nop_state->input); free(nop_state); } NOP_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { nop_state_t * nop_state = (nop_state_t *)mutator_state; #ifdef _WIN32 InterlockedIncrement(&nop_state->iteration); #else __sync_fetch_and_add(&nop_state->iteration, 1); #endif memcpy(buffer, nop_state->input, nop_state->input_length > buffer_length ? buffer_length : nop_state->input_length); return nop_state->input_length; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. 
* @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ NOP_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { if ((flags & MUTATE_MULTIPLE_INPUTS) && (flags & MUTATE_MULTIPLE_INPUTS_MASK) != 0) return -1; return FUNCNAME(mutate)(mutator_state, buffer, buffer_length); } NOP_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { return "{}"; } NOP_MUTATOR_API void FUNCNAME(free_state)(char * mutator_state) { } NOP_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { return 0; } NOP_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(nop_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. */ NOP_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(nop_state_t); } /** * This function will set the input(saved in the mutators state) to something new. * This can be used to reinitialize a mutator with new data, without reallocating the entire state struct. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. 
* @return 0 on success and -1 on failure */ NOP_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(nop_state_t); } /** * This function sets a help message for the mutator. This is useful * if the mutator takes a JSON options string in the create() function. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ NOP_MUTATOR_API int FUNCNAME(help)(char** help_str) { GENERIC_MUTATOR_HELP( "nop - NOP mutator (doesn't mutate the input)\n" "Options:\n" " None\n" "\n" ); } ================================================ FILE: mutators/nop_mutator/nop_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef NOP_MUTATOR_EXPORTS #define NOP_MUTATOR_API __declspec(dllexport) #else #define NOP_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define NOP_MUTATOR_API #endif #define MUTATOR_NAME "nop" NOP_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); NOP_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); NOP_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); NOP_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); NOP_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); NOP_MUTATOR_API void FUNCNAME(free_state)(char * mutator_state); NOP_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); NOP_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define nop_get_total_iteration_count return_unknown_or_infinite_total_iterations NOP_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); NOP_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); NOP_MUTATOR_API int 
/**
 * The state of one radamsa mutator instance. A radamsa child process is started with
 * the input on its stdin and told to listen on a TCP port; every mutation is obtained
 * by connecting to that port.
 */
typedef struct radamsa_state
{
	//A private copy of the input being mutated (allocated in create/set_input, freed in cleanup)
	char * input;
	//The size of input in bytes
	size_t input_length;

	//The iteration number
	int iteration;
	//Whether we have been able to connect to radamsa yet or not
	int radamsa_up;
	//The seed for radamsa
	int seed;
	//The path to the radamsa binary
	char * path;
	//The port to bind radamsa to
	int port;
	//The number of times we've connected to radamsa's port. This is different from iteration
	//since sometimes radamsa doesn't return input, and we have to call radamsa again. Thus,
	//we need to keep track of radamsa's iteration count, so that we can later fast forward
	//if asked to load a previous mutator state.
	int radamsa_iteration;
	//The handle/pid of the radamsa instance
#ifdef _WIN32
	HANDLE process;
#else
	int process;
#endif

	//A mutex used when doing thread safe mutations
	mutex_t mutate_mutex;
} radamsa_state_t;
PATH_SEP "radamsa" PATH_SEP "bin" PATH_SEP RADAMSA_BIN_NAME); if (!default_path) { // Usual location for 32-bit developer environment default_path = filename_relative_to_binary_dir(DEVELOP_PREFIX "radamsa" PATH_SEP "bin" PATH_SEP RADAMSA_BIN_NAME); } if (!default_path) { FUNCNAME(cleanup)(state); return NULL; } state->path = default_path; } return state; } RADAMSA_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { radamsa_state_t * new_state; #ifdef _WIN32 WSADATA wsaData; if (WSAStartup(MAKEWORD(2, 2), &wsaData)) return NULL; #endif new_state = setup_options(options); if (!new_state) return NULL; new_state->input = (char *)malloc(input_length); if (!new_state->input || !input_length) { FUNCNAME(cleanup)(new_state); return NULL; } memcpy(new_state->input, input, input_length); new_state->input_length = input_length; if (FUNCNAME(set_state)(new_state, state)) { FUNCNAME(cleanup)(new_state); return NULL; } return new_state; } RADAMSA_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { radamsa_state_t * state = (radamsa_state_t *)mutator_state; cleanup_process(state); destroy_mutex(state->mutate_mutex); free(state->input); free(state->path); free(state); } static int mutate_inner(radamsa_state_t * state, char * buffer, size_t buffer_length) { struct sockaddr_in addr; int attempts, result, total_read = 0; #ifdef _WIN32 SOCKET sock; #else int sock; #endif //Create a socket for us to connect to the radamsa daemon sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); #ifdef _WIN32 if (sock == INVALID_SOCKET) #else if (sock < 0) #endif return -1; //connect to the radamsa daemon. Sometimes it takes a bit to startup and bind to the port, so if we just //started radamsa, we'll try multiple times with a little sleep in between if it fails. 
for(attempts = 0; attempts == 0 || (attempts < 5 && !state->radamsa_up); attempts++) { addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); addr.sin_port = htons(state->port); #ifdef _WIN32 result = connect(sock, (SOCKADDR *)&addr, sizeof(addr)); if (result != SOCKET_ERROR) break; Sleep(250); #else result = connect(sock, (struct sockaddr *)&addr, sizeof(addr)); if (result >= 0) break; sleep(1); #endif } if(attempts >= 5) return -1; state->radamsa_up = 1; state->radamsa_iteration++; //Read radamsa's response result = 1; while (total_read < (int)buffer_length && result > 0) { result = recv(sock, buffer + total_read, buffer_length - total_read, 0); if (result > 0) total_read += result; else if (result < 0) //Error, then break total_read = -1; } #ifdef _WIN32 closesocket(sock); #else close(sock); #endif if (total_read == 0) //In some non-error cases, radamsa just returns 0 bytes { //Since we don't want to do this, just call the mutator again total_read = mutate_inner(state, buffer, buffer_length); } return total_read; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ RADAMSA_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { radamsa_state_t * state = (radamsa_state_t *)mutator_state; state->iteration++; return mutate_inner(state, buffer, buffer_length); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. 
/**
 * This function will set the current state of the mutator and (re)start the radamsa
 * process so that the new state takes effect.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param state - a previously dumped state buffer obtained by the get_state function, or NULL
 * to keep the current iteration/seed values and only restart radamsa.
 * @return the result of start_process() (create treats any non-zero value as failure)
 */
RADAMSA_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state)
{
	radamsa_state_t * current_state = (radamsa_state_t *)mutator_state;
	int result, temp;

	if (state)
	{
		// GET_INT is defined outside this file; presumably it parses the named integer out of
		// the JSON state string and may return early on a parse failure - TODO confirm
		GET_INT(temp, state, current_state->iteration, "iteration", result);
		GET_INT(temp, state, current_state->radamsa_iteration, "radamsa_iteration", result);
		GET_INT(temp, state, current_state->seed, "seed", result);
	}

	//Kill any running radamsa instance, then launch a new one using the (possibly updated)
	//seed and radamsa_iteration values
	cleanup_process(current_state);
	return start_process(current_state);
}
* @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. */ RADAMSA_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(radamsa_state_t); } /** * This function will set the input(saved in the mutators state) to something new. * This can be used to reinitialize a mutator with new data, without reallocating the entire state struct. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. * @return 0 on success and -1 on failure */ RADAMSA_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { radamsa_state_t * state = (radamsa_state_t *)mutator_state; if (state->input) { free(state->input); state->input = NULL; } state->input = (char *)malloc(input_length); if (!state->input) { return -1; } state->input_length = input_length; memcpy(state->input, new_input, input_length); FUNCNAME(set_state)(mutator_state, NULL); //give the new input to radamsa.exe return 0; } /** * This function sets a help message for the mutator. This is useful * if the mutator takes a JSON options string in the create() function. * @param help_str - A pointer that will be updated to point to the new help string. 
/**
 * Terminates the radamsa process belonging to this state (if there is one) and marks
 * radamsa as no longer reachable.
 * @param state - the radamsa mutator state whose process should be stopped
 */
static void cleanup_process(radamsa_state_t * state)
{
#ifdef _WIN32
	if (state->process)
	{
		TerminateProcess(state->process, 9);
		CloseHandle(state->process);
		state->process = NULL;
	}
#else
	//Only ever signal a real pid: kill() with 0 or a negative pid targets whole process groups
	if (state->process > 0)
	{
		int status;
		kill(state->process, SIGKILL);
		//Reap exactly this child; a bare wait() could reap an unrelated child instead.
		//NOTE(review): assumes the pid is a direct child of this process - confirm
		//against start_process_and_write_to_stdin
		waitpid(state->process, &status, 0);
	}
	state->process = 0;
#endif
	state->radamsa_up = 0;
}

/**
 * Builds the radamsa command line from the state (path, port, seed and, when resuming,
 * the radamsa iteration to seek to) and starts radamsa with the input on its stdin.
 * @param state - the radamsa mutator state; state->process receives the new process
 * @return the result of start_process_and_write_to_stdin(), or -1 if the command line
 * could not be built
 */
static int start_process(radamsa_state_t * state)
{
	char * cmd_line;
	size_t capacity;
	int length, appended, ret;

	if (!state->path)
		return -1;

	//The fixed option text plus three decimal ints needs well under 128 bytes, so sizing
	//the buffer from the path means long paths are no longer silently truncated
	capacity = strlen(state->path) + 128;
	cmd_line = (char *)malloc(capacity);
	if (!cmd_line)
		return -1;

	length = snprintf(cmd_line, capacity, "%s -o :%d -n inf -s %d ", state->path, state->port, state->seed);
	if (length < 0 || (size_t)length >= capacity)
	{
		free(cmd_line);
		return -1;
	}

	if (state->radamsa_iteration != 0)
	{
		//Append in place. Passing cmd_line as both the destination and a "%s" argument
		//(as was done before) is undefined behavior and loses the first half of the command
		//on some C libraries.
		appended = snprintf(cmd_line + length, capacity - (size_t)length, " -S %d ",
			state->radamsa_iteration + 1); //radamsa counts from 1
		if (appended < 0 || (size_t)appended >= capacity - (size_t)length)
		{
			free(cmd_line);
			return -1;
		}
	}

	ret = start_process_and_write_to_stdin(cmd_line, state->input, state->input_length, &state->process);
	free(cmd_line);
	return ret;
}
/**
 * The state of one splice mutator instance.
 */
struct splice_state
{
	//The input that mutations are derived from; it is copied into the caller's buffer before each mutation
	char * input;
	//The size of input in bytes
	size_t input_length;
	//The number of mutations generated so far (incremented on every mutate call)
	int iteration;

	//The shared afl-style mutation state (stage counters, perf_score, havoc_div, mutate_mutex, ...)
	mutate_info_t info;
};
typedef struct splice_state splice_state_t;
mutate_buffer_t *) = { splice_buffers, }; mutator_t splice_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), splice_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), splice_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; /** * This function fills in m with all of the function pointers for this mutator. * @param m - a pointer to a mutator_t structure * @return none */ #ifndef ALL_MUTATORS_IN_ONE SPLICE_MUTATOR_API void init(mutator_t * m) { memcpy(m, &splice_mutator, sizeof(mutator_t)); } #endif static splice_state_t * setup_options(char * options) { splice_state_t * state = (splice_state_t *)malloc(sizeof(splice_state_t)); if (!state) return NULL; memset(state, 0, sizeof(splice_state_t)); //Setup defaults if (reset_mutate_info(&state->info)) { free(state); return NULL; } if (!options || !strlen(options)) return state; PARSE_MUTATE_INFO_OPTIONS(state, options, FUNCNAME(cleanup), 0, 1); return state; } /** * This function will allocate and initialize the mutator state used in the other Mutator API * functions. * @param options - a json string that contains the mutator specific string of options. * @param state - Optionally, used to load a previously dumped state (with the get_state() * function), that defines the current iteration of the mutator. * @param input - used to produce new mutated inputs later when the mutate function is called * @param input_length - the size of the input buffer * @return a mutator specific structure or NULL on failure. */ SPLICE_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { GENERIC_MUTATOR_CREATE(splice_state_t, setup_options, FUNCNAME(cleanup)); } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. 
This structure will be freed and should not be referenced afterwards. */ SPLICE_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { cleanup_mutate_info(&((splice_state_t *)mutator_state)->info); GENERIC_MUTATOR_CLEANUP(splice_state_t) } static int mutate_inner(void * mutator_state, char * buffer, size_t buffer_length, int is_thread_safe) { splice_state_t * state = (splice_state_t *)mutator_state; mutate_buffer_t buf; int ret; if (buffer_length < state->input_length) return -1; buf.buffer = (uint8_t *)buffer; buf.length = MIN(buffer_length, state->input_length); buf.max_length = buffer_length; memcpy(buf.buffer, state->input, buf.length); if(is_thread_safe && take_mutex(state->info.mutate_mutex)) return -1; if (state->info.stage_cur > MAX(HAVOC_MIN, SPLICE_HAVOC * (state->info.perf_score / state->info.havoc_div) / 100)) { state->info.stage = 0; state->info.stage_cur = 0; state->info.queue_cycle++; } state->iteration++; ret = mutate_one(&state->info, &buf, mutate_funcs, ARRAY_SIZE(mutate_funcs)); if (is_thread_safe && release_mutex(state->info.mutate_mutex)) return -1; return ret; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ SPLICE_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { return mutate_inner(mutator_state, buffer, buffer_length, 0); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. 
See global_types.h * for the list of available flags. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. It must be at least as large as * the original input buffer. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ SPLICE_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(splice_state_t, state->info.mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. * @return - a buffer that defines the current state of the mutator. */ SPLICE_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { splice_state_t * state = (splice_state_t *)mutator_state; json_t *state_obj, *temp; char * ret; state_obj = json_object(); ADD_INT(temp, state->iteration, state_obj, "iteration"); if (!add_mutate_info_to_json(state_obj, &state->info)) return NULL; ret = json_dumps(state_obj, 0); json_decref(state_obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. 
* @return 0 on success or non-zero on failure */ SPLICE_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { splice_state_t * current_state = (splice_state_t *)mutator_state; int result, temp_int; if (!state) return 1; GET_INT(temp_int, state, current_state->iteration, "iteration", result); if (get_mutate_info_from_json(state, ¤t_state->info)) return 1; return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. * @param mutator_state - a mutator specific structure previously created by the create function. * @return value - the number of previously generated mutations */ SPLICE_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state) { GENERIC_MUTATOR_GET_ITERATION(splice_state_t); } /** * Obtains information about the inputs that were given to the mutator when it was created * @param mutator_state - a mutator specific structure previously created by the create function. * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator * when it was created. This parameter is optional and can be NULL, if this information is not needed * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed. */ SPLICE_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes) { SINGLE_INPUT_GET_INFO(splice_state_t); } /** * This function will set the mutator's input to something new. * @param mutator_state - a mutator specific structure previously created by the create function. * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called * @param input_length - the size in bytes of the input buffer. 
* @return 0 on success and -1 on failure */ SPLICE_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length) { GENERIC_MUTATOR_SET_INPUT(splice_state_t); } /** * This function sets a help message for the mutator. * @param help_str - A pointer that will be updated to point to the new help string. * @return 0 on success and -1 on failure */ SPLICE_MUTATOR_API int FUNCNAME(help)(char **help_str) { GENERIC_MUTATOR_HELP( "splice - afl-based splice mutator\n" "Options:\n" " dictionary A file or directory containing dictionary to use while\n" " mangling input\n" " havoc_div A divisor for determining the number of rounds that\n" " the havoc stage should run (typically 1, 2, 5, or 10)\n" " perf_score A performance score used to determine how long a havoc\n" " round lasts. Typically 100, higher results in a\n" " larger number of mutations in these stages before\n" " moving on.\n" " queue_cycle The queue round counter. Used in determining how to\n" " mutate input. 
Generally this shouldn't need to be set\n" " random_state0 The first half of the seed to afl's random number\n" " generator\n" " random_state1 The second half of the seed to afl's random number\n" " generator\n" " splice_filenames An array of files to use during afl's splice stage,\n" " for mixing with the input\n" "\n" ); } ================================================ FILE: mutators/splice_mutator/splice_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef SPLICE_MUTATOR_EXPORTS #define SPLICE_MUTATOR_API __declspec(dllexport) #else #define SPLICE_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define SPLICE_MUTATOR_API #endif #define MUTATOR_NAME "splice" SPLICE_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); SPLICE_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); SPLICE_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); SPLICE_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); SPLICE_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define splice_free_state default_free_state SPLICE_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); SPLICE_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define splice_get_total_iteration_count return_unknown_or_infinite_total_iterations SPLICE_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); SPLICE_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); SPLICE_MUTATOR_API int FUNCNAME(help)(char **help_str); #ifndef ALL_MUTATORS_IN_ONE SPLICE_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: mutators/zzuf_mutator/CMakeLists.txt ================================================ 
// Fuzzing mode: how the per-chunk random bitmask is combined with each input byte
enum fuzzing_mode
{
	FUZZING_XOR = 0,  // "xor":   byte ^= mask (the default mode)
	FUZZING_SET,      // "set":   byte |= mask
	FUZZING_UNSET,    // "unset": byte &= ~mask
	FUZZING_UNKNOWN   // the mode string was not recognized; setup_options fails in that case
};
#define DEFAULT_RATIO 0.004 #define MIN_RATIO 0.00000000001 #define MAX_RATIO 5.0 struct zzuf_state { char * input; size_t input_length; //Option strings char * protect_string; char * refuse_string; char * range_string; char * mode_string; //Parsed Options enum fuzzing_mode mode; // Fuzzing mode (xor, set, unset) int seed; // Random number generator seed double ratio; unsigned char protect[256]; // Per-value byte protection unsigned char refuse[256]; // Per-value byte exclusion int64_t *ranges; // Per-offset byte protection //Protects the fields below, i.e. the iteration count, data array, and random state mutex_t mutate_mutex; int iteration; unsigned long ctx; int current_chunk; uint8_t data[CHUNKBYTES]; }; typedef struct zzuf_state zzuf_state_t; mutator_t zzuf_mutator = { FUNCNAME(create), FUNCNAME(cleanup), FUNCNAME(mutate), FUNCNAME(mutate_extended), FUNCNAME(get_state), zzuf_free_state, FUNCNAME(set_state), FUNCNAME(get_current_iteration), zzuf_get_total_iteration_count, FUNCNAME(get_input_info), FUNCNAME(set_input), FUNCNAME(help) }; //////////////////////////////////////////////////////////////////////////////////////////// //// zzuf mutator methods ////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// /* * The code in this section was taken from the zzuf project (available at * https://github.com/samhocevar/zzuf) and is licensed under the below * terms. It has been modified from the original version to suit the * purposes of this project. * * zzuf - general purpose fuzzer * * Copyright © 2002—2015 Sam Hocevar * * This program is free software. It comes without any warranty, to * the extent permitted by applicable law. You can redistribute it * and/or modify it under the terms of the Do What the Fuck You Want * to Public License, Version 2, as published by the WTFPL Task Force. * See http://www.wtfpl.net/ for more details. 
/* This function converts a string containing a list of ranges in the format
 * understood by cut(1) such as "1-5,8,10-" into a C array for lookup. It is
 * the caller's duty to call free() on the returned value.
 *
 * The array holds one [start, end) pair per list entry and is terminated by
 * a pair whose end is 0. Open-ended entries such as "10-" are stored with
 * end == INT64_MAX so that "0-" and "-" cannot be mistaken for the
 * terminator.
 *
 * Returns NULL if list is NULL or the array cannot be allocated. */
int64_t *_zz_allocrange(char const *list)
{
	char const *parser;
	int64_t *ranges;
	unsigned int i, chunks;

	if (!list)
		return NULL;

	/* Count commas */
	for (parser = list, chunks = 1; *parser; ++parser)
		if (*parser == ',')
			chunks++;

	ranges = malloc((chunks + 1) * 2 * sizeof(int64_t));
	if (!ranges)
		return NULL;

	/* Fill ranges list */
	for (parser = list, i = 0; i < chunks; ++i)
	{
		char const *comma = strchr(parser, ',');
		char const *dash = strchr(parser, '-');

		/* A dash found beyond the next comma belongs to a later entry,
		 * not to this one (otherwise "8,10-" would parse "8" as "8-") */
		if (dash && comma && dash > comma)
			dash = NULL;

		ranges[i * 2] = (dash == parser) ? 0 : atoi(parser);
		if (dash && (dash + 1 == comma || dash[1] == '\0'))
			ranges[i * 2 + 1] = INT64_MAX; /* open-ended: "N-" */
		else if (dash)
			ranges[i * 2 + 1] = atoi(dash + 1) + 1;
		else
			ranges[i * 2 + 1] = ranges[i * 2] + 1;

		/* The last entry has no comma; do not do arithmetic on NULL */
		if (comma)
			parser = comma + 1;
	}

	ranges[i * 2] = ranges[i * 2 + 1] = 0;

	return ranges;
}

/* Returns 1 if value lies inside one of the ranges produced by
 * _zz_allocrange() and 0 otherwise. A NULL ranges pointer means that no
 * restriction was configured, so every value is in range. */
int _zz_isinrange(int64_t value, int64_t const *ranges)
{
	int64_t const *r;

	if (!ranges)
		return 1;

	/* start == end is still accepted as an open-ended range */
	for (r = ranges; r[1]; r += 2)
		if (value >= r[0] && (r[0] == r[1] || value < r[1]))
			return 1;

	return 0;
}

/* Fills the 256-entry lookup table from a character list such as "a-z\n\x00".
 * The table is cleared first; afterwards table[c] is 1 for every byte value c
 * named by the list. The list supports "a-b" ranges and the escapes \n, \r,
 * \t, \ooo (octal, at most \377) and \xHH; any other escaped character
 * stands for itself. */
static void add_char_range(unsigned char *table, char const *list)
{
	static char const hex[] = "0123456789abcdef0123456789ABCDEF";
	char const *tmp;
	int a, b;

	memset(table, 0, 256 * sizeof(unsigned char));

	for (tmp = list, a = b = -1; *tmp; ++tmp)
	{
		int ch;

		if (*tmp == '\\' && tmp[1] == '\0')
			ch = '\\';
		else if (*tmp == '\\')
		{
			tmp++;
			if (*tmp == 'n')
				ch = '\n';
			else if (*tmp == 'r')
				ch = '\r';
			else if (*tmp == 't')
				ch = '\t';
			/* The first octal digit is limited to 0-3 so that the value
			 * fits in a byte; "\777" would otherwise index table[511] */
			else if (tmp[0] >= '0' && tmp[0] <= '3' && tmp[1] >= '0'
				&& tmp[1] <= '7' && tmp[2] >= '0' && tmp[2] <= '7')
			{
				ch = tmp[2] - '0';
				ch |= (int)(tmp[1] - '0') << 3;
				ch |= (int)(tmp[0] - '0') << 6;
				tmp += 2;
			}
			else if ((*tmp == 'x' || *tmp == 'X')
				&& tmp[1] && strchr(hex, tmp[1])
				&& tmp[2] && strchr(hex, tmp[2]))
			{
				ch = ((int)(strchr(hex, tmp[1]) - hex) & 0xf) << 4;
				ch |= (int)(strchr(hex, tmp[2]) - hex) & 0xf;
				tmp += 2;
			}
			else
				ch = (unsigned char)*tmp; /* XXX: OK for \\, but what else? */
		}
		else
			ch = (unsigned char)*tmp;

		/* a and b hold the two previously seen characters, so "a", "-", ch
		 * is recognized as a range once its end character arrives */
		if (a != -1 && b == '-' && a <= ch)
		{
			while (a <= ch)
				table[a++] = 1;
			a = b = -1;
		}
		else
		{
			if (a != -1)
				table[a] = 1;
			a = b;
			b = ch;
		}
	}

	if (a != -1)
		table[a] = 1;
	if (b != -1)
		table[b] = 1;
}
(i + 1) * CHUNKBYTES : len; for (j = start; j < stop; ++j) { if (state->ranges && !_zz_isinrange(j, state->ranges)) continue; // Not in one of the ranges, skip byte byte = (uint8_t)buf[j]; if(state->protect[byte]) continue; fuzzbyte = state->data[j % CHUNKBYTES]; if(!fuzzbyte) continue; switch (state->mode) { case FUZZING_XOR: byte ^= fuzzbyte; break; case FUZZING_SET: byte |= fuzzbyte; break; case FUZZING_UNSET: byte &= ~fuzzbyte; break; } if(state->refuse[byte]) continue; buf[j] = (uint8_t)byte; } } } //////////////////////////////////////////////////////////////////////////////////////////// //// API methods /////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// #ifndef ALL_MUTATORS_IN_ONE /** * This function fills in the supplied mutator_t with all of the function * pointers for this mutator. * @param m - a pointer to a mutator_t structure */ ZZUF_MUTATOR_API void init(mutator_t * m) { memcpy(m, &zzuf_mutator, sizeof(mutator_t)); } #endif /** * This function sets up the refuse, protect, and range tables from the associated strings in a * state. Additionally, the fuzzing mode is set from the mode string. * @param mutator_state - a mutator specific structure previously created by the create function. */ static void setup_state_from_strings(zzuf_state_t * state) { if(state->refuse_string) add_char_range(state->refuse, state->refuse_string); if(state->protect_string) add_char_range(state->protect, state->protect_string); if(state->range_string) state->ranges = _zz_allocrange(state->range_string); if(state->mode_string) state->mode = _zz_fuzzing(state->mode_string); } /** * This function frees the memory associated with the protect, refuse, mode, and range strings. Also * the range table is freed. * @param mutator_state - a mutator specific structure previously created by the create function. 
*/ static void free_ranges(zzuf_state_t * state) { free(state->protect_string); state->protect_string = NULL; free(state->refuse_string); state->refuse_string = NULL; free(state->range_string); state->range_string = NULL; free(state->mode_string); state->mode_string = NULL; free(state->ranges); state->ranges = NULL; } /** * This function creates and initializes a zzuf_state_t object based on the passed in JSON options. * @return the newly created zzuf_state_t object or NULL on failure */ static zzuf_state_t * setup_options(char * options) { zzuf_state_t * state; size_t i; state = (zzuf_state_t *)malloc(sizeof(zzuf_state_t)); if (!state) return NULL; memset(state, 0, sizeof(zzuf_state_t)); state->current_chunk = -1; state->mutate_mutex = create_mutex(); if (!state->mutate_mutex) { free(state); return NULL; } //Setup defaults state->seed = rand(); state->mode = FUZZING_XOR; state->ratio = DEFAULT_RATIO; if (!options || !strlen(options)) return state; PARSE_OPTION_STRING(state, options, mode_string, "mode", FUNCNAME(cleanup)); PARSE_OPTION_DOUBLE(state, options, ratio, "ratio", FUNCNAME(cleanup)); PARSE_OPTION_STRING(state, options, range_string, "range", FUNCNAME(cleanup)); PARSE_OPTION_STRING(state, options, refuse_string, "refuse", FUNCNAME(cleanup)); PARSE_OPTION_INT(state, options, seed, "seed", FUNCNAME(cleanup)); PARSE_OPTION_STRING(state, options, protect_string, "protect", FUNCNAME(cleanup)); state->ratio = state->ratio < MIN_RATIO ? MIN_RATIO : state->ratio > MAX_RATIO ? MAX_RATIO : state->ratio; setup_state_from_strings(state); if(state->mode == FUZZING_UNKNOWN) { FUNCNAME(cleanup)(state); return NULL; } return state; } /** * This function will allocate and initialize the mutator state. The mutator state should be * freed by calling the cleanup function. * @param options - a json string that contains the zzuf specific options. 
* @param state - optionally, a previously dumped state (with the get_state() function) to load * @param input - The input that this mutator will later be mutating * @param input_length - the size of the input parameter * @return a mutator specific structure or NULL on failure. The returned value should * not be used for anything other than passing to the various Mutator API functions. */ ZZUF_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length) { zzuf_state_t * zzuf_state = setup_options(options); if (!zzuf_state) return NULL; zzuf_state->input = (char *)malloc(input_length); if (!zzuf_state->input || !input_length) { FUNCNAME(cleanup)(zzuf_state); return NULL; } memcpy(zzuf_state->input, input, input_length); zzuf_state->input_length = input_length; if (state && FUNCNAME(set_state)(zzuf_state, state)) { FUNCNAME(cleanup)(zzuf_state); return NULL; } return zzuf_state; } /** * This function will release any resources that the mutator has open * and free the mutator state structure. * @param mutator_state - a mutator specific structure previously created by * the create function. This structure will be freed and should not be referenced afterwards. */ ZZUF_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state) { size_t i; zzuf_state_t * state = (zzuf_state_t *)mutator_state; destroy_mutex(state->mutate_mutex); free_ranges(state); free(state->input); free(state); } /** * This function will mutate the input given in the create function and return it in the buffer argument. * @param mutator_state - a mutator specific structure previously created by the create function. 
* @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ ZZUF_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length) { zzuf_state_t * state = (zzuf_state_t *)mutator_state; size_t mutated_buffer_length; //Can't mutate an empty buffer if (buffer_length == 0) return -1; mutated_buffer_length = buffer_length > state->input_length ? state->input_length : buffer_length; memcpy(buffer, state->input, mutated_buffer_length); _zz_fuzz(state, buffer, mutated_buffer_length); state->iteration++; return (int)mutated_buffer_length; } /** * This function will mutate the input given in the create function and return it in the buffer argument. * This function also accepts a set of flags which instruct it how to mutate the input. See global_types.h * for the list of available flags. * @param mutator_state - a mutator specific structure previously created by the create function. * @param buffer - a buffer that the mutated input will be written to * @param buffer_length - the size of the passed in buffer argument. * @param flags - A set of mutate flags that modify how this mutator mutates the input. * @return - the length of the mutated data, 0 when the mutator is out of mutations, or -1 on error */ ZZUF_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags) { SINGLE_INPUT_MUTATE_EXTENDED(zzuf_state_t, state->mutate_mutex); } /** * This function will return the state of the mutator. The returned value can be used to restart the * mutator at a later time, by passing it to the create or set_state function. It is the caller's * responsibility to free the memory allocated here by calling the free_state function. * @param mutator_state - a mutator specific structure previously created by the create function. 
* @return - a buffer that defines the current state of the mutator. This will be a mutator specific JSON string. */ ZZUF_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state) { zzuf_state_t * state = (zzuf_state_t *)mutator_state; json_t *obj, *temp; char * ret; obj = json_object(); ADD_INT(temp, state->iteration, obj, "iteration"); ADD_INT(temp, state->seed, obj, "seed"); ADD_INT(temp, state->mode, obj, "mode"); ADD_DOUBLE(temp, state->ratio, obj, "ratio"); ADD_STRING(temp, state->protect_string, obj, "protect"); ADD_STRING(temp, state->refuse_string, obj, "refuse"); ADD_STRING(temp, state->range_string, obj, "range"); ret = json_dumps(obj, 0); json_decref(obj); return ret; } /** * This function will set the current state of the mutator. * This can be used to restart a mutator once from a previous run. * @param mutator_state - a mutator specific structure previously created by the create function. * @param state - a previously dumped state buffer obtained by the get_state function. * @return 0 on success or non-zero on failure */ ZZUF_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state) { zzuf_state_t * zzuf_state = (zzuf_state_t *)mutator_state; int result, temp_int; double temp_double; char * temp_str; if (!state) return 1; free_ranges(zzuf_state); zzuf_state->current_chunk = -1; GET_INT(temp_int, state, zzuf_state->iteration, "iteration", result); GET_INT(temp_int, state, zzuf_state->seed, "seed", result); GET_INT(temp_int, state, zzuf_state->mode, "mode", result); GET_DOUBLE(temp_double, state, zzuf_state->ratio, "ratio", result); GET_STRING(temp_str, state, zzuf_state->protect_string, "protect", result); GET_STRING(temp_str, state, zzuf_state->refuse_string, "refuse", result); GET_STRING(temp_str, state, zzuf_state->range_string, "range", result); setup_state_from_strings(zzuf_state); return 0; } /** * This function will return the current iteration count of the mutator, i.e. * how many mutations have been generated with it. 
* @param mutator_state - a mutator specific structure previously created by the create function.
 * @return value - the number of previously generated mutations
 */
ZZUF_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state)
{
    /* Body supplied by a shared macro, instantiated for zzuf_state_t */
    GENERIC_MUTATOR_GET_ITERATION(zzuf_state_t);
}

/**
 * Obtains information about the inputs that were given to the mutator when it was created
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param num_inputs - a pointer to an integer used to return the number of inputs given to this mutator
 * when it was created. This parameter is optional and can be NULL, if this information is not needed
 * @param input_sizes - a pointer to a size_t array used to return the sizes of the inputs given to this
 * mutator when it was created. This parameter is optional and can be NULL, if this information is not needed.
 */
ZZUF_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes)
{
    /* Body supplied by a shared macro, instantiated for zzuf_state_t */
    SINGLE_INPUT_GET_INFO(zzuf_state_t);
}

/**
 * This function will set the input (saved in the mutator's state) to something new.
 * This can be used to reinitialize a mutator with new data, without reallocating the entire state struct.
 * @param mutator_state - a mutator specific structure previously created by the create function.
 * @param new_input - The new input used to produce new mutated inputs later when the mutate function is called
 * @param input_length - the size in bytes of the input buffer.
 * @return 0 on success and -1 on failure
 */
ZZUF_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length)
{
    /* Body supplied by a shared macro, instantiated for zzuf_state_t */
    GENERIC_MUTATOR_SET_INPUT(zzuf_state_t);
}

/**
 * This function sets a help message for the mutator.
 * @param help_str - A pointer that will be updated to point to the new help string.
* @return 0 on success and -1 on failure */ ZZUF_MUTATOR_API int FUNCNAME(help)(char ** help_str) { GENERIC_MUTATOR_HELP( "zzuf - zzuf-based mutator\n" "Options:\n" "\tmode fuzzing mode to use: xor, set, or unset\n" "\tprotect protect bytes and characters in \n" "\trange only fuzz bytes at offsets within \n" "\tratio bit fuzzing ratio\n" "\trefuse refuse bytes and characters in \n" "\tseed random seed\n" "\n" ); } ================================================ FILE: mutators/zzuf_mutator/zzuf_mutator.h ================================================ #pragma once #include #include #ifdef _WIN32 #ifdef ZZUF_MUTATOR_EXPORTS #define ZZUF_MUTATOR_API __declspec(dllexport) #else #define ZZUF_MUTATOR_API __declspec(dllimport) #endif #else //_WIN32 #define ZZUF_MUTATOR_API #endif #define MUTATOR_NAME "zzuf" ZZUF_MUTATOR_API void * FUNCNAME(create)(char * options, char * state, char * input, size_t input_length); ZZUF_MUTATOR_API void FUNCNAME(cleanup)(void * mutator_state); ZZUF_MUTATOR_API int FUNCNAME(mutate)(void * mutator_state, char * buffer, size_t buffer_length); ZZUF_MUTATOR_API int FUNCNAME(mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); ZZUF_MUTATOR_API char * FUNCNAME(get_state)(void * mutator_state); #define zzuf_free_state default_free_state ZZUF_MUTATOR_API int FUNCNAME(set_state)(void * mutator_state, char * state); ZZUF_MUTATOR_API int FUNCNAME(get_current_iteration)(void * mutator_state); #define zzuf_get_total_iteration_count return_unknown_or_infinite_total_iterations ZZUF_MUTATOR_API void FUNCNAME(get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); ZZUF_MUTATOR_API int FUNCNAME(set_input)(void * mutator_state, char * new_input, size_t input_length); ZZUF_MUTATOR_API int FUNCNAME(help)(char ** help_str); #ifndef ALL_MUTATORS_IN_ONE ZZUF_MUTATOR_API void init(mutator_t * m); #endif ================================================ FILE: picker/CMakeLists.txt 
================================================ cmake_minimum_required (VERSION 2.8.8) project (picker) include_directories (${CMAKE_SOURCE_DIR}/driver/) include_directories (${CMAKE_SOURCE_DIR}/instrumentation/) set(PICKER_SRC ${PROJECT_SOURCE_DIR}/main.c) source_group("Executable Sources" FILES ${PICKER_SRC}) add_executable(picker ${PICKER_SRC} $ $) target_compile_definitions(picker PUBLIC INSTRUMENTATION_NO_IMPORT) target_compile_definitions(picker PUBLIC DRIVER_NO_IMPORT) target_link_libraries(picker utils) target_link_libraries(picker jansson) if (WIN32) target_link_libraries(picker Shlwapi) # utils needs Shlwapi target_link_libraries(picker ws2_32) # driver needs ws2_32 target_link_libraries(picker iphlpapi) # network driver needs iphlpapi endif (WIN32) ================================================ FILE: picker/main.c ================================================ //This program helps the user decide which libraries should be instrumented //while fuzzing. This is accomplished by running the target program and //recording coverage information on each of the loaded libraries. It then //analyzes the coverage information for each library to determine which //libraries the coverage information varies based on the input file. These //libraries are most likely the ones that process the input file, and thus the //most likely targets for fuzzing. #include #include #include #include #include #include #include /** * This function prints out the usage information for the fuzzer and each of the individual components. 
* @param program_name - the name of the program currently being run (for use in the outputted message) */ void usage(char * program_name) { char * help_text; printf( "Usage: %s driver_name instrumentation_name seed_directory [options]\n" "\n" "Options:\n" "\t -d driver_options Set the options for the driver\n" "\t -i instrumentation_options Set the options for the instrumentation\n" "\t -ib ignore_bytes_dir The directory to write the list of bytes in the instrumentation to ignore\n" "\t -l logging_options Set the options for logging\n" "\t -n num_iterations The number of iterations to run per file [default 10 per file]\n" "\n", program_name ); #define PRINT_HELP(x, y) \ x = y; \ if(x) { \ puts(x); \ free(x); \ } PRINT_HELP(help_text, driver_help()); PRINT_HELP(help_text, instrumentation_help()); exit(1); } int main(int argc, char ** argv) { driver_t * driver; instrumentation_t * instrumentation; char *driver_name, *driver_options = NULL, *seed_directory = NULL, *seed_buffer = NULL, * module_name = NULL, *logging_options = NULL, *instrumentation_name = NULL, *instrumentation_options = NULL, *ignore_bytes_dir = NULL; void * instrumentation_state = NULL; int seed_length = 0, file_count, module_index, new_path, cur_index; int iteration = 0; WIN32_FIND_DATA fdFile; HANDLE file_handle; char filename[4096]; char ** module_names = NULL, ** filenames = NULL; char * module_infos = NULL; int * module_results = NULL; char * info, * ignore_bytes; int num_modules = 0, num_files = 0, info_size, module_info_size = -1, i; ////////////////////////////////////////////////////////////////////////////////////////////////////// // Parse Arguments /////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //Default options int num_iterations = 10; if (argc < 4) { usage(argv[0]); } driver_name = argv[1]; instrumentation_name = argv[2]; seed_directory = argv[3]; 
for (int i = 4; i < argc; i++) { IF_ARG_OPTION("-d", driver_options) ELSE_IF_ARG_OPTION("-i", instrumentation_options) ELSE_IF_ARG_OPTION("-ib", ignore_bytes_dir) ELSE_IF_ARG_OPTION("-l", logging_options) ELSE_IF_ARGINT_OPTION("-n", num_iterations) else { if (strcmp("-h", argv[i])) printf("Unknown argument: %s\n", argv[i]); usage(argv[0]); } } if (setup_logging(logging_options)) { printf("Failed setting up logging, exitting\n"); return 1; } if (num_iterations < 2) FATAL_MSG("Bad iteration number (%d). Must have a iteration count greater than 1.", num_iterations); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Ojbect Setup ////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// instrumentation = instrumentation_factory(instrumentation_name); if (!instrumentation) FATAL_MSG("Unknown instrumentation '%s'", instrumentation_name); instrumentation_state = instrumentation->create(instrumentation_options, NULL); if (!instrumentation_state) FATAL_MSG("Bad options/state for instrumentation %s", instrumentation_name); if (!instrumentation->get_module_info) FATAL_MSG("Instrumentation '%s' does not support per module coverage", instrumentation_name); //Create the driver driver = driver_instrumentation_factory(driver_name, driver_options, instrumentation, instrumentation_state); if (!driver) FATAL_MSG("Unknown driver '%s' or bad options: %s", driver_name, driver_options); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Get the list of files to test ///////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// memset(filename, 0, sizeof(filename)); snprintf(filename, sizeof(filename) - 1, "%s\\*", seed_directory); file_count 
= 0; int success = 1; for (file_handle = FindFirstFile(filename, &fdFile); file_handle != INVALID_HANDLE_VALUE && success; success = FindNextFile(file_handle, &fdFile)) { //Skip directories if (fdFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) continue; //Read the seed file memset(filename, 0, sizeof(filename)); snprintf(filename, sizeof(filename) - 1, "%s\\%s", seed_directory, fdFile.cFileName); seed_length = read_file(filename, &seed_buffer); if (seed_length <= 0) //Couldn't read file, or empty file continue; free(seed_buffer); num_files++; filenames = (char **)realloc(filenames, num_files * sizeof(char *)); filenames[num_files - 1] = strdup(filename); } FindClose(file_handle); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Main Test Loop //////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// #define INITIAL_STATE 0 #define PATH_SET 1 #define NEW_PATH_ON_SAME_FILE 2 #define NEW_PATH_ON_DIFF_FILE 3 char * module_results_descriptions[] = { "no paths set", //INITIAL_STATE "a single path for all files", //PATH_SET "multiple paths for the same file", //NEW_PATH_ON_SAME_FILE "one path for each file" //NEW_PATH_ON_DIFF_FILE }; for(file_count = 0; file_count < num_files; file_count++) { //Read the seed file seed_length = read_file(filenames[file_count], &seed_buffer); if (seed_length <= 0) //Couldn't read file, or empty file continue; INFO_MSG("Testing file '%s'", filenames[file_count]); for (iteration = 0; iteration < num_iterations; iteration++) { driver->test_input(driver->state, seed_buffer, seed_length); module_index = 0; while (!instrumentation->get_module_info(instrumentation_state, module_index, &new_path, &module_name, &info, &info_size)) { cur_index = module_index; module_index++; if (module_info_size == -1) module_info_size = info_size; if (info_size != 
module_info_size) FATAL_MSG("Module instrumentation data varies per size, not supported (yet)"); if (!info) FATAL_MSG("Instrumentation data unavailable from the %s instrumentation.\n", instrumentation_name); if (num_modules < module_index) { //module_infos is a dynamically allocated array that holds all of the instrumentation data for each of the //instrumented modules. While it is declared/used as a char *, it can be thought of as a 4-dimension array: //char module_infos[NUM_MODULES_TRACED][NUM_FILES_TRACED][NUM_ITERATIONS_PER_FILE][MODULE_INFO_SIZE]; module_names = (char **)realloc(module_names, module_index * sizeof(char *)); module_names[cur_index] = module_name; module_results = (int *)realloc(module_results, module_index * sizeof(int *)); module_results[cur_index] = INITIAL_STATE; module_infos = (char *)realloc(module_infos, module_index * num_files * num_iterations * module_info_size); num_modules = module_index; } int pos = ((cur_index * num_files * num_iterations) + (file_count * num_iterations) + iteration) * module_info_size; memcpy(module_infos + pos, info, module_info_size); //Logic: //If it's the first time we've run this module, mark that we've set the path //If it's the first iteration that we've tried a new file and we found a new path, it has // at least one new path per file. So until we determine that it can take multiple // paths for the same file, mark it as having one path per file. //Otherwise, we've found a new path, then it must be a new path for the same file. Mark it as // such. After we've decided that, there is no coming back from that state. 
if (module_results[cur_index] == INITIAL_STATE) module_results[cur_index] = PATH_SET; else if (new_path && iteration == 0 && module_results[cur_index] != NEW_PATH_ON_SAME_FILE) module_results[cur_index] = NEW_PATH_ON_DIFF_FILE; else if (new_path) module_results[cur_index] = NEW_PATH_ON_SAME_FILE; } } free(seed_buffer); } ////////////////////////////////////////////////////////////////////////////////////////////////////// // Compare the runs and calculate ignore bytes /////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //In the next portion, we have the variables: //module_index = the module index (since you can use the picker on many different modules at once). //num_files = the number of files being tested //num_iterations = the number of times each file was traced //file_count = the iterator over the files for a specific module //iteration = the iterator over the individual traces for a specific file //module_info_size = the size of the instrumentation data from the traced module (a constant value, that doesn't change per file/module/iteration) //cur_pos = an index into module_infos to the instrumentation data for the $iteration trace of the $file_count file //prev_pos = an index into module_infos to the instrumentation data for the ($iteration-1) trace of the $file_count file ignore_bytes = (char *)malloc(module_info_size); for (module_index = 0; module_index < num_modules; module_index++) { if (module_results[module_index] != NEW_PATH_ON_SAME_FILE) continue; memset(ignore_bytes, 0xff, module_info_size); int total_ignore_count = 0; for (file_count = 0; file_count < num_files; file_count++) { for (iteration = 1; iteration < num_iterations; iteration++) { int ignore_count = 0; //The calculations for cur_pos and prev_pos are done as such: //index * num_files * num_iterations = skip over the modules we've already checked //file_count * num_iterations = skip 
over the files we've already checked for this module //iteration = skip to the iteration that we're currently checking (prev_pos uses iteration - 1, since it's jumping to the previous iteration's instrumentation data) //and then it's all multiplied by the module_info_size since each one of the instrumentation data records that we're skipping is that many bytes large int prev_pos = ((module_index * num_files * num_iterations) + (file_count * num_iterations) + iteration - 1) * module_info_size; int cur_pos = ((module_index * num_files * num_iterations) + (file_count * num_iterations) + iteration) * module_info_size; for (i = 0; i < module_info_size; i++) { if (module_infos[prev_pos + i] != module_infos[cur_pos + i]) { if (ignore_bytes[i]) total_ignore_count++; ignore_bytes[i] = 0x00; ignore_count++; } } DEBUG_MSG("Module %s File %s iteration (%d/%d) ignore count %d total ignore count %d", module_names[module_index], filenames[file_count], iteration - 1, iteration, ignore_count, total_ignore_count); } } if (ignore_bytes_dir) { memset(filename, 0, sizeof(filename)); snprintf(filename, sizeof(filename) - 1, "%s\\%s.dat", ignore_bytes_dir, module_names[module_index]); /* //Swap the byte ordering here, so we don't have to in the hashing function later #if defined(_M_X64) || defined(__x86_64__) for (i = 0; i < sizeof(ignore_bytes); i += sizeof(u64)) *((u64 *)&ignore_bytes[i]) = _byteswap_uint64(*((u64 *)&ignore_bytes[i])); #else for (i = 0; i < sizeof(ignore_bytes); i += sizeof(u32)) *((u32 *)&ignore_bytes[i]) = SWAP32(*((u32 *)&ignore_bytes[i])); #endif */ write_buffer_to_file(filename, ignore_bytes, module_info_size); } } free(ignore_bytes); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Print the results ///////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// CRITICAL_MSG("Results:"); 
for (module_index = 0; module_index < num_modules; module_index++) CRITICAL_MSG("Module %s had %s", module_names[module_index], module_results_descriptions[module_results[module_index]]); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Cleanup /////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //free the generated info free(module_results); free(module_names); free(module_infos); for (file_count = 0; file_count < num_files; file_count++) free(filenames[file_count]); free(filenames); //Cleanup the objects and exit driver->cleanup(driver->state); instrumentation->cleanup(instrumentation_state); free(driver); free(instrumentation); return 0; } ================================================ FILE: python/manager/app/__init__.py ================================================ import time, traceback, os, uuid from flask import Flask, jsonify, request, json from flask_sqlalchemy import SQLAlchemy from flask_restful import Api import app.config as config_file from app.encoder import JSONEncoder app = Flask(__name__, static_folder='static', static_url_path='') """ Setup DB Models """ app.config['SQLALCHEMY_DATABASE_URI'] = config_file.DATABASE_URI app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['RESTFUL_JSON'] = {'cls': JSONEncoder} app.config['ERROR_404_HELP'] = False app.config['db'] = db = SQLAlchemy(app) # TODO verification of version via metadata table. 
""" Setup Routes """ from controller.Hello import HelloCtrl from controller.Minimize import MinimizeCtrl # TODO The controllers below here still need to be finished from controller.File import FileCtrl from controller.Job import JobCtrl from controller.Target import TargetCtrl from controller.Config import ConfigCtrl from controller.Results import ResultsCtrl api = Api(app) api.add_resource(HelloCtrl, '/') api.add_resource(MinimizeCtrl, '/api/minimize') api.add_resource(FileCtrl, '/api/file', methods=['GET', 'POST']) api.add_resource(JobCtrl, '/api/job', methods=['GET', 'POST']) api.add_resource(JobCtrl, '/api/job/', methods=['GET', 'PUT'], endpoint='jobctrl_id') api.add_resource(JobCtrl, '/api/boinc_job/', methods=['GET', 'PUT'], endpoint='jobctrl_boincid') api.add_resource(ResultsCtrl, '/api/results', methods=['GET']) api.add_resource(ResultsCtrl, '/api/job//results', endpoint='resultsctrl_job') api.add_resource(ResultsCtrl, '/api/boinc_job//results', endpoint='resultsctrl_boincjob') api.add_resource(TargetCtrl, '/api/target', methods=['GET', 'POST']) api.add_resource(TargetCtrl, '/api/target/', methods=['GET', 'PUT', 'DELETE'], endpoint='targetctrl_id') # api.add_resource(CrashBucketCtrl, '/api/bucket', '/api/bucket/') api.add_resource(ConfigCtrl, '/api/config') #api.add_resource(UpdateCtrl, '/api/update/') ================================================ FILE: python/manager/app/config.py ================================================ import os # For feature testing, it's ok to use sqlite. 
SQLAlchemy will magic away the difference under the hood DATABASE_URI = "sqlite:///../../../../moflow.db" #DATABASE_URI = "postgresql://localhost/killerbeez?user=killerbeez&password=killerbeez" MANAGER_VERSION = 0.1 CLIENT_FOLDER = "client" UPLOAD_FOLDER = 'static' + os.sep + 'upload' ================================================ FILE: python/manager/app/encoder.py ================================================ import json class JSONEncoder(json.JSONEncoder): def default(self, o): try: ts = o.timestamp() return ts except AttributeError: pass return super(JSONEncoder, self).default(o) ================================================ FILE: python/manager/controller/Config.py ================================================ from flask_restful import Resource, reqparse, fields, marshal_with, abort import sys from model.FuzzingJob import fuzz_jobs from model.Config import FuzzingConfig from app import app db = app.config['db'] config_fields = { 'name': fields.String, 'target_id': fields.Integer, 'job_id': fields.Integer, 'value': fields.String, } class ConfigCtrl(Resource): def create(self, target_id, job_id, name, value): config = FuzzingConfig(name, value, target=target_id, job=job_id) db.session.add(config) db.session.commit() return config def read(self, id): # TODO allow for querying of configs pass @marshal_with(config_fields) def post(self): err = list() target_id = None job_id = None parser = reqparse.RequestParser() parser.add_argument('job_id', type=int) parser.add_argument('target_id', type=int) parser.add_argument('name', type=str) parser.add_argument('value', type=str) args = parser.parse_args() if args.name is None or args.name == "": err.append("no configuration name supplied; it is required") if args.value is None or args.value == "": err.append("no configuration value supplied; it is required") # Determine if this is for a target or a job if args.target_id is not None and args.target_id != 0: target_id = args.target_id if args.job_id is not None 
and args.job_id != 0: job_id = args.job_id if target_id is not None and job_id is not None: err.append("target_id and job_id are mutually exclusive") if target_id is None and job_id is None: err.append('must supply either a target_id or a job_id') if len(err) != 0: abort(400, err=', '.join(err)) # Ok, make one. if target_id is None: target_id = 0 if job_id is None: job_id = 0 return self.create(target_id, job_id, args.name, args.value) @marshal_with(config_fields) def get(self): target_id = None job_id = None parser = reqparse.RequestParser() parser.add_argument('job_id', type=int) parser.add_argument('target_id', type=int) parser.add_argument('name', type=str) args = parser.parse_args() # Determine if this is for a target or a job if args.target_id is not None and args.target_id != 0: target_id = args.target_id if args.job_id is not None and args.job_id != 0: job_id = args.job_id if target_id is not None and job_id is not None: abort(400, err='target_id and job_id are mutually exclusive') query = db.session.query(FuzzingConfig) if args.name is not None: query = query.filter_by(name=args.name) if job_id is not None: query = query.filter_by(job_id=job_id) if target_id is not None: query = query.filter_by(target_id=target_id) configs = query.all() #configs = [config.as_dict() for config in configs] return configs ================================================ FILE: python/manager/controller/File.py ================================================ import base64 import logging import os.path import sys import urllib.parse from flask import request, make_response, json from flask_restful import Resource, reqparse, fields, marshal_with, abort from app import app from lib import boinc from lib import errors from model.FuzzingJob import fuzz_jobs db = app.config['db'] logger = logging.getLogger(__name__) file_fields = { 'filename': fields.String(), # TODO: this should be the whole URL 'path': fields.String(), 'hash': fields.String(), } class FileCtrl(Resource): def 
create(self, contents): try: filename = boinc.filename_to_download_path( boinc.stage_file('input', contents)) except errors.Error: logger.exception('unable to stage file') abort(400, err='unable to stage file') return {'filename': os.path.basename(filename), 'path': filename, 'hash': filename.split('_')[-1]}, 200 def _get_hash(self, content): return hashlib.md5(content).hexdigest() @marshal_with(file_fields) def search(self, hash=None, content=None): if hash is not None: filename = boinc.get_filename('input', hash) elif content is not None: hash = self._get_hash(content) filename = boinc.get_filename('input', hash) else: abort(400, err='no filtering criteria provided') if os.path.exists(filename): with open(filename, 'rb') as input_file: if content is not None and input_file.read() != content: abort(400, err='file does not match specified contents') return [{'filename': os.path.basename(filename), 'path': boinc.filename_to_download_path(filename), 'hash': hash}] else: # Nothing valid found abort(404, err='not found') def _content_decode(self, args): if args.encoding == 'url': content = urllib.parse.unquote_to_bytes(args.content) elif args.encoding == 'base64': try: content = base64.b64decode(args.content) except binascii.Error: abort(400, err='Invalid base64 encoding') else: abort(400, err='Invalid encoding (accepted values are "url" and "base64")') if args.hash: hash = self._get_hash(content) if hash != args.hash: abort(400, err='Content does not match provided hash') return content @marshal_with(file_fields) def post(self): parser = reqparse.RequestParser() parser.add_argument('encoding', type=str, default='url') parser.add_argument('content', type=str, required=True) parser.add_argument('hash', type=str) args = parser.parse_args() content = self._content_decode(args) return self.create(content) def get(self): """ Query the file DB for a matching file :return: Dict containing the info on the file and 200, if present, else error message and 400 or 404 """ parser = 
reqparse.RequestParser()
        parser.add_argument('encoding', type=str, default='url')
        parser.add_argument('content', type=str)
        parser.add_argument('hash', type=str)
        args = parser.parse_args()
        # Content is optional here; only decode it when the caller sent some.
        content = self._content_decode(args) if args.content else None
        return self.search(args.hash, content)

================================================
FILE: python/manager/controller/Hello.py
================================================
from flask_restful import Resource


class HelloCtrl(Resource):
    """Resource whose GET returns a fixed greeting."""

    def get(self):
        return {'hello': 'world'}

================================================
FILE: python/manager/controller/Job.py
================================================
import datetime
from flask_restful import Resource, reqparse, fields, marshal_with, abort
from lib import boinc
from lib import fuzzer
from model.FuzzingJob import fuzz_jobs
from model.FuzzingTarget import targets
from model.job_inputs import job_inputs
from app import app
import logging

db = app.config['db']
logger = logging.getLogger(__name__)

# Marshalling template for job objects (used with marshal_with).
job_fields = {
    'job_id': fields.Integer(),
    'boinc_id': fields.Integer(),
    'job_type': fields.String(),
    'status': fields.String(),
    'mutator_state': fields.String(),
    'mutator': fields.String(),
    'instrumentation_type': fields.String(),
    'driver': fields.String(),
    'assign_time': fields.DateTime(dt_format='iso8601'),
    'end_time': fields.DateTime(dt_format='iso8601'),
    # Reads the input_id attribute of every entry in the job's `inputs`.
    # NOTE(review): confirm the job_inputs model actually exposes input_id.
    'input_ids': fields.List(fields.Integer(attribute='input_id'), attribute='inputs'),
    'seed_file': fields.String(),
}


class JobCtrl(Resource):
    def read(self, id=None, boinc_id=None):
        """
        Fetch the db entry for a given job id or boinc job id, or error if not found
        :param id: job_id of the job to be fetched
        :param boinc_id: boinc_id of the job to be fetched
        :return: list containing the dictionary representing the job object, or a dictionary indicating error.
""" query = fuzz_jobs.query if id is not None: query = query.filter_by(job_id=id) if boinc_id is not None: query = query.filter_by(boinc_id=boinc_id) job = query.first() if job is None: abort(404, err="not found") return job, 200 def readAll(self, target_id): """ Get all jobs associated with the specified target_id, or error if not found :param target_id: target_id for which all jobs should be returned :return: list containing all jobs for the given target, or a dictionary indicating error. """ target = targets.query.filter_by(target_id=target_id).first() if target is None: abort(404, err="not found") jobs = fuzz_jobs.query.filter_by(target_id=target_id).all() #jobs = [{'job': job} for job in jobs] return jobs, 200 def create(self, data): """ Create a new job. :param data: dictionary of attributes for the new job object :return: newly created job object on 200, error dictionary on 400 """ type = data.job_type if type is None: # Default to "fuzz" type type = "fuzz" if data.target_id is None or data.target_id == 0: abort(400, err="target_id must be supplied and non-zero") else: # verify the target exists target = targets.query.filter_by(target_id=data.target_id).first() if target is None: abort(400, err="supplied target_id not found") if data.input_files: for input_file in data.input_files: if not os.path.exists(boinc.path_for_file(input_file)): abort(400, err="supplied input_file not found") try: job = fuzz_jobs(type, data.target_id, mutator=data.mutator, mutator_state=data.mutator_state, instrumentation_type=data.instrumentation_type, driver=data.driver, seed_file=data.seed_file, iterations=data.iterations ) if data.input_files: job.inputs = [job_inputs(input_file=input_file) for input_file in data.input_files] db.session.add(job) db.session.commit() except Exception as e: logger.exception('failed to add job') abort(400, err="invalid request") mutator_options = job.lookup_config('mutator', data.mutator) instrumentation_options = job.lookup_config('instrumentation', 
data.instrumentation_type)
        driver_options = job.lookup_config('driver', data.driver)
        shell_format = job.lookup_config('platform', 'shell_format')
        command_line = fuzzer.format_cmdline(
            job.driver, job.instrumentation_type, job.mutator, job.iterations, shell_format,
            driver_options=driver_options,
            instrumentation_options=instrumentation_options,
            mutator_options=mutator_options)
        logger.debug('Submitting job with command line: %s', command_line)
        # str(target) is passed to BOINC as the application name.
        job_id = boinc.submit_job(str(target), command_line, seed_file=job.seed_file)
        # Record the id returned by the submission on the job row.
        job.boinc_id = job_id
        db.session.commit()
        return job, 200

    def update(self, data, id=None, boinc_id=None):
        """
        Update the seed_file and/or status of an existing job.
        :param data: parsed arguments; seed_file and status are applied when not None
        :param id: job_id of the job to update
        :param boinc_id: boinc_id of the job to update
        :return: the updated job on 200; aborts with 404 when no job matches
        """
        query = fuzz_jobs.query
        if id is not None:
            query = query.filter_by(job_id=id)
        if boinc_id is not None:
            query = query.filter_by(boinc_id=boinc_id)
        job = query.first()
        if job is None:
            abort(404, err='Unknown job ID')
        if data.seed_file is not None:
            job.seed_file = data.seed_file
        if data.status is not None:
            job.status = data.status
        db.session.commit()
        return job, 200

    @marshal_with(job_fields)
    def get(self, id=None, boinc_id=None):
        """
        Request either a single job (by id/boinc_id) or all jobs for a target (by target_id)
        :return: List of jobs that match the query on 200; error dict on 400
        """
        parser = reqparse.RequestParser()
        parser.add_argument("target_id", type=int)
        args = parser.parse_args()
        # The two options are mutually exclusive
        if (id is not None or boinc_id is not None) and args.target_id is not None:
            abort(400, err='id/boinc_id and target_id are mutually exclusive')
        # But at least one must be supplied
        if id is None and boinc_id is None and args.target_id is None:
            abort(400, err='either id, boinc_id, or target_id must be supplied')
        if id is not None or boinc_id is not None:
            return self.read(id, boinc_id)
        else:
            return self.readAll(args.target_id)

    @marshal_with(job_fields)
    def post(self):
        """
        Create a new job.
:return: The job created on 200, error on 400 """ parser = reqparse.RequestParser() parser.add_argument("job_type", type=str) parser.add_argument("target_id", type=int, required=True) parser.add_argument("mutator", type=str, required=True) parser.add_argument("mutator_state", type=str) parser.add_argument("instrumentation_type", type=str, required=True) parser.add_argument("driver", type=str, required=True) parser.add_argument("input_files", type=str, action='append', location='json') parser.add_argument("seed_file", type=str, required=True) parser.add_argument("iterations", type=int, required=True) args = parser.parse_args() return self.create(args) @marshal_with(job_fields) def put(self, id=None, boinc_id=None): """ Update a job. """ if id is None and boinc_id is None: abort(400, err='one of id and boinc_id must be provided') parser = reqparse.RequestParser() parser.add_argument("seed_file", type=str) parser.add_argument("status", type=str) args = parser.parse_args() return self.update(args, id, boinc_id) ================================================ FILE: python/manager/controller/Log.py ================================================ from flask_restful import Resource, reqparse from app import app from app import logFile db = app.config['db'] class LogCtrl(Resource): def post(self): parser = reqparse.RequestParser() parser.add_argument('message', required=True, location='json') args = parser.parse_args() with open(logFile, 'a') as f: f.write(args['message']) return { "msg" : "log created successfully" }, 201 def get(self): try: log = open(logFile, 'r').read() except: return {"err": "not found"}, 404 return { "log" : log }, 200 ================================================ FILE: python/manager/controller/Minimize.py ================================================ import collections, operator from app import app from flask_restful import Resource, reqparse from flask import request from model.tracer_info import tracer_info from sqlalchemy.sql.expression 
import func db = app.config['db'] def minimize(target_id, num_files_per_edge = None): if num_files_per_edge == None: num_files_per_edge = 1 # get the data query = db.session.query(tracer_info.target_id, tracer_info.from_edge, tracer_info.to_edge, tracer_id.input_file) \ .filter_by(target_id=target_id) data = query.all() # Group the data by edge edges = collections.defaultdict(list) edges_per_input = collections.defaultdict(list) for target_id, from_edge, to_edge, input_file in data: edges[(int(from_edge), int(to_edge))].append(input_file) edges_per_input[input_file].append((from_edge, to_edge)) edges_by_popularity = sorted(edges, key=lambda k: len(edges[k]), reverse=True) already_have = collections.defaultdict(int) working_set = [] for edge in edges_by_popularity: if already_have[edge] > num_files_per_edge: continue files = edges[edge][:(num_files_per_edge - already_have[edge])] working_set.extend(files) for file in files: for edge in edges_per_input[file]: already_have[edge] += 1 return set(working_set) class MinimizeCtrl(Resource): def get(self): parser = reqparse.RequestParser() parser.add_argument('target_id', type=int, required=True) parser.add_argument('num_files_per_edge', type=int) args = parser.parse_args() return list(minimize(args.target_id, args.num_files_per_edge)) # The return value must be JSON serializable, so turn it back to a list if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Calculate the working set for a given target") parser.add_argument("target_id", type=int, help="The target id to get the working set of") args = parser.parse_args() files = minimize(args.target_id, args.s) print("Working set: {}".format(", ".join(files))) ================================================ FILE: python/manager/controller/Results.py ================================================ from flask_restful import Resource, reqparse, fields, marshal_with, abort from model.FuzzingJob import fuzz_jobs from model.FuzzingResults 
import results from app import app db = app.config['db'] result_fields = { 'result_id': fields.Integer(), 'job_id': fields.Integer(), 'repro_file': fields.String(), 'result_type': fields.String(), } class ResultsCtrl(Resource): def create(self, data, job_id=None, boinc_id=None): if job_id is not None: job = fuzz_jobs.query.get(job_id) if job is None: abort(404, err="job not found") elif boinc_id is not None: job = fuzz_jobs.query.filter_by(boinc_id=boinc_id).first() if job is None: abort(404, err="boinc_job not found") job_id = job.job_id try: result = results(job_id, data['repro_file'], type=data['result_type']) db.session.add(result) db.session.commit() except Exception as e: abort(400, err="invalid request") return result, 201 # TODO if needed def read(self, job_id=None): query = results.query if job_id: query = query.filter_by(job_id=job_id) job_results = query.all() # TODO: maybe don't error depending on job status if not job_results: abort(404, err="not found") return job_results, 200 # TODO if needed def update(self, id, data): crash = FuzzingCrash.query.filter_by(id=id).first() if crash is None: abort(404, err="not found") job = FuzzingJob.query.filter_by(id=data['job_id']).first() if job is not None: crash.job = job if data['repro_file'] is not None: crash.repro_file= data['dump_file'] if data['dump_file'] is not None: crash.dump_file = data['dump_file'] if data['dbg_file'] is not None: crash.dbg_file = data['dbg_file'] try: db.session.commit() except Exception as e: abort(400, err="invalid request") return crash.as_dict(), 201 # TODO if needed def delete(self, id): crash = FuzzingCrash.query.filter_by(id=id).first() if crash is None: abort(404, err="not found") try: db.session.delete(crash) db.session.commit() except Exception as e: abort(400, err="invalid request") return {"msg" : "record removed successfully"}, 201 def list(self, offset=None, limit=None, job_id=None, boinc_id=None, repro_file=None): query = results.query # results filters if job_id: 
query = query.filter_by(job_id=job_id)
        if repro_file:
            query = query.filter_by(repro_file=repro_file)
        # filters requiring join with fuzz_jobs
        if boinc_id:
            query = query.join(fuzz_jobs).filter(fuzz_jobs.boinc_id == boinc_id)
        # Paging defaults: start at the first row and return at most 20.
        if offset is None:
            offset = 0
        if limit is None:
            limit = 20
        crashes = query.offset(offset).limit(limit).all()
        return crashes, 200

    @marshal_with(result_fields)
    def get(self, job_id=None, boinc_id=None):
        """List results, filtered by the route ids and the repro_file argument."""
        parser = reqparse.RequestParser()
        parser.add_argument('offset', type=int)
        parser.add_argument('limit', type=int)
        parser.add_argument('repro_file', type=str)
        args = parser.parse_args()
        return self.list(args['offset'], args['limit'], job_id, boinc_id, args['repro_file'])

    @marshal_with(result_fields)
    def post(self, job_id=None, boinc_id=None):
        """Create a result from the JSON body for the job named in the route."""
        parser = reqparse.RequestParser()
        parser.add_argument('repro_file', required=True, location='json')
        parser.add_argument('result_type', required=True, location='json')
        # parent_file is accepted here; create() does not read it.
        parser.add_argument('parent_file', location='json')
        return self.create(parser.parse_args(), job_id, boinc_id)

================================================
FILE: python/manager/controller/Status.py
================================================
from flask_restful import Resource
from datetime import datetime
#from model.FuzzingJobState import FuzzingJobState
from model.FuzzingJob import fuzz_jobs
#from model.FuzzingHost import FuzzingHost
from model.FuzzingCrash import results


class StatusCtrl(Resource):
    # NOTE(review): get() uses FuzzingJobState, FuzzingJob, FuzzingCrash and
    # FuzzingHost, none of which the imports above bring into scope (two are
    # commented out) - confirm whether this resource is still meant to work.
    def get(self):
        status_active = FuzzingJobState.query.filter_by(name='Active').first()
        status_completed = FuzzingJobState.query.filter_by(name='Completed').first()
        status_queued = FuzzingJobState.query.filter_by(name='Queued').first()
        total_job_count = FuzzingJob.query.count()
        active_job_count = FuzzingJob.query.filter_by(state_id=status_active.id).count()
        completed_job_count = FuzzingJob.query.filter_by(state_id=status_completed.id).count()
        queued_job_count = FuzzingJob.query.filter_by(state_id=status_queued.id).count()
        crash_count =
FuzzingCrash.query.count()
        node_count = FuzzingHost.query.count()
        return {
            'total_job_count': total_job_count,
            'active_job_count': active_job_count,
            'completed_job_count': completed_job_count,
            'queued_job_count': queued_job_count,
            'crash_count': crash_count,
            'node_count': node_count,
            'serverTime' : str(datetime.now())
        }, 200

================================================
FILE: python/manager/controller/Target.py
================================================
import logging
from flask_restful import Resource, reqparse, fields, marshal_with, abort
from model.FuzzingTarget import targets
#from model.FuzzingArch import FuzzingArch
#from model.FuzzingPlatform import FuzzingPlatform
#from model.FuzzingConfig import FuzzingConfig
from app import app

logger = logging.getLogger(__name__)
db = app.config['db']

# Marshalling template for targets; the model's target_id is exposed as 'id'.
target_fields = {
    'id': fields.Integer(attribute='target_id'),
    'platform': fields.String,
    'target_executable': fields.String,
}


class TargetCtrl(Resource):
    def create(self, data):
        """
        Store a new target built from data['platform'] and data['target_executable'].
        :return: the new target and 201; aborts with 400 on any failure
        """
        try:
            target = targets(data['platform'], data['target_executable'])
            db.session.add(target)
            db.session.commit()
        except Exception:
            logger.exception('Error creating target')
            abort(400, err="invalid request")
        return target, 201

    def read(self, id):
        """Return the target with the given id; aborts with 404 when absent."""
        target = targets.query.get(id)
        if target is None:
            abort(404, err="not found")
        return target

    def update(self, id, data):
        """
        Overwrite platform and target_executable of an existing target.
        :return: the updated target and 200; 404 when absent, 400 on commit failure
        """
        target = targets.query.get(id)
        if target is None:
            abort(404, err="not found")
        target.platform = data['platform']
        target.target_executable = data['target_executable']
        try:
            db.session.commit()
        except Exception as e:
            abort(400, err="invalid request")
        return target, 200

    def list(self, offset=0, limit=10000):
        """Return up to ``limit`` targets starting at ``offset``."""
        targets_found = targets.query.offset(offset).limit(limit).all()
        return targets_found

    @marshal_with(target_fields)
    def get(self, id=None):
        parser = reqparse.RequestParser()
        parser.add_argument('offset', type=int)
        parser.add_argument('limit', type=int)
        args = parser.parse_args()
        if id is None:
            if args['offset'] is not None and
args['limit'] is not None: return self.list(args['offset'], args['limit']) else: return self.list() else: return self.read(id) @marshal_with(target_fields) def post(self): parser = reqparse.RequestParser() parser.add_argument('target_executable', required=True, location='json') parser.add_argument('platform', required=True, location='json') return self.create(parser.parse_args()) @marshal_with(target_fields) def put(self, id): parser = reqparse.RequestParser() parser.add_argument('target_executable', required=True, location='json') parser.add_argument('platform', required=True, location='json') return self.update(id, parser.parse_args()) def delete(self, id): target = targets.query.get(id) if target is None: return {"err": "not found"}, 404 try: db.session.delete(target) db.session.commit() except Exception as e: return {"err": "invalid request"}, 400 return {"msg" : "record removed successfully"}, 200 ================================================ FILE: python/manager/controller/Update.py ================================================ from flask_restful import Resource from flask import request from app.config import CLIENT_FOLDER import os, zipfile, hashlib class UpdateCtrl(Resource): def get(self, hash): path = 'static' + os.sep + 'client.zip' try: os.remove(path) except: None zip = zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED) for root, dirs, files in os.walk(CLIENT_FOLDER): for f in files: zip.write(os.path.join(root, f)) zip.close() client = open(path).read() if hash == hashlib.md5(client).hexdigest(): return {"err": "invalid request"}, 400 else: return {"url": request.url_root + path}, 200 ================================================ FILE: python/manager/controller/__init__.py ================================================ ================================================ FILE: python/manager/lib/__init__.py ================================================ ================================================ FILE: python/manager/lib/boinc.py 
================================================
import hashlib
import os.path
import re
import subprocess
import xml.etree.ElementTree as ET

from app import app
from lib import errors


def clean_download_path(path):
    """Turns an absolute path in the BOINC download dir into a relative path.

    This allows paths to be used as URL components, and doesn't expose
    unnecessary server configuration data.
    """
    # Keep what follows the last '/download/'; if the marker is absent,
    # rpartition leaves the whole path in the third element.
    _, _, relpath = path.rpartition('/download/')
    return relpath


def dir_hier_path(filename):
    """Convert a filename to an absolute path in the BOINC download tree.

    In the download tree, files are stored in a subdirectory based on the hash
    of the filename. This function calls out to BOINC to find the correct path
    for a given filename.

    The helper is run from the configured BOINC_PROJECT_DIR; check_output
    raises subprocess.CalledProcessError if it exits with a non-zero status.
    """
    abspath = subprocess.check_output(
        ['bin/dir_hier_path', filename], cwd=app.config['BOINC_PROJECT_DIR'])
    return abspath.strip().decode('utf8')


def filename_to_download_path(filename):
    """Convert a filename to a path relative to the download directory.

    Given a filename, returns a path that can be appended to the URL of the
    download directory to download that file.
""" abspath = dir_hier_path(filename) return clean_download_path(abspath) def stage_file(prefix, contents): filename = _filename_for_contents(prefix, contents) abspath = dir_hier_path(filename) if os.path.exists(abspath): with open(abspath, 'rb') as existing: if existing.read() != contents: raise errors.InternalError( 'Attempted to stage {} with differing contents'.format(filename)) else: with open(abspath, 'wb') as new_file: new_file.write(contents) os.chmod(abspath, 0o755) return abspath def get_filename(prefix, hash): return dir_hier_path('{}_{}'.format(prefix, hash)) def _filename_for_contents(prefix, contents): file_hash = hashlib.md5(contents).hexdigest() return '{}_{}'.format(prefix, file_hash) def submit_job(appname, cmdline, seed_file=None, seed_contents=None): if seed_file and seed_contents: raise errors.InternalError( 'Only one of seed_file and seed_contents can be specified') if seed_contents: seed_file = stage_file('input', seed_contents) elif not seed_file: raise errors.InternalError('No seed specified') # TODO: should the cmdline files have guaranteed unique filenames? 
cmd_contents = cmdline.encode('utf8')
    cmd_file = os.path.basename(stage_file('cmdline', cmd_contents))
    create_work_args = ['bin/create_work', '--appname', appname, '--verbose', seed_file, cmd_file]
    try:
        # stderr is folded into stdout so a failure message carries both.
        result = subprocess.check_output(
            create_work_args, cwd=app.config['BOINC_PROJECT_DIR'], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        raise errors.BoincError('create_work returned error: {}'.format(e.output))
    # The output is bytes, hence the bytes pattern; the first matching line wins.
    for line in result.splitlines():
        match = re.match(rb'created workunit; .*, ID ([0-9]+)', line)
        if match:
            return int(match.group(1))
    raise errors.BoincError('Could not find ID in create_work output: {}'.format(result))

================================================
FILE: python/manager/lib/errors.py
================================================
class Error(Exception):
    """Base class for all Killerbeez exceptions"""


class InternalError(Error):
    """Internal code used incorrectly"""


class BoincError(Error):
    """Error interacting with BOINC"""


class InputError(Error):
    """Error from invalid input"""

================================================
FILE: python/manager/lib/fuzzer.py
================================================
import shlex

from lib import errors


def _create_state_file(state):
    # TODO: possibly obsolete
    # NOTE(review): this body refers to os, tempfile, self and contents, none
    # of which are imported or passed in here; calling it would raise
    # NameError. Confirm whether the function can be removed.
    tmpdir = os.path.join(self.outdir, 'tmp')
    os.makedirs(tmpdir, exist_ok=True)
    with tempfile.NamedTemporaryFile(prefix=tmpdir, delete=False) as f:
        f.write(contents.encode('utf-8'))
    return f.name


def bat_escape(args):
    """Quote a set of arguments for a windows command line.

    Double-quote each argument, and backslash-escape any backslashes before
    double quotes and the double quotes themselves. Finally, put a ^ before each
    shell metacharacter so it will survive cmd.exe.
Based on the algorithm in https://blogs.msdn.microsoft.com/twistylittlepassagesallalike/2011/04/23/everyone-quotes-command-line-arguments-the-wrong-way/ """ escaped_args = [] for arg in args: escaped_parts = ['"'] num_backslashes = 0 for c in arg: if c == '\\': num_backslashes += 1 elif c == '"': escaped_parts.append('\\'*(2*num_backslashes+1) + c) num_backslashes = 0 else: escaped_parts.append('\\'*num_backslashes + c) num_backslashes = 0 escaped_parts.append('\\'*(2*num_backslashes) + '"') escaped_args.append(''.join(escaped_parts)) final_cmdline = ' '.join(escaped_args) # That is what we want to be passed to CreateProcess; however, cmd is going # to mangle it first so we must escape all chars it considers special. metachars = ['(', ')', '%', '!', '^', '"', '<', '>', '&', '|'] for char in metachars: final_cmdline = final_cmdline.replace(char, '^'+char) return '%1 {}'.format(final_cmdline) def sh_escape(args): escaped_args = [shlex.quote(arg) for arg in args] # The >&2 redirects stdout to stderr, which will make it show up in the # BOINC UI return '$1 >&2 {}'.format(' '.join(escaped_args)) def format_cmdline( driver, instrumentation, mutator, iterations, shell_format, driver_options=None, instrumentation_options=None, mutator_options=None, instrumentation_state=None, mutator_state=None): # BOINC takes care of renaming the seed file for us args = [driver, instrumentation, mutator, '-sf', 'seed', '-n', str(iterations)] if instrumentation_options: args.extend(["-i", instrumentation_options]) if mutator_options: args.extend(["-m", mutator_options]) if driver_options: args.extend(["-d", driver_options]) # TODO - we can't create files client-side so we have to have a way to # bundle these # if instrumentation_state: # filename = self.create_state_file(instrumentation_state) # args.extend(["-isf", filename]) # if mutator_state): # filename = self.create_state_file(mutator_state) # args.extend(["-msf", filename]) # In order to make this command line work on the target 
platform, it needs
    # to be escaped. The shell_format value comes from a config option
    # platform_opts_shell_format, however it could also be a column of the
    # target, or we could just choose a format based on the target's platform.
    # When we are working on automated target adding, we should revisit this to
    # see what works best.
    if shell_format == 'sh':
        return sh_escape(args)
    elif shell_format == 'bat':
        return bat_escape(args)
    else:
        # Separate messages for an unrecognized value and for no value at all.
        if shell_format:
            raise errors.InputError('Unknown shell_format "{}"'.format(shell_format))
        else:
            raise errors.InputError(
                'This target has no shell_format configured. Set the '
                'platform_opts_shell_format config option.')

================================================
FILE: python/manager/model/Config.py
================================================
from app import app
from model.FuzzingTarget import targets
from model.FuzzingJob import fuzz_jobs

db = app.config['db']


class FuzzingConfig(db.Model):
    # A named configuration value that can be attached to a target and/or a job.
    config_id = db.Column(db.Integer, primary_key=True, nullable=False)
    target_id = db.Column(db.Integer, db.ForeignKey('targets.target_id'))
    job_id = db.Column(db.Integer, db.ForeignKey('fuzz_jobs.job_id'))
    name = db.Column(db.String(), nullable=False,)
    value = db.Column(db.String(), nullable=False)

    # Counterparts of the `configs` relationships on targets and fuzz_jobs.
    target = db.relationship('targets', back_populates='configs')
    job = db.relationship('fuzz_jobs', back_populates='configs')

    def __init__(self, config_name, config_value, config_id=None, target=0, job=0):
        # TODO: sanity check that target and job aren't both 0?
        # TODO: The check is already done in the post, but should san check it here anyway.
self.config_id = config_id
        # `target` and `job` are stored as the foreign-key ids.
        self.target_id = target
        self.job_id = job
        self.name = config_name
        self.value = config_value

    def as_dict(self):
        # Every column value is converted with str().
        out = {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}
        return out

================================================
FILE: python/manager/model/FuzzingJob.py
================================================
from app import app
from datetime import *
from sqlalchemy.orm.collections import attribute_mapped_collection

db = app.config['db']


class fuzz_jobs(db.Model):
    # One fuzzing job for a target.
    job_id = db.Column(db.Integer(), primary_key=True, nullable=False)
    boinc_id = db.Column(db.Integer())
    job_type = db.Column(db.String())
    status = db.Column(db.String()) # unassigned, assigned, complete
    mutator_state = db.Column(db.String()) # json of the current state
    mutator = db.Column(db.String())
    instrumentation_type = db.Column(db.String())
    assign_time = db.Column(db.DateTime())
    end_time = db.Column(db.DateTime())
    driver = db.Column(db.String())
    target_id = db.Column(db.Integer(), db.ForeignKey('targets.target_id'))
    seed_file = db.Column(db.String())
    iterations = db.Column(db.Integer())

    target = db.relationship('targets')
    inputs = db.relationship('job_inputs', back_populates='job')
    # Dict-like collection of FuzzingConfig rows keyed by their `name`.
    configs = db.relationship('FuzzingConfig', back_populates='job',
                              collection_class=attribute_mapped_collection('name'))

    def __init__(self, type, target_id, mutator=None, mutator_state=None,
                 instrumentation_type=None, status='unassigned', job_id=None,
                 assign_time=None, driver=None, seed_file=None, iterations=None):
        self.job_id = job_id
        self.job_type = type
        self.target_id = target_id
        self.mutator = mutator
        self.mutator_state = mutator_state
        self.status = status
        self.instrumentation_type = instrumentation_type
        self.assign_time = assign_time
        # A new job has no end time yet.
        self.end_time = None
        self.driver = driver
        self.seed_file = seed_file
        self.iterations = iterations

    def lookup_config(self, config_type, config_name):
        """
        Gets options configured for a specific instrumentation, mutator, or driver.
Looks for job-specific configs first, then falls back to target configs.
        The config is looked up under the name "<config_type>_opts_<config_name>".
        :param config_type: str, the type of object to get configs for, such as 'instrumentation', 'mutator', or 'driver'.
        :param config_name: str, the name of the mutator, instrumentation, or driver to get configs for.
        :return: str if any configuration is stored for the given instrumentation/mutator/driver type, otherwise None.
        """
        config_fullname = "{}_opts_{}".format(config_type, config_name)
        # First, check job-specific config
        config = self.configs.get(config_fullname)
        if config is None:
            # If nothing, fall back to target-specific config
            config = self.target.configs.get(config_fullname)
        if config is None:
            return None
        # if we got a result from either of the queries, get the string value
        return config.value

    def as_dict(self):
        # Column values are returned as-is, without conversion to str.
        return {c.name: getattr(self, c.name) for c in self.__table__.columns}

================================================
FILE: python/manager/model/FuzzingResults.py
================================================
from app import app
from datetime import *

db = app.config['db']

'''
For simplicity I am just using one simple model for each crash and not using the previous db schema. We may want to move the crash system to a bugzilla friendly format. Based on the decision on the design of crash analysis (clientside or serverside) , improvement could be made.
'''


class results(db.Model):
    # One result (e.g. a crash or hang) reported for a job.
    result_id = db.Column(db.Integer, primary_key=True)
    job_id = db.Column(db.Integer, db.ForeignKey('fuzz_jobs.job_id'), nullable=False)
    repro_file = db.Column(db.String, nullable=False)
    result_type = db.Column(db.String) # 'hang' or 'crash'

    def __init__(self, job_id, repro, type='crash'):
        self.job_id = job_id
        self.repro_file = repro
        self.result_type = type

    def as_dict(self):
        # Every column value is converted with str().
        return {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}

================================================
FILE: python/manager/model/FuzzingTarget.py
================================================
from app import app
from sqlalchemy.orm.collections import attribute_mapped_collection

db = app.config['db']


class targets(db.Model):
    # A fuzzing target: an executable on a given platform.
    target_id = db.Column(db.Integer, primary_key=True, nullable=False)
    platform = db.Column(db.String(), nullable=False)
    target_executable = db.Column(db.String(), nullable=False)

    # Dict-like collection of FuzzingConfig rows keyed by their `name`.
    configs = db.relationship('FuzzingConfig', back_populates='target',
                              collection_class=attribute_mapped_collection('name'))

    def __init__(self, platform, exe):
        self.platform = platform
        self.target_executable = exe

    def __str__(self):
        # Rendered as '<target_executable>_<platform>'.
        return '{}_{}'.format(self.target_executable, self.platform)

    def as_dict(self):
        # Every column value is converted with str().
        return {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}

================================================
FILE: python/manager/model/__init__.py
================================================

================================================
FILE: python/manager/model/instrumentation_state.py
================================================
from app import app

db = app.config['db']


class instrumentation_state(db.Model):
    # Stored state per (instrumentation_type, target_id) pair; both columns
    # form the primary key.
    instrumentation_type = db.Column(db.String(), primary_key=True, nullable=False)
    state = db.Column(db.String())
    target_id = db.Column(db.Integer, db.ForeignKey('targets.target_id'),
                          nullable=False, primary_key=True)

    target = db.relationship('targets')

    def __init__(self, instrumentation_type, state, target_id):
self.target_id = target_id
        self.state = state
        self.instrumentation_type = instrumentation_type

    def as_dict(self):
        # Every column value is converted with str().
        return {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}

================================================
FILE: python/manager/model/job_inputs.py
================================================
from app import app

db = app.config['db']


class job_inputs(db.Model):
    # Links a job to one of its input files; (job_id, input_file) is the key.
    job_id = db.Column(db.Integer, db.ForeignKey('fuzz_jobs.job_id'),
                       nullable=False, primary_key=True)
    job = db.relationship('fuzz_jobs', back_populates='inputs')
    input_file = db.Column(db.String, nullable=False, primary_key=True)

    def as_dict(self):
        # Every column value is converted with str().
        return {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}

================================================
FILE: python/manager/model/tracer_info.py
================================================
from app import app

db = app.config['db']


class tracer_info(db.Model):
    # One (from_edge, to_edge) pair recorded for an input file of a target;
    # all four columns together form the primary key.
    target_id = db.Column(db.Integer, db.ForeignKey('targets.target_id'),
                          nullable=False, primary_key=True)
    target = db.relationship('targets')
    input_file = db.Column(db.String, nullable=False, primary_key=True)
    from_edge = db.Column(db.Numeric(asdecimal=True), nullable=False, primary_key=True)
    to_edge = db.Column(db.Numeric(asdecimal=True), nullable=False, primary_key=True)

    def __init__(self, target_id, input_file, from_edge, to_edge):
        self.target_id = target_id
        self.input_file = input_file
        self.from_edge = from_edge
        self.to_edge = to_edge

    def as_dict(self):
        # Every column value is converted with str().
        return {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}

================================================
FILE: python/manager/requirements.txt
================================================
flask
flask-restful
flask-sqlalchemy
requests

================================================
FILE: python/manager/server.py
================================================
from app import app
import argparse
import logging
import os.path
import sys

if __name__ == '__main__':
    parser =
argparse.ArgumentParser() parser.add_argument( '-project_dir', help="BOINC project directory (default: /home/boincadm/projects/killerbeez)", default='/home/boincadm/projects/killerbeez') parser.add_argument('-seed', help="For debugging. Which test to load seed data for. -listseeds for list.") parser.add_argument('-clear', action="store_true", help="For debugging. Clear all data from the database") parser.add_argument('-create', action="store_true", help="For debugging. Force creation of databases") parser.add_argument('-listseeds', action="store_true", help="List debug seeds that you can choose the exit") args = parser.parse_args() if os.path.isdir(args.project_dir): app.config['BOINC_PROJECT_DIR'] = os.path.abspath(args.project_dir) else: print("project_dir does not exist or is not a directory") sys.exit(1) db = app.config['db'] logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(name)s %(message)s') logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) if args.listseeds: import tests.seeds as seeds seeds.listseeds() sys.exit(0) if args.clear: db.drop_all() # TODO: Determine if DB already exists and check its schema version if args.create: db.create_all() db.session.commit() if args.seed is not None: import tests.seeds as seeds # Seed data into the database for testing. 
if not seeds.seed(db, args.seed): print("DEBUG ERROR: Attempting to seed db for invalid test: {}".format(args.seed)) app.run(host='0.0.0.0') ================================================ FILE: python/manager/tests/job_query_test.py ================================================ from flask_sqlalchemy import sqlalchemy import os, sys, random # Find the directory with our source in it and add it to the lookup path if "app" in os.listdir(): sys.path.insert(0, os.path.abspath('.')) else: sys.path.insert(0, os.path.abspath('..')) from app import app from model.FuzzingTarget import targets from model.FuzzingJob import fuzz_jobs from model.job_inputs import job_inputs from model.FuzzingInputs import inputs #from model.tracer_info import tracer_info #from controller.Minimize import minimize db = app.config['db'] if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description="Test things") parser.add_argument("-setup", action="store_true", help="Setup the database") parser.add_argument("-clear", action="store_true", help="Clear the database") parser.add_argument("-target_id", type=int, default=1, help="target id to use") args = parser.parse_args() if args.clear: db.drop_all() db.create_all() db.session.commit() #if args.setup: if True: # Add some fake data to test against #base = (args.target_id << 8) # Add a few targets db.session.add(targets(None, "x86", "Windows 10", "test2.exe")) db.session.add(targets(None, "x86", "Windows 8", "test1.exe")) db.session.add(targets(None, "x86_64", "Windows 10", "test2.exe")) db.session.add(fuzz_jobs("fuzz", 1, status='assigned')) db.session.add(fuzz_jobs("fuzz", 2)) db.session.add(fuzz_jobs("fuzz", 2)) db.session.add(fuzz_jobs("fuzz", 1)) db.session.add(fuzz_jobs("fuzz", 1)) db.session.add(fuzz_jobs("fuzz", 3)) db.session.add(inputs("AAAAAAAA")) db.session.add(inputs("BBBBBBBB")) db.session.add(job_inputs(4, 1)) db.session.add(job_inputs(4, 2)) #db.session.add(job_inputs(5, 1)) db.session.add(job_inputs(5, 
2)) db.session.add(job_inputs(1, 1)) #db.session.add(job_inputs(1, 2)) db.session.commit() # Can we get a result back out? tars = db.session.query(targets).all() print("all targets:{}".format(len(tars))) tars = db.session.query(targets).filter_by(architecture="x86").all() print("x86 targets:{}".format(len(tars))) tars = db.session.query(targets).filter_by(target_executable="test2.exe").all() print("test2 tars :{}".format(len(tars))) # Get all jobs associated with x86 architecture and windows 10 that are unassigned query = db.session.query(fuzz_jobs) \ .filter_by(status='unassigned') \ .join(targets, targets.target_id == fuzz_jobs.target_id) \ .filter_by(architecture="x86", os="Windows 10") jobs = query.all() print(len(jobs)) job = random.choice(jobs) print(job.job_id) #inputs = db.session.query(job_inputs.input_id).filter_by(job_id=job.job_id).all() #inputs = db.session.query(job_inputs.input_id, inputs) \\ inputs = db.session.query(job_inputs.input_id, inputs) \ .filter_by(job_id=job.job_id) \ .join(inputs, job_inputs.input_id == inputs.input_id).all() if len(inputs): inputs = [input[1].as_dict() for input in inputs] else: print("No inputs, we can't do anything") print(inputs) sys.exit() ================================================ FILE: python/manager/tests/minimizer_test.py ================================================ from flask_sqlalchemy import sqlalchemy import os, sys # Find the directory with our source in it and add it to the lookup path if "app" in os.listdir(): sys.path.insert(0, os.path.abspath('.')) else: sys.path.insert(0, os.path.abspath('..')) from app import app from model.FuzzingTarget import targets from model.FuzzingJob import fuzz_jobs from model.FuzzingInputs import inputs, inputs_hash from model.tracer_info import tracer_info from controller.Minimize import minimize db = app.config['db'] if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description="Test things") parser.add_argument("-setup", 
action="store_true", help="Setup the database") parser.add_argument("-clear", action="store_true", help="Clear the database") parser.add_argument("-target_id", type=int, default=1, help="target id to use") parser.add_argument("-num_files_per_edge", type=int, default=1, help="The number of files per edge to include in the working set") args = parser.parse_args() if args.clear: db.drop_all() db.create_all() db.session.commit() if args.setup: # Add some fake data to test against base = (args.target_id << 8) tests_info = [ {"job_id": base + 1, "input_id" : base + 1, "data": "AAAA", "edges" : [(1, 2), (3, 4)]}, {"job_id": base + 2, "input_id" : base + 2, "data": "ABCD", "edges" : [(1, 2), (5, 6)]}, {"job_id": base + 3, "input_id" : base + 3, "data": "XXXXXXX", "edges" : [(1, 2), (3, 4), (5,6)]}, {"job_id": base + 4, "input_id" : base + 4, "data": "XXXXXX", "edges" : [(1, 2), (3, 4), (5,6)]}, {"job_id": base + 5, "input_id" : base + 5, "data": "XXXXX", "edges" : [(1, 2), (3, 4), (5,6)]}, {"job_id": base + 6, "input_id" : base + 6, "data": "ZZZZZZZZZ", "edges" : [(7,8)]}, ] db.session.add(targets(args.target_id, "x86", "Windows10", "test2.exe")) for test_info in tests_info: db.session.add(fuzz_jobs("user", args.target_id, None, None, None, None, "finished", test_info["job_id"])) hash = inputs_hash(test_info["data"]) input = inputs.query.filter_by(hash = hash).all() if len(input) == 0: db.session.add(inputs(test_info["data"], test_info["job_id"], input_id = test_info["input_id"])) else: test_info["input_id"] = input[0].input_id for edge in test_info["edges"]: db.session.add(tracer_info(args.target_id, test_info["input_id"], edge[0], edge[1])) db.session.commit() files = minimize(args.target_id, args.num_files_per_edge) print(files) ================================================ FILE: python/manager/tests/seeds.py ================================================ import base64 import json from model.FuzzingTarget import targets from model.FuzzingJob import fuzz_jobs from 
model.FuzzingInputs import inputs from model.job_inputs import job_inputs from model.Config import FuzzingConfig from model.instrumentation_state import instrumentation_state def listseeds(): """ Prints the list of seed functions that we accept. Make sure to keep this up to date. Keeping this list up to date by hand is cleaner than trying to parse the AST or use getattr and isfunction. :return: Nothing; just prints """ print("The following seed options exist:") print("\tclient_request: For testing that a client can request a job successfully. Creates x86 and x86_64") print("\t\t jobs that the client get endpoint should be able to find.") def seed(db, forwhich): """ Seeds the database for the test named by forwhich. :return: True if forwhich named a known seed (currently only "client_request"), False otherwise """ if forwhich == "client_request": client_request(db) return True return False def client_request(db): #db.session.add(targets(None, "x86", "Windows 10", "test2.exe")) #db.session.add(targets(None, "x86", "Windows 8", "test1.exe")) #db.session.add(targets(None, "x86_64", "Windows 10", "test2.exe")) db.session.add(targets("x86", "CYGWIN_NT-10.0 2.10.0(0.325/5/3)", "test2.exe")) db.session.add(targets("x86", "CYGWIN_NT-10.0 2.10.0(0.325/5/3)", "test1.exe")) db.session.add(targets("x86_64", "Windows 10", "test2.exe")) db.session.add(fuzz_jobs("fuzz", 1, status='assigned')) db.session.add(fuzz_jobs("fuzz", 3, mutator='nop', instrumentation_type='dynamorio', driver='wmp')) db.session.add(fuzz_jobs("fuzz", 3, mutator='radamsa', instrumentation_type='dynamorio', driver='wmp')) db.session.add(fuzz_jobs("fuzz", 2)) db.session.add(fuzz_jobs("fuzz", 1)) db.session.add(fuzz_jobs("fuzz", 3, instrumentation_type="testinstrumentor")) db.session.add(inputs("AAAAAAAA")) db.session.add(inputs("BBBBBBBB")) db.session.add(job_inputs(4, 1)) db.session.add(job_inputs(4, 2)) # db.session.add(job_inputs(5, 1)) db.session.add(job_inputs(5, 2)) db.session.add(job_inputs(2, 2)) db.session.add(job_inputs(3, 2)) db.session.add(job_inputs(1, 1)) db.session.add(job_inputs(6, 1)) # db.session.add(job_inputs(1, 2)) 
db.session.add(FuzzingConfig("instrumentation_opts_testinstrumentor", "testfallbacktarget", target=3)) db.session.add(FuzzingConfig("driver_opts_stdin", "stdinopts", target=3)) db.session.add(FuzzingConfig("mutator_opts_radamsa", json.dumps({'seed': 5}), target=3)) db.session.add(FuzzingConfig("mutator_opts_radamsa", "{radamsoo}", target=2)) #db.session.add(FuzzingConfig("instrumentation_opts_testinstrumentor", "testfallbackjob", job=6)) db.session.add(FuzzingConfig('instrumentation_opts_dynamorio', json.dumps({ "per_module_coverage": 1, "timeout": 10000, "coverage_modules": ["wmp.DLL"], "client_params": "-target_module wmplayer.exe -target_offset 0x1F20 -nargs 3", "fuzz_iterations":1, "target_path": "C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe" }), target=3)) db.session.commit() ================================================ FILE: server/add_target.py ================================================ #!/usr/bin/env python2 # # This script will add a target to the BOINC system, which would normally # require a bunch of manual work in terms of editing files, and creating # a whole directory structure. The gist of it is this: # - Edit project.xml if the platform is new # - Edit config.xml to add daemons such as validators and assimilators # - Create the following directory structure: # apps/$TARGET_NAME/$VERSION/$PLATFORM # (where $VERSION is currently hard-coded to "1") # - The contents of skel/$PLATFORM are then copied to the new app directory # with the filenames getting the $TARGET_NAME injected into them and # version.xml has all instances of {app} replaced with $TARGET_NAME # - Creates templates/$TARGET_NAME_$PLATFORM_{in,out} based on files in # skel/templates. 
See: https://boinc.berkeley.edu/trac/wiki/JobTemplates # - Calls xadd, stop, and start to ensure BOINC knows about all these changes # import argparse import fcntl import os import os.path import shutil import socket import subprocess import sys import boinc_path_config from Boinc import configxml, projectxml def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('app') parser.add_argument('platforms', nargs=argparse.REMAINDER) return parser.parse_args() def add_app(project_file, name, platform): app_name = '{}_{}'.format(name, platform) for node in project_file.elements: if node._name == 'app' and node.name == app_name: print('App {} already in project.xml, not adding app'.format(app_name)) return a = project_file.elements.make_node_and_append('app') a.name = app_name a.user_friendly_name = '{} running on {}'.format(name, platform) def create_app_dir(name, platform): app_name = '{}_{}'.format(name, platform) app_dir = os.path.join('apps', app_name) if os.path.exists(app_dir): print('App directory {} already exists, not adding app versions'.format(app_dir)) return app_version_dir = os.path.join(app_dir, '1', platform) os.makedirs(app_version_dir) skel_dir = os.path.join('skel', platform) for filename in os.listdir(skel_dir): if filename != 'version.xml': name, dot, ext = filename.partition('.') new_filename = ''.join((name, '.', app_name, dot, ext)) shutil.copy(os.path.join(skel_dir, filename), os.path.join(app_version_dir, new_filename)) else: with open(os.path.join(skel_dir, filename)) as template: version_xml = template.read().format(app=app_name) with open(os.path.join(app_version_dir, filename), 'w') as version_file: version_file.write(version_xml) def create_app_templates(name, platform): in_template_path = os.path.join('templates', '{}_{}_in'.format(name, platform)) if os.path.exists(in_template_path): print('Input template {} already exists, not adding templates'.format(in_template_path)) return out_template_path = 
os.path.join('templates', '{}_{}_out'.format(name, platform)) if os.path.exists(out_template_path): print('Output template {} already exists, not adding templates'.format(out_template_path)) return shutil.copyfile(os.path.join('skel', 'templates', '{}_in'.format(platform)), in_template_path) shutil.copyfile(os.path.join('skel', 'templates', '{}_out'.format(platform)), out_template_path) def add_daemons(config_file, name, platform): app_name = '{}_{}'.format(name, platform) cmd = 'killerbeez_assimilator.py -app {}'.format(app_name) for node in config_file.daemons: if node.cmd == cmd: print('Assimilator daemon for app {} already exists, not adding it'.format(app_name)) return daemon = config_file.daemons.make_node_and_append('daemon') daemon.cmd = cmd daemon.pid_file = 'killerbeez_assimilator_{}.pid'.format(app_name) daemon.lock_file = 'killerbeez_assimilator_{}.lock'.format(app_name) daemon.output = 'killerbeez_assimilator_{}.log'.format(app_name) daemon = config_file.daemons.make_node_and_append('daemon') daemon.cmd = 'sample_trivial_validator --app {}'.format(app_name) daemon.pid_file = 'sample_trivial_validator_{}.pid'.format(app_name) daemon.lock_file = 'sample_trivial_validator_{}.lock'.format(app_name) daemon.output = 'sample_trivial_validator_{}.log'.format(app_name) def lock_file(filename): os.umask(02) file = open(filename,'w') fcntl.lockf(file.fileno(), fcntl.LOCK_EX|fcntl.LOCK_NB) def main(): args = parse_args() if not os.path.isfile('project.xml'): print('Must be run from the project directory') sys.exit(1) hostname = socket.gethostname().split('.')[0] lockfile_name = os.path.join('pid_{}'.format(hostname), 'add_target.lock') try: lock_file(lockfile_name) except IOError: print('Another {} process is running, please try again'.format(sys.argv[0])) project_file = projectxml.ProjectFile('project.xml').read() config_file = configxml.ConfigFile('config.xml').read() name = args.app platforms = args.platforms for platform in platforms: # Add name_platform to 
apps in project.xml add_app(project_file, name, platform) # Create app directory with wrapper, version.xml create_app_dir(name, platform) create_app_templates(name, platform) add_daemons(config_file, name, platform) project_file.write() config_file.write() os.unlink(lockfile_name) # Update db from project file subprocess.check_call(['bin/xadd']) # Restart project subprocess.check_call(['bin/stop']) subprocess.check_call(['bin/start']) print('New app versions installed into apps/{}_*. Make any changes you ' 'need, then run bin/update_versions to install them.'.format(name)) if __name__ == '__main__': main() ================================================ FILE: server/boinc_submit.py ================================================ #!/usr/bin/env python import base64 import collections import requests # Set the following to configure your job PROJECT = 'http://localhost:5000/api' SEED = b"1234seed" def main(): # Make sure we have a DB entry for the target target_resp = requests.post( '%s/target' % PROJECT, json={"platform": "windows_x86_64", "target_executable": "wmp"}) target_resp.raise_for_status() target_id = target_resp.json()['id'] # Set up shell format for this target - change value to "sh" for linux requests.post( '%s/config' % PROJECT, json={"target_id": target_id, "name": "platform_opts_shell_format", "value": "bat"}).raise_for_status() # Create the driver and instrumentation configs requests.post( '%s/config' % PROJECT, json={"target_id": target_id, "name": "driver_opts_wmp", "value": r'{"path": "C:\\Program Files\\Windows Media Player\\wmplayer.exe"}'}).raise_for_status() requests.post( '%s/config' % PROJECT, json={"target_id": target_id, "name": "instrumentation_opts_dynamorio", "value": r'{"per_module_coverage": 1, "timeout": 10000, "coverage_modules": ["wmp.DLL"], "client_params": "-target_module wmplayer.exe -target_offset 0x1F20 -nargs 3", "fuzz_iterations": 1, "target_path": "C:\\Program Files\\Windows Media 
Player\\wmplayer.exe"}'}).raise_for_status() # Create the seed file seed_resp = requests.post( '%s/file' % PROJECT, json={"content": base64.b64encode(SEED).decode(), "encoding": "base64"}) seed_resp.raise_for_status() seed_file = seed_resp.json()['filename'] # Create the job! job_resp = requests.post( '%s/job' % PROJECT, json={"job_type": "fuzz", "target_id": target_id, "mutator": "radamsa", "instrumentation_type": "dynamorio", "driver": "wmp", "seed_file": seed_file, "iterations": 2}) job_resp.raise_for_status() job_json = job_resp.json() print('Created job %s with BOINC id %s' % (job_json['job_id'], job_json['boinc_id'])) if __name__ == '__main__': main() ================================================ FILE: server/killerbeez_assimilator.py ================================================ #!/usr/bin/env python import logging import os.path import re import shutil import subprocess import tempfile import zipfile import requests import assimilator import xml.etree.ElementTree as ET logger = logging.getLogger(__name__) API_SERVER = 'http://localhost:5000/api' def clean_download_path(path): """Turns an absolute path in the BOINC download dir into a relative path. This allows paths to be used as URL components, and doesn't expose unnecessary server configuration data. 
""" _, _, relpath = path.rpartition('/download/') return relpath def filename_to_download_path(path): abspath = subprocess.check_output( ['bin/dir_hier_path', path], cwd='..').strip() return clean_download_path(abspath) def dirname_to_result_type(dirname): result_types = {'crashes': 'crash', 'hangs': 'hang', 'new_paths': 'new_path'} return result_types[dirname] class KillerbeezAssimilator(assimilator.Assimilator): def __init__(self): assimilator.Assimilator.__init__(self) def _stage_file(self, filename): logger.debug('Staging %s', filename) process = subprocess.Popen( ['bin/stage_file', '--verbose', filename], cwd='..', stdout=subprocess.PIPE) stdout, stderr = process.communicate() if process.returncode: self.logError('Error staging file: {} | {}\n'.format(stdout, stderr)) return None # Try to parse stdout to find out where the file was staged to new_path = None for line in stdout.splitlines(): if line.startswith(b'staging '): _, _, path = line.partition(b' to ') new_path = path.decode('utf8') elif b'already exists as' in line: _, _, path = line.partition(b' as ') new_path = path.decode('utf8') return clean_download_path(new_path) def _record_job(self, wu): job_id = wu.id # ET doesn't like multiple root elements, so we need to wrap the whole # document in one element xml_doc = ET.fromstring('{}'.format(wu.xml_doc)) file_name_element = xml_doc.find("workunit/file_ref[open_name='seed']/file_name") if file_name_element is None: return # TODO: error handling seed_file = filename_to_download_path(file_name_element.text) requests.put('{}/boinc_job/{}'.format(API_SERVER, job_id), json={'seed_file': seed_file, 'status': 'completed'}) def _record_result(self, file_path, result_type, job_id): # TODO: use client helper module, maybe requests.post('{}/boinc_job/{}/results'.format(API_SERVER, job_id), json={'repro_file': file_path, 'result_type': result_type}) def _process_zipfile(self, job_id, output_file): tempdir = tempfile.mkdtemp() try: with zipfile.ZipFile(output_file, 
'r') as results_file: for result_name in results_file.namelist(): match = re.match(r'killerbeez_result_([a-z]+)_([A-Za-z0-9]+)', result_name) if not match: continue result_type = match.group(1) md5 = match.group(2) filename = os.path.join(tempdir, 'input_{}'.format(md5.lower())) with open(filename, 'wb') as dest, results_file.open(result_name) as src: dest.write(src.read()) staged_path = self._stage_file(filename) self._record_result(staged_path, dirname_to_result_type(result_type), job_id) finally: shutil.rmtree(tempdir) def assimilate_handler(self, wu, results, canonical_result): """ This method is called for each workunit (wu) that needs to be processed. A canonical result is not guarenteed and several error conditions may be present on the wu. Call report_errors(wu) when overriding this method. Note that the -noinsert flag (self.noinsert) must be accounted for when overriding this method. """ if self.report_errors(wu) or canonical_result is None: return # TODO: handle error status, maybe self._record_job(wu) zipfile_name = self.get_file_path(canonical_result) self._process_zipfile(wu.id, zipfile_name) if __name__ == '__main__': asm = KillerbeezAssimilator() asm.run() ================================================ FILE: server/skel/templates/windows_x86_64_in ================================================ 0 1 0 seed 1 cmdline.bat 1 1 ================================================ FILE: server/skel/templates/windows_x86_64_out ================================================ 5000000 results.zip ================================================ FILE: server/skel/templates/x86_64-pc-linux-gnu_in ================================================ 0 1 0 seed 1 cmdline.sh 1 1 ================================================ FILE: server/skel/templates/x86_64-pc-linux-gnu_out ================================================ 5000000 results.zip ================================================ FILE: server/skel/windows_x86_64/flatten_results.ps1 
================================================ echo "Results from a killerbeez run. This file ensures an empty zip file is not generated." > README.txt Foreach($type in "crashes", "hangs", "new_paths") { Get-ChildItem output\$type | Foreach-Object { cp $_.FullName $('killerbeez_result_{0}_{1}' -f $type, $_.Name) } } ================================================ FILE: server/skel/windows_x86_64/job.xml ================================================ C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe boinc_resolve(unpack_killerbeez.ps1) -from boinc_resolve(killerbeez-x64.zip) -into $PROJECT_DIR cmdline.bat $PROJECT_DIR\killerbeez-x64\killerbeez\fuzzer.exe C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe boinc_resolve(flatten_results.ps1) results.zip killerbeez_result_.* README.txt ================================================ FILE: server/skel/windows_x86_64/unpack_killerbeez.ps1 ================================================ param ( $from = "killerbeez-x64.zip", $into = $(pwd), $lockfile = "$into\unpack_killerbeez-x64.lock", $hashfile = "$into\killerbeez-x64.sha256", $extracted_name = "$into\killerbeez-x64" ) # Try to prevent multiple copies from running at once. Yes, this has a TOCTOU # race condition, but fixing it makes the script a lot more complicated, and it # should just cause failed jobs that will be retried anyway. 
while (Test-Path $lockfile) { sleep 10 } echo $pid > $lockfile # Check the hash of our killerbeez zip against the last one that was extracted # to see if we have a new zip $hash = (Get-FileHash $from -Algorithm SHA256).Hash if (Test-Path $hashfile) { $old_hash = $(cat $hashfile).Trim() if ($old_hash -eq $hash) { echo "Killerbeez already unpacked, skipping archive extraction" rm $lockfile exit 0 } } # Hash wasn't saved or didn't match, extract the zip rmdir -r $extracted_name -ErrorAction 'silentlycontinue' Expand-Archive $from -DestinationPath $into echo $hash > $hashfile rm $lockfile ================================================ FILE: server/skel/windows_x86_64/version.xml ================================================ wrapper_26014_windows_x86_64.{app}.exe killerbeez-x64.{app}.zip killerbeez-x64.zip job.{app}.xml job.xml unpack_killerbeez.{app}.ps1 unpack_killerbeez.ps1 flatten_results.{app}.ps1 flatten_results.ps1 ================================================ FILE: server/skel/x86_64-pc-linux-gnu/flatten_results.sh ================================================ #!/bin/bash exec >&2 # Redirect stdout to stderr so that it's captured for BOINC echo "Results from a killerbeez run. This file ensures an empty zip file is not generated." 
> README.txt for result_type in crashes hangs new_paths; do for file in $(ls output/$result_type); do cp output/$result_type/$file killerbeez_result_${result_type}_${file} done done ================================================ FILE: server/skel/x86_64-pc-linux-gnu/job.xml ================================================ unpack_killerbeez.sh boinc_resolve(killerbeez.zip) $PROJECT_DIR /bin/bash boinc_resolve(cmdline.sh) $PROJECT_DIR/killerbeez-Linux/killerbeez/fuzzer flatten_results.sh results.zip killerbeez_result_.* README.txt ================================================ FILE: server/skel/x86_64-pc-linux-gnu/unpack_killerbeez.sh ================================================ #!/bin/bash exec >&2 # Redirect stdout to stderr so that it's captured for BOINC # Ensure absolute paths FROM=$(readlink -f $1) INTO=$(readlink -f $2) HASHFILE=$INTO/$(basename $FROM).sha256 EXTRACTED_NAME=$INTO/killerbeez-Linux # Ensure only one copy of this script runs at a time [ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock -en "$0" "$0" "$@" || : if [[ -e "$HASHFILE" ]]; then sha256sum -c "$HASHFILE" if [[ $? -eq 0 ]]; then echo >&2 "Already have a build of this version of Killerbeez, skipping" exit 0 fi fi set -e rm -rf $EXTRACTED_NAME cd $INTO unzip $FROM sha256sum $FROM > $HASHFILE ================================================ FILE: server/skel/x86_64-pc-linux-gnu/version.xml ================================================ wrapper.{app} killerbeez-Linux.{app}.zip killerbeez.zip job.{app}.xml job.xml unpack_killerbeez.{app}.sh unpack_killerbeez.sh flatten_results.{app}.sh flatten_results.sh ================================================ FILE: tests/build.bat ================================================ rem Dependencies: rem git https://git-scm.com/download/win rem wget http://gnuwin32.sourceforge.net/packages/wget.htm rem cmake https://cmake.org/download/ rem unzip ? 
mkdir C:\killerbeez cd \killerbeez git clone https://github.com/grimm-co/killerbeez.git git clone https://github.com/grimm-co/killerbeez-mutators.git git clone https://github.com/grimm-co/killerbeez-utils.git wget https://github.com/DynamoRIO/dynamorio/releases/download/release_6_2_0/DynamoRIO-Windows-6.2.0-2.zip unzip DynamoRIO-Windows-6.2.0-2.zip mv DynamoRIO-Windows-6.2.0-2 dynamorio cd killerbeez mkdir build cd build cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DCMAKE_BUILD_TYPE=Release .. cmake --build . ================================================ FILE: tests/smoke_test.sh ================================================ #!/bin/bash # # Run this from the directory above the killerbeez checkout # # Dependencies: # sudo apt install build-essential cmake clang libtool-bin automake bison flex libglib2.0-dev libc6-dev-i386 if [[ "$1" == "kill" ]]; then # Clean out the old rm -fR killerbeez fi # Check out the new (if needed) if [[ ! -d killerbeez ]]; then git clone --recursive https://github.com/grimm-co/killerbeez fi function generic_error { # $1 = return code # $2 = command output # $3 = error string if [[ $1 -ne 0 ]]; then echo "$3" echo "Output: $2" exit 1 fi } function test_linux_error { # $1 = return code # $2 = command output # $3 = mutator # $4 = crashing or non-crashing? err="Error running fuzzer with $3 on test-linux ($4)" generic_error "$1" "$2" "$err" } cd killerbeez # Compile things mkdir -p build; cd build cmake .. && make || exit 1 # Try running the fuzzer and make sure we have some basic functionality cd killerbeez # Run the test-linux program with input which should not cause a crash echo "AAAA" > test0 echo "Running expected non-crashing test" echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json output=`./fuzzer -n 300 -s test0 -d driver.json file return_code honggfuzz` test_linux_error $? 
"$output" "honggfuzz" "non-crashing" # There should not have been anything critical in this run (rc should == 1) echo "$output" | grep CRITICAL &> /dev/null x=$? # test_linux_error expects rc = 0 test_linux_error $((x-1)) "$output" "honggfuzz" "non-crashing" # There should be a number of iterations done echo "$output" | grep -i ran.*iterations &> /dev/null test_linux_error $? "$output" "honggfuzz" "non-crashing" # Ensure that it can also find crashes, given a reasonable seed echo "ABC@" > test1 echo "Running expected crashing test" output=`./fuzzer -n 300 -s test1 -d driver.json file return_code honggfuzz` test_linux_error $? "$output" "honggfuzz" "crashing" echo "$output" | grep CRITICAL &> /dev/null test_linux_error $? "$output" "honggfuzz" "non-crashing" echo "$output" | grep -i ran.*iterations &> /dev/null test_linux_error $? "$output" "honggfuzz" "non-crashing" function string_not_present { needle="$1" haystack="$2" # rc = 0 means string was not present echo "$haystack" | grep "$needle" &> /dev/null x=$? # we expect this to be 1 if not found, we want to return 0 return $((x-1)) # if the needle is not found, so we subtract 1 } function string_count { needle="$1" haystack="$2" # prints the number of times the needle was found in the haystack echo "$haystack" | grep "$needle" | wc -l } function no_warnings_no_errors { output="$1" mutator="$2" #echo "output=$output" string_not_present "WARNING" "$output" test_linux_error $? "$output" "$mutator" "WARNING" string_not_present "FATAL" "$output" test_linux_error $? "$output" "$mutator" "FATAL" string_not_present "CRITICAL" "$output" test_linux_error $? "$output" "$mutator" "CRITICAL" string_not_present "Ran 0 iterations" "$output" test_linux_error $? "$output" "$mutator" "Ran 0 iterations" } function find_llvm_config { for name in llvm-config llvm-config-3.8 llvm-config-3.7 llvm-config-3.6 llvm-config-3.5; do which $name > /dev/null if [ "$?" 
= "0" ]; then echo $name return 0 fi done generic_error 1 "Could not find llvm-config" "Failed to build afl-clang-fast" } ##################################################################################### ## AFL Instrumentation Tests ######################################################## ##################################################################################### echo "Running tests - instrumentation - afl - building" # Build afl-gcc make -C ../../afl_progs/ generic_error $? "make failed" "Failed to build afl-gcc" # Build afl-clang-fast LLVM_CONFIG=$(find_llvm_config) make -C ../../afl_progs/llvm_mode/ LLVM_CONFIG=$LLVM_CONFIG generic_error $? "make failed" "Failed to build afl-clang-fast" # Build afl-qemu-trace pushd ../../afl_progs/qemu_mode/ ./build_qemu_support.sh generic_error $? "make failed" "Failed to build afl-qemu-trace" popd # Build afl test programs afl_testdir="../../corpus/afl_test" make -C $afl_testdir AFL_PATH=../../afl_progs/ generic_error $? "make failed" "Failed to build afl test programs" # Run the test programs with various different AFL based instrumentations echo "Running tests - instrumentation - afl - testing" for test_file in test test32 test-qemu test-fast test-fast-deferred test-fast-persist test-fast-persist-deferred; do # Note: in Debian 9 (stretch), there appears to be a bug in the code coverage # of afl-gcc which only detects 2 paths instead of 3 (paths: AA, BA, AB). Running # the program manually confirms that there are 3 code paths which are hit, it's # just that the instrumentation only picks up 2 of them. The version info on gcc # on that Debian system is: gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 # The same target and compiler code, running on Fedora 29 and gcc 8.3.1 work fine. 
expected=3 # Unfortunately the persistence mode tests overly report new paths, so we need to adjust the count for them if [ "$test_file" = "test-fast-persist" -o "$test_file" = "test-fast-persist-deferred" ]; then expected=4 fi # Build the instrumentation options inst_options="{" if [ "$test_file" = "test-fast-deferred" -o "$test_file" = "test-fast-persist-deferred" ]; then inst_options="$inst_options\"deferred_startup\":1" fi if [ "$test_file" = "test-fast-persist" -o "$test_file" = "test-fast-persist-deferred" ]; then if [ "$test_file" = "test-fast-persist-deferred" ]; then inst_options="$inst_options," fi inst_options="$inst_options\"persistence_max_cnt\":5" fi if [ "$test_file" = "test-qemu" ]; then inst_options="$inst_options\"qemu_mode\":1,\"qemu_path\":\"../../afl_progs/afl-qemu-trace\"" fi inst_options="$inst_options}" # Set up our JSON files echo "{\"path\":\"$afl_testdir/$test_file\"}" > driver.json echo $inst_options > instrumentation.json # Run the test and check the number of new paths found echo "Running bit_flip with seed file test0 on $afl_testdir/$test_file" output=$(./fuzzer -n 127 -s test0 -d driver.json -i instrumentation.json stdin afl bit_flip) test_linux_error $? "$output" bit_flip "AFL instrumentation with $test_file new path test" no_warnings_no_errors "$output" bit_flip new_path_count=$(string_count "Found new_paths" "$output") test $new_path_count -eq $expected # TODO/FIXME: This check is broken due to a bug in AFL's instrumentation # It should able to be re-enabled when we pull in the new code # from AFL++, which will take care of tickets #154 and #155 # generic_error $? "AFL new paths test failed" \ # "AFL instrumentation with $test_file failed to detect new paths (found: $new_path_count expected: $expected)" # Run the test again and make sure it finds a crashing input output=$(./fuzzer -n 100 -s test1 -d driver.json -i instrumentation.json stdin afl bit_flip) test_linux_error $? 
"$output" bit_flip "AFL instrumentation with $test_file crash test" echo "$output" | grep "Found crashes" > /dev/null generic_error $? "AFL crash test failed" "AFL instrumentation with $test_file failed to detect a crash" done ##################################################################################### ## Return Code Instrumentation Tests ################################################ ##################################################################################### # Test the return_code instrumentation with and without the fork server echo "Running tests - instrumentation - return_code" echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json output=$(./fuzzer -n 100 -s test0 -d driver.json file return_code nop) test_linux_error $? "$output" nop "return_code forkserver test" no_warnings_no_errors "$output" nop echo '{"path":"corpus/test-linux"}' > driver.json output=$(./fuzzer -n 100 -s test1 -d driver.json stdin return_code bit_flip) test_linux_error $? "$output" bit_flip "return_code forkserver crash test" echo "$output" | grep "Found crashes" > /dev/null generic_error $? "return_code forkserver crash test failed" "return_code instrumentation failed to detect a crash" echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json echo '{"use_fork_server":0}' > instrumentation.json output=$(./fuzzer -n 100 -s test0 -d driver.json -i instrumentation.json file return_code nop) test_linux_error $? "$output" nop "return_code no forkserver test" no_warnings_no_errors "$output" nop echo '{"path":"corpus/test-linux"}' > driver.json echo '{"use_fork_server":0}' > instrumentation.json output=$(./fuzzer -n 100 -s test1 -d driver.json -i instrumentation.json stdin return_code bit_flip) test_linux_error $? "$output" bit_flip "return_code no forkserver crash test" echo "$output" | grep "Found crashes" > /dev/null generic_error $? 
"return_code no forkserver crash test failed" "return_code instrumentation without the forkserver failed to detect a crash" ##################################################################################### ## Mutator Tests #################################################################### ##################################################################################### # Now we do more basic tests using other mutators for mutator in ni bit_flip nop interesting_value havoc arithmetic afl zzuf; do echo "Running tests - mutator - $mutator" echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json output=$(./fuzzer -n 30 -s test0 -d driver.json file return_code $mutator) test_linux_error $? "$output" $mutator "$mutator file basic test" no_warnings_no_errors "$output" $mutator echo '{"path":"corpus/test-linux"}' > driver.json output=$(./fuzzer -n 30 -s test0 -d driver.json stdin return_code $mutator) test_linux_error $? "$output" $mutator "$mutator stdin basic test" no_warnings_no_errors "$output" $mutator done # TODO: add tests for multipart, radamsa, dictionary, and splice #echo '{"path":"corpus/test-linux","arguments":"@@"}' > driver.json #output=`./fuzzer -n 30 -s test0 -d driver.json file return_code splice` #test_linux_error $? "$output" splice "basic test" #no_warnings_no_errors "$output" splice # Want to test the no_warnings_no_errors function? Uncomment # the blog below to try a made-up mutator #output=`./fuzzer -n 30 -s test0 -d driver.json file return_code doesnotexist` #test_linux_error $? 
"$output" thisdoesnotexist "basic test" #no_warnings_no_errors "$output" thisdoesnotexist exit 0 # If we got here, we're good ================================================ FILE: tests/test-fuzzer.sh ================================================ #!/bin/sh # For Windows, this assumes you're using Cygwin, and everything is at C:\killerbeez # For Linux, this assumes LINUX_BASE_PATH ($HOME/killerbeez/ by default) contains: # killerbeez, killerbeez-mutators, killerbeez-utils if [ -z "$KILLERBEEZ_TEST" ] then echo "Please set KILLERBEEZ_TEST in your environment. Recommended: export KILLERBEEZ_TEST='simple'" exit 1 fi WINDOWS_BASE_PATH='C:\killerbeez\killerbeez\' WINDOWS_JSON_ESCAPED_BASE_PATH='C:\\killerbeez\\Killerbeez\\' # JSON uses '\' as an escape. WINDOWS_CYGWIN_BASE_PATH="/cygdrive/c/killerbeez/" WINDOWS_BUILD_PATH=$WINDOWS_CYGWIN_BASE_PATH"build/X64/Debug/killerbeez" LINUX_BASE_PATH="$HOME/killerbeez/" LINUX_BUILD_PATH="$LINUX_BASE_PATH/build/killerbeez/" FUZZER="./fuzzer" FUZZER_WITH_GDB="gdb -q -ex run -ex quit --args ./fuzzer" # Remove -ex quit to stay in gdb after completion. FUZZER_WITH_LLDB='lldb -o run -- ./fuzzer' # https://stackoverflow.com/a/3466183 unameOut="$(uname -s)" case "${unameOut}" in Linux*) machine=Linux;; Darwin*) machine=Mac;; CYGWIN*) machine=Cygwin;; MINGW*) machine=MinGw;; *) machine="UNKNOWN:${unameOut}" esac # test is a 32-bit binary that crashes on ABCD via stdin or in a file passed as argv[1]. # hang is a 32-bit binary that hangs. if [ $machine = "Cygwin" ] then # cygwin permissions are strange, so make sure the executables are executable. 
chmod +x $WINDOWS_CYGWIN_BASE_PATH/killerbeez/corpus/test/test.exe chmod +x $WINDOWS_CYGWIN_BASE_PATH/killerbeez/corpus/hang/hang.exe if [ $KILLERBEEZ_TEST = "debug" ] then cd $WINDOWS_BUILD_PATH ./fuzzer.exe \ file debug bit_flip \ -n 9 \ -l '{"level":0}' \ -sf $WINDOWS_BASE_PATH'corpus\test\inputs\close.txt' \ -d '{"timeout":20, "path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\test\\test.exe", "arguments":"@@"}' fi if [ $KILLERBEEZ_TEST = "simple" ] then cd $WINDOWS_BUILD_PATH ./fuzzer.exe \ file dynamorio radamsa \ -n 3 \ -sf $WINDOWS_BASE_PATH'\corpus\test\inputs\input.txt' \ \ -d '{"timeout":20, "path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\test\\test.exe", "arguments":"@@"}' \ \ -i '{"per_module_coverage": 1, "coverage_modules":["test.exe"], "timeout": 2000, "client_params": "-target_module test.exe -target_offset 0x1000 -nargs 3", "fuzz_iterations":1, "target_path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\test\\test.exe"}' \ -l '{"level":0}' fi if [ $KILLERBEEZ_TEST = "hang" ] then cd $WINDOWS_BUILD_PATH ./fuzzer \ file debug bit_flip \ -n 1 \ -l '{"level":0}' \ -sf $WINDOWS_BASE_PATH'corpus\test\inputs\input.txt' \ -d '{"timeout":3, "path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\hang\\hang.exe", "arguments":"@@"}' fi # Tests a single packet via the server driver. If you're sending multiple # packets, consider the manager mutator instead. 
if [ $KILLERBEEZ_TEST = "network_server" ] then cd $WINDOWS_BUILD_PATH ./fuzzer \ network_server debug bit_flip \ -n 10 \ -l '{"level":0}' \ -sf $WINDOWS_BASE_PATH'\corpus\network\close.txt' \ -d '{"timeout":20, "path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\network\\server\\server.exe", "ip":"127.0.0.1", "port":4444}' fi if [ $KILLERBEEZ_TEST = "network_client" ] then cd $WINDOWS_BUILD_PATH ./fuzzer \ network_client debug bit_flip \ -n 10 \ -l '{"level":0}' \ -sf $WINDOWS_BASE_PATH'\corpus\network\close.txt' \ -d '{"timeout":20, "path":"'$WINDOWS_JSON_ESCAPED_BASE_PATH'corpus\\network\\client\\client.exe", "ip":"127.0.0.1", "port":4444}' fi fi if [ $machine = "Linux" ] || [ $machine = "Mac" ] then if [ $machine = "Linux" ] then FUZZER=$FUZZER_WITH_GDB fi # LLDB interprets commas as some kind of syntax, so they need to be # escaped. You'll need to do so manually (in the -d option's json string, # usually) if you'd like to use this script w/ LLDB. # FUZZER=$FUZZER_WITH_LLDB # uncomment me to use if [ $KILLERBEEZ_TEST = "simple" ] then cd $LINUX_BUILD_PATH $FUZZER \ file return_code bit_flip \ -n 9 \ -sf $HOME'/killerbeez/killerbeez/corpus/test/inputs/close.txt' \ -d '{"timeout":20, "path":"'$LINUX_BUILD_PATH'/corpus/test-linux", "arguments":"@@"}' \ -l '{"level":0}' \ -m '{"num_bits":1}' fi if [ $KILLERBEEZ_TEST = "hang" ] then cd $LINUX_BUILD_PATH $FUZZER \ file return_code bit_flip \ -n 3 \ -l '{"level":0}' \ -sf $HOME'/killerbeez/killerbeez/corpus/test/inputs/input.txt' \ -d '{"timeout":2, "path":"'$LINUX_BUILD_PATH'corpus/hang-linux", "arguments":"@@"}' fi if [ $KILLERBEEZ_TEST = "radamsa" ] then cd $LINUX_BUILD_PATH $FUZZER \ file return_code radamsa \ -n 3 \ -l '{"level":0}' \ -sf $HOME'/killerbeez/killerbeez/corpus/test/inputs/input.txt' \ -d '{"timeout":20, "path":"'$LINUX_BUILD_PATH'corpus/test-linux", "arguments":"@@"}' fi if [ $KILLERBEEZ_TEST = "stdin" ] then cd $LINUX_BUILD_PATH $FUZZER \ stdin return_code bit_flip \ -n 9 \ -l '{"level":0}' \ -sf 
$LINUX_BASE_PATH'/killerbeez/corpus/test/inputs/close.txt' \ -d '{"timeout":20, "path":"'$LINUX_BUILD_PATH'corpus/test-linux"}' fi # Tests a single packet via the server driver. If you're sending # multiple packets, consider the manager mutator instead. if [ $KILLERBEEZ_TEST = "network_server" ] then cd $LINUX_BUILD_PATH $FUZZER \ network_server return_code bit_flip \ -n 10 \ -l '{"level":0}' \ -sf $LINUX_BASE_PATH'/killerbeez/corpus/network/close.txt' \ -d '{"timeout":20,"path":"'$LINUX_BUILD_PATH'/corpus/server-linux","ip":"127.0.0.1","port":4444}' fi if [ $KILLERBEEZ_TEST = "network_client" ] then cd $LINUX_BUILD_PATH $FUZZER \ network_client return_code bit_flip \ -n 10 \ -l '{"level":0}' \ -sf $LINUX_BASE_PATH'/killerbeez/corpus/network/close.txt' \ -d '{"timeout":20,"path":"'$LINUX_BUILD_PATH'corpus/client-linux","ip":"127.0.0.1","port":4444}' fi if [ $KILLERBEEZ_TEST = "multipart" ] then cd $LINUX_BUILD_PATH $FUZZER_WITH_GDB \ network_server return_code manager \ -n 10 \ -l '{"level":0}' \ -m '{"mutators":["bit_flip","bit_flip"]}' \ -sf $LINUX_BASE_PATH'/killerbeez/corpus/network/multipart.txt' \ -d '{"timeout":20,"path":"'$LINUX_BUILD_PATH'/corpus/server-linux","ip":"127.0.0.1","port":4444}' fi fi # successful output should look like: # Mon Jun 11 19:59:28 2018 - INFO - Logging Started # Mon Jun 11 19:59:28 2018 - DEBUG - Fuzzing the 0 iteration # Mon Jun 11 19:59:28 2018 - DEBUG - Setting up shm region: afl_shm_ce57db765140e79b_0 # Mon Jun 11 19:59:29 2018 - DEBUG - Dynamorio Instrumentation got hash 4aad8251 temp 4aad8251 (last hash 00000000) # Mon Jun 11 19:59:29 2018 - DEBUG - has_new_bits = 2 # Mon Jun 11 19:59:29 2018 - DEBUG - Module test.exe has new bits (hash 4aad8251, last hash 00000000) # Mon Jun 11 19:59:29 2018 - CRITICAL - Found new_paths # Mon Jun 11 19:59:29 2018 - DEBUG - Fuzzing the 1 iteration # Mon Jun 11 19:59:30 2018 - DEBUG - Dynamorio Instrumentation got hash 5052d8f9 temp 5052d8f9 (last hash 4aad8251) # Mon Jun 11 19:59:30 2018 - 
DEBUG - has_new_bits = 2 # Mon Jun 11 19:59:30 2018 - DEBUG - Module test.exe has new bits (hash 5052d8f9, last hash 4aad8251) # Mon Jun 11 19:59:30 2018 - CRITICAL - Found new_paths # Mon Jun 11 19:59:30 2018 - DEBUG - Fuzzing the 2 iteration # Mon Jun 11 19:59:30 2018 - DEBUG - Dynamorio Instrumentation got hash 4aad8251 temp 4aad8251 (last hash 5052d8f9) # Mon Jun 11 19:59:30 2018 - DEBUG - has_new_bits = 0 # Mon Jun 11 19:59:30 2018 - INFO - Ran 3 iterations in 2 seconds ================================================ FILE: tools/README.md ================================================ # Build/CI Tools ## Windows * **setup_build_env.ps1** - script that installs build dependencies on a Windows machine (to set it up as a CI runner) * **release_vs2017.bat** - script run by CI to build a binary release of Killerbeez for windows on Visual Studio 2017 * **release_vs2019.bat** - script run by CI to build a binary release of Killerbeez for windows on Visual Studio 2019 * **release_excludes.txt** - file used by `release_*.bat` during packaging step ================================================ FILE: tools/release_excludes.txt ================================================ .exp\ .iobj\ .ipdb\ .lib\ .pdb\ .manifest\ ================================================ FILE: tools/release_vs2017.bat ================================================ REM Meant to be run by CI from the root directory of one of the repos REM Usage: cd killerbeez; tools\release if "%RADAMSA_URL%" == "" ( set RADAMSA_URL=https://gitlab.com/akihe/radamsa.git ) if "%DYNAMORIO_URL%" == "" ( set DYNAMORIO_URL=https://storage.googleapis.com/chromium-dynamorio/builds/DynamoRIO-Windows-6.2.17295-0xa77808f.zip ) if "%CI_PROJECT_DIR%" == "" ( set CI_PROJECT_DIR=%cd% ) rmdir /s /q build if not exist radamsa ( git clone %RADAMSA_URL% || exit /b 1 ) else ( cd radamsa git checkout master || exit /b 1 git pull || exit /b 1 cd .. 
)

REM Download and unpack DynamoRIO unless a copy is already present
if not exist dynamorio (
  powershell.exe -nologo -noprofile -command "& { Add-Type -A 'System.IO.Compression.FileSystem'; $wc = New-Object System.Net.WebClient; $wc.DownloadFile('%DYNAMORIO_URL%', '.\dynamorio.zip'); [IO.Compression.Zipfile]::ExtractToDirectory('.\dynamorio.zip', '.\dynamorio-unzip'); }"
  move dynamorio-unzip\DynamoRIO* dynamorio
  rmdir dynamorio-unzip
  del dynamorio.zip
)

REM On some systems, vcvarsall.bat will change your working directory
REM To work around this infuriating bug, pushd and popd are used

REM 32-bit build. A failed radamsa build aborts the release instead of
REM silently producing a package without the radamsa binaries.
pushd .
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x86
popd
call :compile || exit /b 1
call :buildradamsa C:\cygwin\bin || exit /b 1
call :package X86

REM 64-bit build
pushd .
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
popd
call :compile || exit /b 1
call :buildradamsa C:\cygwin64\bin || exit /b 1
call :package x64

exit /b 0

REM :compile - configure and build in a scratch directory (cmaketmp) using
REM the CMake and Ninja binaries that ship with Visual Studio 2017.
:compile
rmdir /s /q cmaketmp
mkdir cmaketmp
cd cmaketmp
REM Make Ninja build files
"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G "Ninja" -DCMAKE_CXX_COMPILER="cl.exe" -DCMAKE_C_COMPILER="cl.exe" -DCMAKE_BUILD_TYPE="Release" -DCMAKE_MAKE_PROGRAM="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" ".." || exit /b 1
REM Run Ninja to build
"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" || exit /b 1
cd ..
rmdir /s /q cmaketmp exit /b 0 :package set platform=%1 set relname=killerbeez-%platform% set distdir=dist\%relname% rmdir /s /q %distdir% mkdir %distdir% xcopy /s /exclude:%~dp0\release_excludes.txt build\%platform%\Release\* %distdir% if "%platform%" == "x64" ( xcopy /s /exclude:%~dp0\release_excludes.txt build\X86\Release\killerbeez\bin32\* %distdir%\killerbeez\bin32\ ) xcopy /s /i docs %distdir%\docs mkdir %distdir%\radamsa xcopy /s /i radamsa\bin %distdir%\radamsa\bin xcopy radamsa\LICENCE %distdir%\radamsa if "%platform%" == "x64" ( if exist C:\cygwin64\bin\cygwin1.dll ( xcopy C:\cygwin64\bin\cygwin1.dll %distdir%\radamsa\bin ) ) else ( if exist C:\cygwin\bin\cygwin1.dll ( xcopy C:\cygwin\bin\cygwin1.dll %distdir%\radamsa\bin ) if exist C:\cygwin\bin\cyggcc_s-1.dll ( xcopy C:\cygwin\bin\cyggcc_s-1.dll %distdir%\radamsa\bin ) ) mkdir %distdir%\dynamorio xcopy /s /i dynamorio\bin32 %distdir%\dynamorio\bin32 xcopy /s /i dynamorio\bin64 %distdir%\dynamorio\bin64 xcopy /s /i dynamorio\lib32 %distdir%\dynamorio\lib32 xcopy /s /i dynamorio\lib64 %distdir%\dynamorio\lib64 xcopy /s /i dynamorio\ext %distdir%\dynamorio\ext xcopy dynamorio\License.txt %distdir%\dynamorio xcopy dynamorio\ACKNOWLEDGEMENTS %distdir%\dynamorio if "%platform%" == "x64" ( mkdir %distdir%\server\skel\windows_x86_64 REM Include wrapper binary, stored in C:\killerbeez on the runner xcopy C:\killerbeez\wrapper_26014_windows_x86_64.exe %distdir%\server\skel\windows_x86_64 REM Include license files from the BOINC repo xcopy server\boinc\COPYING %distdir%\server\skel\windows_x86_64 xcopy server\boinc\COPYING.LESSER %distdir%\server\skel\windows_x86_64 xcopy server\boinc\README.md %distdir%\server\skel\windows_x86_64 ) set releasezip=%CI_PROJECT_DIR%\release\%relname%.zip echo Creating %releasezip% mkdir "%CI_PROJECT_DIR%\release" del "%releasezip%" powershell.exe -nologo -noprofile -command "& { Add-Type -A 'System.IO.Compression.FileSystem'; 
[IO.Compression.Zipfile]::CreateFromDirectory('%distdir%', '%releasezip%', [IO.Compression.CompressionLevel]::Optimal, 1); }" exit /b 0 :buildradamsa rem The argument is the path to the cygwin binary rem If the path doesn't exist, we just bail without rem building radamsa. rem rem Fun fact: If the set command is used inside the if statement rem it will not set oldpath, despite all logic. rem To hack around this, the if statement was inverted and the rem set commands were unconditional, where they work just fine. rem To test this yourself, try this: rem rem if exist C:\windows ( rem echo "oldpath=%oldpath" rem set "oldpath=%path%" rem echo "oldpath=%oldpath%" rem ) rem rem Want to see something even more amazing? Paste in that exact rem same text again and watch it work just fine! Isn't Windows cool? rem echo "Checking for %1" if not exist %1 ( echo "Cygwin not found, skipping radamsa build: %1" exit /b 0 ) echo "Building radamsa with %1" set "oldpath=%path%" set "path=%1;%oldpath%" make -C radamsa clean || exit /b 1 make -C radamsa || exit /b 1 set "path=%oldpath%" exit /b 0 ================================================ FILE: tools/release_vs2019.bat ================================================ REM Meant to be run by CI from the root directory of one of the repos REM Usage: cd killerbeez; tools\release SETLOCAL EnableDelayedExpansion if "%RADAMSA_URL%" == "" ( set RADAMSA_URL=https://gitlab.com/akihe/radamsa.git ) if "%DYNAMORIO_URL%" == "" ( set DYNAMORIO_URL=https://storage.googleapis.com/chromium-dynamorio/builds/DynamoRIO-Windows-6.2.17295-0xa77808f.zip ) if "%CI_PROJECT_DIR%" == "" ( set CI_PROJECT_DIR=%cd% ) rmdir /s /q build if not exist radamsa ( git clone %RADAMSA_URL% || exit /b 1 ) else ( cd radamsa git checkout master || exit /b 1 git pull || exit /b 1 cd .. 
)

REM Download and unpack DynamoRIO unless a copy is already present
if not exist dynamorio (
  powershell.exe -nologo -noprofile -command "& { Add-Type -A 'System.IO.Compression.FileSystem'; $wc = New-Object System.Net.WebClient; $wc.DownloadFile('%DYNAMORIO_URL%', '.\dynamorio.zip'); [IO.Compression.Zipfile]::ExtractToDirectory('.\dynamorio.zip', '.\dynamorio-unzip'); }"
  move dynamorio-unzip\DynamoRIO* dynamorio
  rmdir dynamorio-unzip
  del dynamorio.zip
)

REM vcvarsall.bat can change the working directory on some systems, and
REM everything below uses paths relative to the repository root, so the
REM current directory is saved and restored around each call.

REM 32-bit build
pushd .
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x86
popd
call :compile || exit /b 1
REM Build radamsa with the 32-bit Cygwin toolchain when it is installed.
REM !oldpath! needs delayed expansion because it is set inside this block.
if exist C:\cygwin\bin (
  set "oldpath=%path%"
  set "path=C:\cygwin\bin\;!oldpath!"
  make -C radamsa clean || exit /b 1
  make -C radamsa || exit /b 1
  set "path=!oldpath!"
)
call :package X86

REM 64-bit build
pushd .
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
popd
call :compile || exit /b 1
REM Same as above, with the 64-bit Cygwin toolchain
if exist C:\cygwin64\bin (
  set "oldpath=%path%"
  set "path=C:\cygwin64\bin\;!oldpath!"
  make -C radamsa clean || exit /b 1
  make -C radamsa || exit /b 1
  set "path=!oldpath!"
)
call :package x64

exit /b 0

REM :compile - configure and build in a scratch directory (cmaketmp) using
REM the CMake and Ninja binaries that ship with Visual Studio 2019.
:compile
rmdir /s /q cmaketmp
mkdir cmaketmp
cd cmaketmp
REM Make Ninja build files
"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G "Ninja" -DCMAKE_CXX_COMPILER="cl.exe" -DCMAKE_C_COMPILER="cl.exe" -DCMAKE_BUILD_TYPE="Release" -DCMAKE_MAKE_PROGRAM="C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" ".." || exit /b 1
REM Run Ninja to build
"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja\ninja.exe" || exit /b 1
cd ..
rmdir /s /q cmaketmp exit /b 0 :package set platform=%1 set relname=killerbeez-%platform% set distdir=dist\%relname% rmdir /s /q %distdir% mkdir %distdir% xcopy /s /exclude:%~dp0\release_excludes.txt build\%platform%\Release\* %distdir% if "%platform%" == "x64" ( xcopy /s /exclude:%~dp0\release_excludes.txt build\X86\Release\killerbeez\bin32\* %distdir%\killerbeez\bin32\ ) xcopy /s /i docs %distdir%\docs mkdir %distdir%\radamsa xcopy /s /i radamsa\bin %distdir%\radamsa\bin xcopy radamsa\LICENCE %distdir%\radamsa if "%platform%" == "x64" ( if exist C:\cygwin64\bin\cygwin1.dll ( xcopy C:\cygwin64\bin\cygwin1.dll %distdir%\radamsa\bin ) ) else ( if exist C:\cygwin\bin\cygwin1.dll ( xcopy C:\cygwin\bin\cygwin1.dll %distdir%\radamsa\bin ) if exist C:\cygwin\bin\cyggcc_s-1.dll ( xcopy C:\cygwin\bin\cyggcc_s-1.dll %distdir%\radamsa\bin ) ) mkdir %distdir%\dynamorio xcopy /s /i dynamorio\bin32 %distdir%\dynamorio\bin32 xcopy /s /i dynamorio\bin64 %distdir%\dynamorio\bin64 xcopy /s /i dynamorio\lib32 %distdir%\dynamorio\lib32 xcopy /s /i dynamorio\lib64 %distdir%\dynamorio\lib64 xcopy /s /i dynamorio\ext %distdir%\dynamorio\ext xcopy dynamorio\License.txt %distdir%\dynamorio xcopy dynamorio\ACKNOWLEDGEMENTS %distdir%\dynamorio if "%platform%" == "x64" ( mkdir %distdir%\server\skel\windows_x86_64 REM Include wrapper binary, stored in C:\killerbeez on the runner xcopy C:\killerbeez\wrapper_26014_windows_x86_64.exe %distdir%\server\skel\windows_x86_64 REM Include license files from the BOINC repo xcopy server\boinc\COPYING %distdir%\server\skel\windows_x86_64 xcopy server\boinc\COPYING.LESSER %distdir%\server\skel\windows_x86_64 xcopy server\boinc\README.md %distdir%\server\skel\windows_x86_64 ) set releasezip=%CI_PROJECT_DIR%\release\%relname%.zip echo Creating %releasezip% mkdir "%CI_PROJECT_DIR%\release" del "%releasezip%" powershell.exe -nologo -noprofile -command "& { Add-Type -A 'System.IO.Compression.FileSystem'; 
[IO.Compression.Zipfile]::CreateFromDirectory('%distdir%', '%releasezip%', [IO.Compression.CompressionLevel]::Optimal, 1); }" exit /b 0 ================================================ FILE: tools/setup_build_env.ps1 ================================================ param($build_env = $PSScriptRoot, $cygwin_mirror = "http://mirrors.kernel.org/sourceware/cygwin/", $vs_version = "2019") Set-PSDebug -Trace 1 pushd $build_env $ErrorActionPreference = "Stop" Add-Type -A 'System.IO.Compression.FileSystem' if ($env:DNS_SERVER) { netsh interface ip set dns Ethernet static $env:DNS_SERVER } [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12 $wc = New-Object System.Net.WebClient if (!(Test-Path "C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe")) { mkdir -Force C:\Gitlab-Runner $url = "https://gitlab-runner-downloads.s3.amazonaws.com/latest/binaries/gitlab-runner-windows-amd64.exe" $wc.DownloadFile($url, "C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe") } # Install cygwin 32 and 64 bit mkdir -Force installers $url = "https://cygwin.com/setup-x86_64.exe" $wc.DownloadFile($url, "$build_env\installers\cygwin-x86_64.exe") $ret = Start-Process "installers\cygwin-x86_64.exe" -ArgumentList "--arch","x86_64","--packages","gcc-core,make,wget","--upgrade-also","--root","C:\cygwin64","--site","$cygwin_mirror","--quiet-mode" -Wait -PassThru -RedirectStandardOutput "$build_env\cygwin-x86_64-stdout" -RedirectStandardError "$build_env\cygwin-x86_64-stderr" if ($ret.ExitCode) { throw "Cygwin 64-bit install failed" } $ret = Start-Process "installers\cygwin-x86_64.exe" -ArgumentList "--arch","x86","--packages","gcc-core,make,wget","--upgrade-also","--root","C:\cygwin","--site","$cygwin_mirror","--quiet-mode" -Wait -PassThru -RedirectStandardOutput "$build_env\cygwin-x86-stdout" -RedirectStandardError "$build_env\cygwin-x86-stderr" if ($ret.ExitCode) { throw "Cygwin 32-bit install failed" } # Install Visual Studio echo "Beginning Visual Studio 
install" if ($vs_version -eq "2017") { $url = "https://aka.ms/vs/15/release/vs_community.exe" $vs_exe = "$build_env\installers\vs_community.exe" $example_file = "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" $extra_args = @() } else { $url = "https://aka.ms/vs/16/release/vs_community.exe" $vs_exe = "$build_env\installers\vs_community.exe" $example_file = "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" $extra_args = "--add","Microsoft.VisualStudio.Component.Windows10SDK.18362" } $wc.DownloadFile($url, $vs_exe) if (Test-Path $example_file) { $ret = Start-Process $vs_exe -ArgumentList "update","--passive" -Wait -PassThru } else { $ret = Start-Process $vs_exe -ArgumentList ("--add","Microsoft.VisualStudio.Component.VC.Tools.x86.x64","--add","Microsoft.VisualStudio.Component.VC.CMake.Project","--add","Microsoft.VisualStudio.Component.Git","--passive","--norestart" + $extra_args) -Wait -PassThru } if ($ret.ExitCode) { throw "Visual Studio install failed" } echo "Visual Studio install complete" $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") popd ================================================ FILE: tracer/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 2.8.8) project (tracer) include_directories (${CMAKE_SOURCE_DIR}/driver/) include_directories (${CMAKE_SOURCE_DIR}/instrumentation/) set(TRACER_SRC ${PROJECT_SOURCE_DIR}/main.c) source_group("Executable Sources" FILES ${TRACER_SRC}) add_executable(tracer ${TRACER_SRC} $ $) target_compile_definitions(tracer PUBLIC INSTRUMENTATION_NO_IMPORT) target_compile_definitions(tracer PUBLIC DRIVER_NO_IMPORT) target_link_libraries(tracer utils) target_link_libraries(tracer jansson) if (WIN32) target_link_libraries(tracer Shlwapi) # utils needs Shlwapi target_link_libraries(tracer ws2_32) # driver 
needs ws2_32 target_link_libraries(tracer iphlpapi) # network driver needs iphlpapi endif (WIN32) ================================================ FILE: tracer/main.c ================================================ #include #include #include #include #include #include #include #include #include void usage(char * program_name) { char * help_text; printf( "Usage: %s driver_name instrumentation_name input_file output_file [options]\n" "\n" "Required:\n" "\t driver_name The driver framework used to run the target program\n" "\t instrumentation_name The instrumenation framework to use to determine the path a program took\n" "\t input_file The input to the target program\n" "\t output_file Write the edges to the given file. The given path will be used as a prefix when recording multiple modules\n" "Options:\n" "\t -b When writing the edges to a file, write them in binary (rather than human readable text)\n" "\t -d driver_options Set the options for the driver\n" "\t -i instrumentation_options Set the options for the instrumentation\n" "\t -l logging_options Set the options for logging\n" "\t -n num_iterations The number of iterations to run [5 per file]. 
Edges which are only in one run will be excluded\n" "\t -p Record the edges for each module independently\n" "\n", program_name ); #define PRINT_HELP(x, y) \ x = y; \ if(x) { \ puts(x); \ free(x); \ } PRINT_HELP(help_text, logging_help()); PRINT_HELP(help_text, driver_help()); PRINT_HELP(help_text, instrumentation_help()); exit(1); } struct edge_counts { instrumentation_edge_t edge; int count; }; void record_edges(instrumentation_edges_t * edges, struct edge_counts ** all_runs, int * all_runs_num_edges) { int this_run_num_edges; instrumentation_edge_t *this_run; int i, j, found; this_run = NULL; this_run_num_edges = 0; for (i = 0; i < edges->num_edges; i++) { //First check for the edge in this run found = 0; for (j = 0; j < this_run_num_edges; j++) { if (this_run[j].to == edges->edges[i].to && this_run[j].from == edges->edges[i].from) { found = 1; break; } } if (found) //If we've already recorded this one, just skip it continue; //If we haven't recorded this edge for this run before, add it to the this_run list this_run_num_edges++; this_run = (instrumentation_edge_t *)realloc(this_run, this_run_num_edges * sizeof(instrumentation_edge_t)); this_run[this_run_num_edges - 1].to = edges->edges[i].to; this_run[this_run_num_edges - 1].from = edges->edges[i].from; //Now check to see if it's been recorded already in other runs for (j = 0; j < *all_runs_num_edges; j++) { if ((*all_runs)[j].edge.to == edges->edges[i].to && (*all_runs)[j].edge.from == edges->edges[i].from) { (*all_runs)[j].count++; found = 1; break; } } //If we haven't found this edge before, add it to the all_runs list if (!found) { *all_runs_num_edges = *all_runs_num_edges + 1; *all_runs = (struct edge_counts *)realloc(*all_runs, *all_runs_num_edges * sizeof(struct edge_counts)); (*all_runs)[*all_runs_num_edges - 1].edge.to = edges->edges[i].to; (*all_runs)[*all_runs_num_edges - 1].edge.from = edges->edges[i].from; (*all_runs)[*all_runs_num_edges - 1].count = 1; } } free(this_run); } #define MAX_MODULES 512 
int main(int argc, char ** argv) { driver_t * driver; instrumentation_t * instrumentation; char *driver_name, *driver_options = NULL, *input_filename = NULL, *seed_buffer = NULL, *output_file = NULL, *instrumentation_name = NULL, *instrumentation_options = NULL, *logging_options = NULL; void * instrumentation_state = NULL; int seed_length, iteration; instrumentation_edges_t * edges; instrumentation_edge_t *deterministic_edges; struct edge_counts * all_runs[MAX_MODULES]; int all_runs_num_edges[MAX_MODULES]; int i, j, deterministic_edges_num_edges, num_modules = 0; char * module_name = NULL; char * module_names[MAX_MODULES]; char filename_buffer[MAX_PATH]; FILE * fp; ////////////////////////////////////////////////////////////////////////////////////////////////////// // Parse Arguments /////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //Default options int num_iterations = 5; int binary_mode = 0; int per_module_edges = 0; if (argc < 5) { usage(argv[0]); } driver_name = argv[1]; instrumentation_name = argv[2]; input_filename = argv[3]; output_file = argv[4]; for (int i = 5; i < argc; i++) { IF_ARG_SET_TRUE("-b", binary_mode) ELSE_IF_ARG_OPTION("-d", driver_options) ELSE_IF_ARG_OPTION("-i", instrumentation_options) ELSE_IF_ARG_OPTION("-l", logging_options) ELSE_IF_ARGINT_OPTION("-n", num_iterations) ELSE_IF_ARG_SET_TRUE("-p", per_module_edges) else { if (strcmp("-h", argv[i])) printf("Unknown argument: %s\n", argv[i]); usage(argv[0]); } } if (setup_logging(logging_options)) { printf("Failed setting up logging, exiting\n"); return 1; } if (num_iterations < 1) FATAL_MSG("Bad iteration number (%d). 
Must have a iteration count 1 or greater.", num_iterations); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Ojbect Setup ////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// instrumentation = instrumentation_factory(instrumentation_name); if (!instrumentation) FATAL_MSG("Unknown instrumentation '%s'", instrumentation_name); if (!instrumentation->get_edges) FATAL_MSG("Instrumentation '%s' does not support the ability to get a list of edges", instrumentation_name); if (instrumentation_options) instrumentation_options = add_int_option_to_json(instrumentation_options, "edges", 1); else instrumentation_options = "{\"edges\": 1}"; instrumentation_state = instrumentation->create(instrumentation_options, NULL); if (!instrumentation_state) FATAL_MSG("Bad options/state for instrumentation %s", instrumentation_name); driver = driver_instrumentation_factory(driver_name, driver_options, instrumentation, instrumentation_state); if (!driver) FATAL_MSG("Unknown driver '%s' or bad options: %s", driver_name, driver_options ? 
driver_options : "none"); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Main Test Loop //////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //Read the seed file seed_length = read_file(input_filename, &seed_buffer); if (seed_length <= 0) //Couldn't read file, or empty file FATAL_MSG("Unable to open the input file \"%s\"", input_filename); memset(&all_runs, 0, sizeof(all_runs)); memset(&all_runs_num_edges, 0, sizeof(all_runs_num_edges)); memset(&module_names, 0, sizeof(module_names)); if (!per_module_edges) { num_modules = 1; } else { while (!instrumentation->get_module_info(instrumentation_state, num_modules, NULL, &module_name, NULL, NULL)) { if (num_modules >= MAX_MODULES) FATAL_MSG("Too many modules specified, %d specified, %d maximum", num_modules, MAX_MODULES); module_names[num_modules] = module_name; num_modules++; } } for (iteration = 0; iteration < num_iterations; iteration++) { driver->test_input(driver->state, seed_buffer, seed_length); for (i = 0; i < num_modules; i++) { edges = instrumentation->get_edges(instrumentation_state, i); if (!edges) FATAL_MSG("Instrumentation failed to get the program edges from the tested process."); record_edges(edges, &all_runs[i], &all_runs_num_edges[i]); } } free(seed_buffer); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Reduce the list of edges to just the ones in every run, and store it ////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// for (i = 0; i < num_modules; i++) { deterministic_edges = NULL; deterministic_edges_num_edges = 0; for (j = 0; j < all_runs_num_edges[i]; j++) { if (all_runs[i][j].count == num_iterations) //if the edge was found in all the iterations { 
deterministic_edges_num_edges++; deterministic_edges = (instrumentation_edge_t *)realloc(deterministic_edges, deterministic_edges_num_edges * sizeof(instrumentation_edge_t)); deterministic_edges[deterministic_edges_num_edges - 1].to = all_runs[i][j].edge.to; deterministic_edges[deterministic_edges_num_edges - 1].from = all_runs[i][j].edge.from; } } if (!module_names[i]) snprintf(filename_buffer, sizeof(filename_buffer) - 1, "%s", output_file); else snprintf(filename_buffer, sizeof(filename_buffer) - 1, "%s_%s.%s", output_file, module_names[i], binary_mode ? "dat" : "txt"); fp = fopen(filename_buffer, "wb+"); if (fp == NULL) FATAL_MSG("Couldn't open the file %s to write the edges to for %s", filename_buffer, module_names[i] ? module_names[i] : "the program"); for (j = 0; j < deterministic_edges_num_edges; j++) { if (binary_mode) fwrite(&deterministic_edges[j], sizeof(instrumentation_edge_t), 1, fp); else fprintf(fp, "%016x:%016x\n", deterministic_edges[j].from, deterministic_edges[j].to); } fclose(fp); free(deterministic_edges); } ////////////////////////////////////////////////////////////////////////////////////////////////////// // Cleanup /////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// //Cleanup the objects and exit driver->cleanup(driver->state); instrumentation->cleanup(instrumentation_state); free(driver); free(instrumentation); return 0; } ================================================ FILE: utils/XGetopt.c ================================================ // XGetopt.cpp Version 1.2 // // Author: Hans Dietrich // hdietrich2@hotmail.com // // Description: // XGetopt.cpp implements getopt(), a function to parse command lines. 
// // History // Version 1.2 - 2003 May 17 // - Added Unicode support // // Version 1.1 - 2002 March 10 // - Added example to XGetopt.cpp module header // // This software is released into the public domain. // You are free to use it in any way you like. // // This software is provided "as is" with no expressed // or implied warranty. I accept no liability for any // damage or loss of business that this software may cause. // /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // if you are using precompiled headers then include this line: //#include "stdafx.h" /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // if you are not using precompiled headers then include these lines: #include #include #include /////////////////////////////////////////////////////////////////////////////// #include "XGetopt.h" /////////////////////////////////////////////////////////////////////////////// // // X G e t o p t . c p p // // // NAME // getopt -- parse command line options // // SYNOPSIS // int getopt(int argc, char *argv[], char *optstring) // // extern char *optarg; // extern int optind; // // DESCRIPTION // The getopt() function parses the command line arguments. Its // arguments argc and argv are the argument count and array as // passed into the application on program invocation. In the case // of Visual C++ programs, argc and argv are available via the // variables __argc and __argv (double underscores), respectively. // getopt returns the next option letter in argv that matches a // letter in optstring. (Note: Unicode programs should use // __targv instead of __argv. Also, all character and string // literals should be enclosed in _T( ) ). 
// // optstring is a string of recognized option letters; if a letter // is followed by a colon, the option is expected to have an argument // that may or may not be separated from it by white space. optarg // is set to point to the start of the option argument on return from // getopt. // // Option letters may be combined, e.g., "-ab" is equivalent to // "-a -b". Option letters are case sensitive. // // getopt places in the external variable optind the argv index // of the next argument to be processed. optind is initialized // to 0 before the first call to getopt. // // When all options have been processed (i.e., up to the first // non-option argument), getopt returns EOF, optarg will point // to the argument, and optind will be set to the argv index of // the argument. If there are no non-option arguments, optarg // will be set to NULL. // // The special option "--" may be used to delimit the end of the // options; EOF will be returned, and "--" (and everything after it) // will be skipped. // // RETURN VALUE // For option letters contained in the string optstring, getopt // will return the option letter. getopt returns a question mark (?) // when it encounters an option letter not included in optstring. // EOF is returned when processing is finished. // // BUGS // 1) Long options are not supported. // 2) The GNU double-colon extension is not supported. // 3) The environment variable POSIXLY_CORRECT is not supported. // 4) The + syntax is not supported. // 5) The automatic permutation of arguments is not supported. // 6) This implementation of getopt() returns EOF if an error is // encountered, instead of -1 as the latest standard requires. 
char *optarg;		// argument of the option returned last, or the first non-option argument
int optind = 0;		// index into argv of the next argument to look at

/**
 * Returns the next option letter found in argv, scanning one letter per call.
 * @param argc - the number of entries in argv
 * @param argv - the argument vector; argv[0] is skipped
 * @param optstring - the accepted option letters; a letter followed by ':' takes an argument
 * @return - the option letter, '?' for an unknown letter or a missing argument, or EOF once
 * the first non-option argument, a lone "-", "--", or the end of argv is reached. On EOF
 * optarg points at the next unprocessed argument, or is NULL if there is none.
 */
int getopt(int argc, char *argv[], char *optstring)
{
	static char *cursor = NULL;	// position inside the option cluster being scanned
	char letter;
	char *spec;

	// Setting optind back to 0 restarts the scan from scratch
	if (optind == 0)
		cursor = NULL;

	optarg = NULL;

	// Move on to the next argv entry once the current cluster is used up
	if (cursor == NULL || *cursor == _T('\0'))
	{
		if (optind == 0)
			optind = 1;

		// End of argv, a non-option argument, or a lone "-" all end option parsing
		if (optind >= argc)
			return EOF;
		if (argv[optind][0] != _T('-') || argv[optind][1] == _T('\0'))
		{
			optarg = argv[optind];
			return EOF;
		}

		// "--" ends option parsing as well, and is itself skipped
		if (_tcscmp(argv[optind], _T("--")) == 0)
		{
			optind++;
			if (optind < argc)
				optarg = argv[optind];
			return EOF;
		}

		cursor = argv[optind] + 1;	// skip past -
		optind++;
	}

	letter = *cursor++;
	spec = _tcschr(optstring, letter);
	if (spec == NULL || letter == _T(':'))
		return _T('?');

	// Letters without a trailing ':' take no argument
	if (spec[1] != _T(':'))
		return letter;

	if (*cursor != _T('\0'))
	{
		// The argument is glued to the option letter ("-nVALUE")
		optarg = cursor;
		cursor = NULL;
		return letter;
	}

	// Otherwise the argument is the next argv entry, which has to exist
	if (optind >= argc)
		return _T('?');

	optarg = argv[optind];
	optind++;
	return letter;
}
domain. // You are free to use it in any way you like. // // This software is provided "as is" with no expressed // or implied warranty. I accept no liability for any // damage or loss of business that this software may cause. // /////////////////////////////////////////////////////////////////////////////// #ifndef XGETOPT_H #define XGETOPT_H extern int optind, opterr; extern char *optarg; int getopt(int argc, char *argv[], char *optstring); #endif //XGETOPT_H ================================================ FILE: utils/global_types.h ================================================ #pragma once #include #include /** * This macro can be used to mask off the rest of the flags in mutate_extended's * flag parameter to get just the input part's index that should be mutated when * the MUTATE_MULTIPLE_INPUTS bit is set. */ #define MUTATE_MULTIPLE_INPUTS_MASK ((1 << 16) - 1) /** * This flag signifies that the mutator should mutate a specific input part, * defined by the index set in the bits covered by MUTATE_MULTIPLE_INPUTS_MASK */ #define MUTATE_MULTIPLE_INPUTS (1 << 16) /** * This flag signifies that the mutations should be done in a thread safe way. 
*/ #define MUTATE_THREAD_SAFE (1 << 17) typedef struct mutator { void * (*create)(char * options, char * state, char * input, size_t input_length); void(*cleanup)(void * mutator_state); int(*mutate)(void * mutator_state, char * buffer, size_t buffer_length); int(*mutate_extended)(void * mutator_state, char * buffer, size_t buffer_length, uint64_t flags); char * (*get_state)(void * mutator_state); void(*free_state)(char * state); int(*set_state)(void * mutator_state, char * state); int(*get_current_iteration)(void * mutator_state); int(*get_total_iteration_count)(void * mutator_state); void(*get_input_info)(void * mutator_state, int * num_inputs, size_t **input_sizes); int(*set_input)(void * mutator_state, char * new_input, size_t input_length); int(*help)(char **help_str); } mutator_t; ================================================ FILE: utils/mutator_factory.c ================================================ #include "mutator_factory.h" #include "utils.h" #include #include #include #ifdef _WIN32 #include #else #include #include #include #include #endif /** * Looks in a directory and retrieves a list of filenames of library files in that directory * @param directory - the directory to look for the library files in * @param num_libraries - a pointer to an int in which to return the number of library files found * @return - a pointer to a list of library files on success, or NULL if no library files were found */ static char ** get_mutator_library_filenames(char * directory, int * num_libraries) { int num_files = 0; char ** mutator_dlls = NULL; char filename[MAX_PATH] ; #ifdef _WIN32 HANDLE file_handle; WIN32_FIND_DATA fdFile; BOOL success; memset(filename, 0, sizeof(filename)); snprintf(filename, sizeof(filename) - 1, "%s\\*.dll", directory); success = 1; for (file_handle = FindFirstFile(filename, &fdFile); file_handle != INVALID_HANDLE_VALUE && success; success = FindNextFile(file_handle, &fdFile)) { //Skip directories if (fdFile.dwFileAttributes & 
FILE_ATTRIBUTE_DIRECTORY) continue; //Read the seed file memset(filename, 0, sizeof(filename)); snprintf(filename, sizeof(filename) - 1, "%s\\%s", directory, fdFile.cFileName); num_files++; mutator_dlls = (char **)realloc(mutator_dlls, num_files * sizeof(char *)); mutator_dlls[num_files - 1] = strdup(filename); } FindClose(file_handle); #else #ifdef __APPLE__ char * extension = "dylib"; #else char * extension = "so"; #endif struct dirent *dp; DIR *dfd; struct stat stbuf; if ((dfd = opendir(directory)) != NULL) { while ((dp = readdir(dfd)) != NULL) { snprintf(filename, sizeof(filename), "%s/%s", directory, dp->d_name); if(stat(filename, &stbuf ) == -1) continue; if ((stbuf.st_mode & S_IFMT) == S_IFDIR) continue; // Skip directories if (!strncmp(&filename[strlen(filename)-strlen(extension)], extension, strlen(extension))) { num_files++; mutator_dlls = (char **)realloc(mutator_dlls, num_files * sizeof(char *)); mutator_dlls[num_files - 1] = strdup(filename); } } } closedir(dfd); #endif *num_libraries = num_files; return mutator_dlls; } UTILS_API mutator_t * mutator_factory(char * mutator_filename) { void(*init_ptr)(mutator_t *); mutator_t * ret; #ifdef _WIN32 HINSTANCE handle = LoadLibrary(mutator_filename); #else void * handle = dlopen(mutator_filename, RTLD_LAZY); #endif if (!handle) //Couldn't load the requested mutator library return NULL; #ifdef _WIN32 init_ptr = (void(*)(mutator_t *))GetProcAddress(handle, "init"); #else init_ptr = (void(*)(mutator_t *))dlsym(handle, "init"); #endif if (!init_ptr) { //The library didn't have our init function #ifdef _WIN32 FreeLibrary(handle); #else dlclose(handle); #endif return NULL; } //Call the mutator's init function to initailize the mutators struct ret = (mutator_t *)malloc(sizeof(mutator_t)); init_ptr(ret); return ret; } static void generate_mutator_filename(char * mutator_directory, char * mutator_type, int include_mutator, char * output_filename, size_t output_filename_length) { #ifdef _WIN32 char * extension = "dll", 
*prefix = ""; #elif defined(__APPLE__) char * extension = "dylib", *prefix = "lib"; #else char * extension = "so", *prefix = "lib"; #endif memset(output_filename, 0, output_filename_length); if(mutator_directory) { if (include_mutator) snprintf(output_filename, output_filename_length, "%s/%s%s_mutator.%s", mutator_directory, prefix, mutator_type, extension); else snprintf(output_filename, output_filename_length, "%s/%s%s.%s", mutator_directory, prefix, mutator_type, extension); } else { if (include_mutator) snprintf(output_filename, output_filename_length, "%s%s_mutator.%s", prefix, mutator_type, extension); else snprintf(output_filename, output_filename_length, "%s%s.%s", prefix, mutator_type, extension); } } /** * This function obtains a mutator_t object by calling the mutator specified by mutator's init method. * @param mutator_directory - the directory to load the mutator library file from. * @param mutator_type - the name of the mutator that should be created. * @return - a instrumentation_t object of the specified type on success or NULL on failure */ UTILS_API mutator_t * mutator_factory_directory(char * mutator_directory, char * mutator_type) { char filename[MAX_PATH]; mutator_t * ret; generate_mutator_filename(mutator_directory, mutator_type, 0, filename, sizeof(filename)); ret = mutator_factory(filename); if (!ret) { generate_mutator_filename(mutator_directory, mutator_type, 1, filename, sizeof(filename)); ret = mutator_factory(filename); } return ret; } /** * This function returns help text for all the mutators found in the specified mutator directory. This help text will * describe the mutators and any options that can be passed to their create functions. * @param mutator_directory - The directory to look for mutator libraries in * @return - a newly allocated string containing the help text. 
*/ UTILS_API char * mutator_help(char * mutator_directory) { #ifdef _WIN32 HINSTANCE handle; #else void * handle; #endif int num_libraries = 0, i; char ** mutator_libraries; int(*help_ptr)(char **); char * text = NULL, * new_text = NULL; mutator_libraries = get_mutator_library_filenames(mutator_directory, &num_libraries); if (!num_libraries) { printf("ERROR: Could not find any mutators. Please ensure that the directory %s contains the mutator library files", mutator_directory); return NULL; } text = strdup("\nMutator Options:\n\n"); for (i = 0; i < num_libraries; i++) { #ifdef _WIN32 handle = LoadLibrary(mutator_libraries[i]); #else handle = dlopen(mutator_libraries[i], RTLD_LAZY); #endif if (!handle) //if we couldn't load the library, just continue continue; #ifdef _WIN32 help_ptr = (int(*)(char **))GetProcAddress(handle, "help"); #else help_ptr = (int(*)(char **))dlsym(handle, "help"); #endif if (help_ptr) {//The library has a help function if (!help_ptr(&new_text)) //Call help() and check for failure { text = (char *)realloc(text, strlen(text) + strlen(new_text) + 1); strcat(text, new_text); free(new_text); } } #ifdef _WIN32 FreeLibrary(handle); #else dlclose(handle); #endif handle = NULL; } text = (char *)realloc(text, strlen(text) + 2); strcat(text, "\n"); free(mutator_libraries); return text; } ================================================ FILE: utils/mutator_factory.h ================================================ #pragma once #include #include UTILS_API mutator_t * mutator_factory(char * mutator_filename); UTILS_API mutator_t * mutator_factory_directory(char * mutator_directory, char * mutator_type); UTILS_API char * mutator_help(char * mutator_directory); ================================================ FILE: utils/utils.c ================================================ #include "utils.h" #include #include #include #include #include #include #include #ifdef _WIN32 #include #include #include #include #include #include #define F_OK 0 #else #ifdef 
/* Windows-only: forward declaration of the process helper and char to wchar conversion */
#ifdef _WIN32

static int CreateChildProcess(char * cmd_line, HANDLE read_pipe, HANDLE * process_out, DWORD creation_flags);

/**
 * This function converts a char * to a wchar *
 * @param string - The char * string that should be converted to a wchar * string
 * @param out_buffer - A wchar * buffer that the converted string should be placed into. If NULL,
 * this function will allocate a wchar * buffer to place the converted string into.
 * @return - A pointer to the converted string, or NULL if a buffer had to be allocated and
 * the allocation failed
 */
UTILS_API wchar_t * convert_char_array_to_wchar(char * string, wchar_t * out_buffer)
{
	size_t num_chars = strlen(string) + 1; //including the terminator
	size_t num_bytes = num_chars * sizeof(wchar_t);
	size_t converted_length = 0;
	wchar_t * wide = out_buffer;

	if (wide == NULL)
	{
		wide = (wchar_t *)malloc(num_bytes);
		if (wide == NULL)
			return NULL;
	}

	mbstowcs_s(&converted_length, wide, num_chars, string, num_bytes);
	return wide;
}

#endif //_WIN32
/* Windows-only: wchar to char conversion and starting a process with a piped stdin */
#ifdef _WIN32

/**
 * This function converts a wchar * to a char *
 * @param string - The wchar * string that should be converted to a char * string
 * @param out_buffer - A char * buffer that the converted string should be placed into. If NULL,
 * this function will allocate a char * buffer to place the converted string into.
 * @return - A pointer to the converted string, or NULL if a buffer had to be allocated and
 * the allocation failed
 */
UTILS_API char * convert_wchar_array_to_char(wchar_t * string, char * out_buffer)
{
	size_t size = (wcslen(string) + 1) * 2;
	size_t converted_length = 0;

	if (!out_buffer)
	{
		out_buffer = (char *)malloc(size);
		if (!out_buffer)
			return NULL;
	}
	wcstombs_s(&converted_length, out_buffer, size, string, size - 1);
	return out_buffer;
}

//Closes both ends of the stdin pipe. Expects HANDLE variables named pipe_rd and pipe_wr
//to be in scope. Wrapped in do/while so that it acts as a single statement.
#define CLOSE_PIPES() \
	do { \
		if(pipe_rd) CloseHandle(pipe_rd); \
		if(pipe_wr) CloseHandle(pipe_wr); \
	} while (0)

#define MAX_CMD_LEN (10*4096)
#define MAX_STANDARD_IN_PIPE_SIZE (8*1024*1024) //8MB

/**
 * Starts a process with its stdin attached to a new pipe, and writes the input to that pipe.
 * This is the implementation shared by the start_process_and_write_to_stdin* functions.
 * @param cmd_line - The command line of the new process to start
 * @param input - a buffer that should be passed to the newly created process's stdin, or NULL for none
 * @param input_length - The length of the input parameter
 * @param process_out - a pointer to a HANDLE that will be filled in with a handle to the newly created
 * process. It is set to NULL if the process could not be started.
 * @param pipe_rd_ptr - if not NULL, receives the read end of the stdin pipe; otherwise the read end is closed
 * @param pipe_wr_ptr - if not NULL, receives the write end of the stdin pipe; otherwise the write end is closed
 * @param timeout_ms - The maximum number of milliseconds to wait when writing to the stdin pipe
 * @param creation_flags - The creation flags that should be passed to the CreateProcess Windows API
 * @return - zero on success, non-zero on failure. A non-zero result after the process was started
 * (the write failed) still leaves the process handle in process_out.
 */
static int start_process_and_write_to_stdin_inner(char * cmd_line, char * input, size_t input_length,
	HANDLE * process_out, HANDLE * pipe_rd_ptr, HANDLE * pipe_wr_ptr, DWORD timeout_ms, DWORD creation_flags)
{
	SECURITY_ATTRIBUTES saAttr;
	int ret;
	HANDLE pipe_rd, pipe_wr;

	//Mark the process as not started in case we error out
	*process_out = NULL;

	if (strlen(cmd_line) > MAX_CMD_LEN)
		return 1;

	// Set the bInheritHandle flag so pipe handles are inherited.
	saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
	saAttr.bInheritHandle = TRUE;
	saAttr.lpSecurityDescriptor = NULL;

	// Create a pipe for the child process's STDIN.
	if (!CreatePipe(&pipe_rd, &pipe_wr, &saAttr, (DWORD)min(input_length, MAX_STANDARD_IN_PIPE_SIZE)))
		return 1;

	// Ensure the write handle to the pipe for STDIN is not inherited.
	if (!SetHandleInformation(pipe_wr, HANDLE_FLAG_INHERIT, 0))
	{
		CLOSE_PIPES();
		return 1;
	}

	// Create the child process.
	if (CreateChildProcess(cmd_line, pipe_rd, process_out, creation_flags))
	{
		CLOSE_PIPES();
		return 1;
	}

	//Write the input buffer
	ret = 0;
	if (input && input_length > 0)
	{
		if (WriteToPipe(*process_out, pipe_wr, pipe_rd, input, input_length, timeout_ms))
			ret = 1;
	}

	//Either save the pipes, or close them so we don't leak resources
	if (pipe_rd_ptr)
		*pipe_rd_ptr = pipe_rd;
	else
		CloseHandle(pipe_rd);
	if (pipe_wr_ptr)
		*pipe_wr_ptr = pipe_wr;
	else
		CloseHandle(pipe_wr);

	return ret;
}

/**
 * This function starts a process and writes to the stdin of the process.
 * @param cmd_line - The command line of the new process to start
 * @param input - a buffer that should be passed to the newly created process's stdin
 * @param input_length - The length of the input parameter
 * @param process_out - a pointer to a HANDLE that will be filled in with a handle to the newly created process
 * @param pipe_rd_ptr - a pointer to a HANDLE that will be filled in with the read end of the stdin pipe for the new process.
 * If pipe_rd_ptr is NULL, the read end of the stdin pipe will be closed instead.
 * @param pipe_wr_ptr - a pointer to a HANDLE that will be filled in with the write end of the stdin pipe for the new process.
 * If pipe_wr_ptr is NULL, the write end of the stdin pipe will be closed instead.
 * @param timeout_ms - The maximum number of milliseconds to wait when writing to the newly created process's stdin pipe.
 * @return - zero on success, non-zero on failure
 */
UTILS_API int start_process_and_write_to_stdin_and_save_pipes_timeout(char * cmd_line, char * input, size_t input_length,
	HANDLE * process_out, HANDLE * pipe_rd_ptr, HANDLE * pipe_wr_ptr, DWORD timeout_ms)
{
	return start_process_and_write_to_stdin_inner(cmd_line, input, input_length, process_out, pipe_rd_ptr, pipe_wr_ptr, timeout_ms, 0);
}

#endif //_WIN32
/* Windows-only: convenience wrappers for starting a process, and the process creation itself */
#ifdef _WIN32

/**
 * This function starts a process and writes to the stdin of the process.
 * @param cmd_line - The command line of the new process to start
 * @param input - a buffer that should be passed to the newly created process's stdin
 * @param input_length - The length of the input parameter
 * @param process_out - a pointer to a HANDLE that will be filled in with a handle to the newly created process
 * @return - zero on success, non-zero on failure
 */
UTILS_API int start_process_and_write_to_stdin(char * cmd_line, char * input, size_t input_length, HANDLE * process_out)
{
	//Neither pipe end is handed back, and the timeout and creation flags are both 0
	return start_process_and_write_to_stdin_inner(cmd_line, input, input_length, process_out,
		NULL, NULL, 0, 0);
}

/**
 * This function starts a process and writes to the stdin of the process.
 * @param cmd_line - The command line of the new process to start
 * @param input - a buffer that should be passed to the newly created process's stdin
 * @param input_length - The length of the input parameter
 * @param process_out - a pointer to a HANDLE that will be filled in with a handle to the newly created process
 * @param creation_flags - The creation flags that should be passed to the CreateProcess Windows API
 * @return - zero on success, non-zero on failure
 */
UTILS_API int start_process_and_write_to_stdin_flags(char * cmd_line, char * input, size_t input_length, HANDLE * process_out, DWORD creation_flags)
{
	//Same as start_process_and_write_to_stdin, except for the caller supplied creation flags
	return start_process_and_write_to_stdin_inner(cmd_line, input, input_length, process_out,
		NULL, NULL, 0, creation_flags);
}

/**
 * This function starts a new process
 * @param cmd_line - The command line for the process to create
 * @param read_pipe - A handle to the read end of a pipe that should be assigned to the newly created process's stdin
 * @param process_out - A pointer to a HANDLE that will be filled in with a handle to the newly created process
 * @param creation_flags - The creation flags that should be passed to the CreateProcess Windows API
 * @return - zero on success, non-zero on failure
 */
static int CreateChildProcess(char * cmd_line, HANDLE read_pipe, HANDLE * process_out, DWORD creation_flags)
{
	PROCESS_INFORMATION proc_info;
	STARTUPINFO startup_info;

	ZeroMemory(&proc_info, sizeof(PROCESS_INFORMATION));

	// The STARTUPINFO structure carries the standard handles for the child: stdin is
	// the read end of the pipe, while stdout and stderr are left unset.
	ZeroMemory(&startup_info, sizeof(STARTUPINFO));
	startup_info.cb = sizeof(STARTUPINFO);
	startup_info.hStdInput = read_pipe;
	startup_info.hStdOutput = NULL;
	startup_info.hStdError = NULL;
	startup_info.dwFlags |= STARTF_USESTDHANDLES;
	startup_info.wShowWindow = 1;

	if (!CreateProcess(NULL,
		cmd_line,        // command line
		NULL,            // process security attributes
		NULL,            // primary thread security attributes
		TRUE,            // handles are inherited
		creation_flags,  // creation flags
		NULL,            // use parent's environment
		NULL,            // use parent's current directory
		&startup_info,   // STARTUPINFO pointer
		&proc_info))     // receives PROCESS_INFORMATION
	{
		return 1;
	}

	//Only the process handle is handed back, the thread handle isn't needed
	CloseHandle(proc_info.hThread);
	*process_out = proc_info.hProcess;
	return 0;
}

#define MAX_WRITE_SIZE 8*1024*1024 //8MB

//Stores the difference x - y of two FILETIME values in z, in milliseconds.
//(FILETIME counts 100 nanosecond intervals, hence the division by 10000.)
//The macro declares two temporaries named after x and y in the enclosing scope.
#define GET_FILETIME_DIFF_IN_MILLISECONDS(x,y,z) \
	ULARGE_INTEGER temp1##x##y, temp2##x##y; \
	temp1##x##y.LowPart = x.dwLowDateTime; temp1##x##y.HighPart = x.dwHighDateTime; \
	temp2##x##y.LowPart = y.dwLowDateTime; temp2##x##y.HighPart = y.dwHighDateTime; \
	z = (temp1##x##y.QuadPart - temp2##x##y.QuadPart) / 10000;

#endif //_WIN32
/* Windows-only: writing to and draining the stdin pipe of a child process */
#ifdef _WIN32

/**
 * Writes the given input buffer to a pipe, checking to make sure the other process hasn't died
 * and that there is room on the pipe to write.
 * @param process - The process that holds the read end of the pipe.
 * @param pipe_wr - The write end of the pipe that will be written to.
 * @param pipe_rd - The read end of the pipe being written to
 * @param input - a buffer to write to the pipe_wr parameter
 * @param input_length - The length of the input parameter
 * @param timeout_ms - The maximum number of milliseconds to wait when writing to the pipe. Zero disables
 * the timeout check.
 * @return - 0 on success (all bytes written to the pipe), 1 on failure
 */
UTILS_API int WriteToPipe(HANDLE process, HANDLE pipe_wr, HANDLE pipe_rd, char * input, size_t input_length, DWORD timeout_ms)
{
	DWORD dwWritten, out_size, total_in_pipe, timediff;
	size_t total_written = 0, write_size;
	BOOL bSuccess = FALSE;
	FILETIME start_time, time;

	GetSystemTimeAsFileTime(&start_time);
	while (total_written < input_length && get_process_status(process))
	{
		//Work out how much room is left: the pipe's buffer size minus what is waiting to be read
		if (!GetNamedPipeInfo(pipe_wr, NULL, &out_size, NULL, NULL))
			break;
		if (!PeekNamedPipe(pipe_rd, NULL, 0, NULL, &total_in_pipe, NULL))
			break;

		write_size = min(min(input_length - total_written, MAX_WRITE_SIZE), out_size - total_in_pipe);
		if (write_size == 0) //There's no room to write to the pipe
		{
			GetSystemTimeAsFileTime(&time);
			GET_FILETIME_DIFF_IN_MILLISECONDS(time, start_time, timediff);
			if (timeout_ms && timediff > timeout_ms)
				return 1;
			dwWritten = WaitForSingleObject(pipe_wr, timeout_ms);
		}
		else
		{
			bSuccess = WriteFile(pipe_wr, input + total_written, write_size, &dwWritten, NULL);
			if (!bSuccess)
				break;
			total_written += dwWritten;
		}
	}
	return total_written != input_length;
}

/**
 * Flushes any input waiting on the given pipe
 * @param pipe_rd - a handle to the pipe that should be flushed
 * @return - 0 on success, non-zero on failure
 */
UTILS_API int FlushPipe(HANDLE pipe_rd)
{
	DWORD total_in_pipe = 0, num_read = 0;
	int failed;
	char * temp;

	if (!PeekNamedPipe(pipe_rd, NULL, 0, NULL, &total_in_pipe, NULL))
		return 1;
	if (!total_in_pipe) //Nothing is waiting, so there's nothing to flush
		return 0;

	temp = (char *)malloc(total_in_pipe);
	if (!temp) //Without a buffer the pending bytes can't be read off the pipe
		return 1;

	failed = !ReadFile(pipe_rd, temp, total_in_pipe, &num_read, NULL);
	free(temp);

	//A short read means that some of the input is still sitting in the pipe
	if (num_read != total_in_pipe)
		failed = 1;
	return failed;
}

#endif //_WIN32
checks if a process is still alive * @param - a HANDLE to the process to check * @return - FUZZ_RUNNING (1) if the process is alive, FUZZ_NONE (0) if it is not, FUZZ_ERROR (-1) on failure */ #ifdef _WIN32 UTILS_API int get_process_status(HANDLE process) { DWORD exitCode; if (GetExitCodeProcess(process, &exitCode) == 0) return FUZZ_ERROR; return exitCode == STILL_ACTIVE; } #else /** * This function checks if a CHILD process is still alive * @return - FUZZ_CRASH (2) if the process exited by crash, FUZZ_RUNNING (1) if * the process is alive, FUZZ_NONE (0) if it exited cleanly, FUZZ_ERROR (-1) on * failure * * NOTE: This should only be called once after a process has terminated. * */ UTILS_API int get_process_status(pid_t pid) { // We can't use kill here, because it'll return "alive" if the process is // in a zombie state (ie, unreaped). So, we have to reap here. int status; pid_t result; // WNOHANG result: 0 means it exists and is alive, pid means it has exited, // -1 means error result = waitpid(pid, &status, WNOHANG); if(result == 0) { return FUZZ_RUNNING; } else if (result > 0) { if(WIFEXITED(status)) return FUZZ_NONE; // it exited normally if(WIFSIGNALED(status)) return FUZZ_CRASH; // it crashed } // either waitpid failed, or the process is not running, did not exit // normally, and was not signaled, in either case we don't know what // went wrong return FUZZ_ERROR; } #endif /** * Generates a temporary filename * @param suffix - Optionally, a suffix to append to the generated temporary filename. If NULL, * no file extension will be added. * @return - NULL on failure, or a newly allocated character buffer holding the temporary filename. 
* The caller should free the returned buffer */ UTILS_API char * get_temp_filename(char * suffix) { #ifdef _WIN32 char temp_dir[MAX_PATH]; char temp_filename[MAX_PATH]; char * ret; size_t suffix_length = 0; //Get the temp filename // eg C:\Users\\AppData\Local\Temp\ if (GetTempPath(MAX_PATH, temp_dir) == 0) return NULL; // eg C:\Users\\AppData\Local\Temp\fuzD828.tmp GetTempFileName(temp_dir, "fuzzfile", 0, temp_filename); //Add the suffix and convert it to a useable format if (suffix) suffix_length = strlen(suffix); ret = (char *)malloc(MAX_PATH + suffix_length); if (!ret) return NULL; memset(ret, 0, MAX_PATH + suffix_length); strncpy(ret, temp_filename, MAX_PATH); unlink(ret); //Cleanup the file without the extension that GetTempFileName generated if(suffix) strncat(ret, suffix, MAX_PATH + suffix_length); // eg C:\Users\\AppData\Local\Temp\fuzFEAD.tmp.txt #else // on macOS we can use $TMPDIR. ubuntu doesn't seem to have one, stackoverflow recommends // /tmp. /dev/shm might be a better option, because it's a tmpfs (doesn't write to disk) // but i suspect it's less portable to other *nixes. char temp_filename[] = "/tmp/fuzzfileXXXXXX"; // X's required for mktemp char * ret; size_t suffix_length = 0; // mktemp is unsafe, but i'm not sure what the threat model is. // for ours, it might be sufficient. // alternatively, we can mkstemp, but that will also create a file // (as is happening in the windows version of the code) and requires deletion. // that's probably as simple as an unlink(), but it's almost certainly slower. 
mktemp(temp_filename); if (suffix) suffix_length = strlen(suffix); ret = (char *)malloc(MAX_PATH + suffix_length); if (!ret) return NULL; memset(ret, 0, MAX_PATH + suffix_length); strncpy(ret, temp_filename, MAX_PATH); if(suffix) strncat(ret, suffix, MAX_PATH + suffix_length); #endif return ret; } /** * Determines whether a file exists or not * @param path - The path of the file to check for existence * @return - 1 if the file exists, 0 otherwise */ UTILS_API int file_exists(char * path) { return !access(path,F_OK); } /** * This function writes a buffer to the specified file. * @param filename - The filename to write the buffer to * @param buffer - The buffer to write * @param length - THe length of the buffer parameter * @param return - 0 on success, non-zero otherwise */ UTILS_API int write_buffer_to_file(char * filename, char * buffer, size_t length) { int num_written; size_t total = 0; FILE * fp = NULL; int error = EACCES; #ifdef _WIN32 //On Windows, we need to do this in a loop, since we may //need to wait for a process to stop holding this file while (!fp && error == EACCES) { fp = fopen(filename, "wb+"); error = errno; } #else fp = fopen(filename, "wb+"); #endif if (!fp) return -1; while (total < length) { num_written = fwrite(buffer + total, 1, length - total, fp); if (num_written < 0 && errno != EAGAIN && errno != EINTR) break; else if (num_written > 0) total += num_written; } fclose(fp); return total != length; } /** * This function takes a relative path representing a location relative to the * running binary (note: NOT the working directory) and returns the * corresponding absolute path, if that path exists in the filesystem. * * @param relative_path - a char * pointing to the path relative to the executable * @return - NULL on error or nonexistent path, or a char * pointing to a * newly-allocated buffer containing the absolute path. 
The caller should free the * returned buffer */ UTILS_API char * filename_relative_to_binary_dir(char * relative_path) { char exedir[2*MAX_PATH], temppath[MAX_PATH]; int len; // write full path into exedir #ifdef _WIN32 if (!GetModuleFileName(NULL, exedir, 2*MAX_PATH)) { return NULL; } PathRemoveFileSpec(exedir); // Cut off file name len = snprintf(temppath, MAX_PATH, "%s\\%s", exedir, relative_path); #elif __APPLE__ unsigned int bufsize = sizeof(exedir) + 1; if (_NSGetExecutablePath(exedir, &bufsize) != 0) return NULL; realpath(exedir, temppath); dirname_r(temppath, temppath); // Cut off file name len = snprintf(temppath, MAX_PATH, "%s/%s", temppath, relative_path); #else if ((len = readlink("/proc/self/exe", exedir, MAX_PATH)) < 0) return NULL; exedir[len] = 0; //readlink doesn't null terminate dirname(exedir); // Cut off file name len = snprintf(temppath, MAX_PATH, "%s/%s", exedir, relative_path); #endif if (len == MAX_PATH) { return NULL; } if (!file_exists(temppath)) { return NULL; } return strdup(temppath); } /** * Calculates the MD5 hash of a buffer and return the value as a hexstring. * Taken from https://gist.github.com/creationix/4710780 * @param buffer - The buffer to calculate the md5 hash on * @param buffer_length - the length of the buffer parameter * @param output - a buffer to record the md5 hash to. 
* @param output_size - the length of the output parameter */ void md5(uint8_t *buffer, size_t buffer_length, char * output, size_t output_size) { // leftrotate function definition #define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c)))) // These vars will contain the hash uint32_t h0, h1, h2, h3; // Message (to prepare) uint8_t *msg = NULL; // Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating // r specifies the per-round shift amounts uint32_t r[] = { 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 }; // Use binary integer part of the sines of integers (in radians) as constants// Initialize variables: uint32_t k[] = { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 }; h0 = 0x67452301; h1 = 0xefcdab89; h2 = 0x98badcfe; h3 = 0x10325476; // Pre-processing: adding a single 1 bit //append "1" bit to message /* Notice: the input bytes are considered as bits strings, where the first bit is the most significant bit of the byte.[37] */ // Pre-processing: padding with zeros //append "0" bit until message length in bit = 448 (mod 512) //append length mod 
(2 pow 64) to message int new_len; for (new_len = buffer_length * 8 + 1; new_len % 512 != 448; new_len++); new_len /= 8; msg = (uint8_t *)calloc(new_len + 64, 1); // also appends "0" bits // (we alloc also 64 extra bytes...) memcpy(msg, buffer, buffer_length); msg[buffer_length] = 128; // write the "1" bit uint32_t bits_len = 8 * buffer_length; // note, we append the len memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer // Process the message in successive 512-bit chunks: int offset; for (offset = 0; offset 0) \ dest = temp; /** * This function takes a JSON string of logging options and sets up the desired * logging state. * * @param log_options - a JSON string of logging options. For the default options, * NULL can be provided * @return - zero on success, non-zero on error */ UTILS_API int setup_logging(const char * log_options) { int temp_int, result; char * temp_str, * filename = NULL; if (logging.initialized) return 0; if (log_options) { GET_OPTIONAL_ARG(temp_int, log_options, logging.level, "level", result, get_int_options); GET_OPTIONAL_ARG(temp_int, log_options, logging.stdout_on, "stdout", result, get_int_options); GET_OPTIONAL_ARG(temp_int, log_options, logging.file_on, "file", result, get_int_options); GET_OPTIONAL_ARG(temp_str, log_options, filename, "filename", result, get_string_options); } if (logging.file_on) { if (!filename) filename = strdup("killerbeez.log"); logging.log_file = fopen(filename, "a+"); if (!logging.log_file) { #ifdef _WIN32 printf("[LOGGING] ERROR: Failed to open file %s. GetLastError %d", filename, GetLastError()); #else printf("[LOGGING] ERROR: Failed to open file %s. errno %d", filename, errno); #endif return 1; } free(filename); } logging.initialized = 1; INFO_MSG("Logging Started"); return 0; } /** * This function takes a log level, a printf style format string, and printf style * arguments and outputs the message to any of the configured loggers. 
Prior to * calling this function, logging must be initialized via the setup_logging * function prior to any calls to log_msg. If the specified level is FATAL or * above, log_msg will exit(1) immediately after logging the specified message. * * @param level - the log level of the message to log * @param msg - a printf style format string to log * @param ... - printf style arguments to log * @return - zero on success, non-zero on error */ UTILS_API int log_msg(enum LOG_LEVEL level, const char * msg, ...) { va_list args, temp_args; struct tm new_time; time_t aclock; char time_buf[64]; if (!logging.initialized) return 1; if (level < logging.level) return 0; time(&aclock); #ifdef _WIN32 localtime_s(&new_time, &aclock); if (asctime_s(time_buf, sizeof(time_buf), &new_time)) #else localtime_r(&aclock, &new_time); if (!asctime_r(&new_time, time_buf)) #endif { //If we couldn't get the time, NULL out time_buf, so we don't print garbage strncpy(time_buf, "TIME FAILURE", sizeof(time_buf)); } else //asctime appends a newline to the end of the buffer, time_buf[strlen(time_buf) - 1] = 0; //remove it va_start(args, msg); if (logging.stdout_on) { va_copy(temp_args, args); fprintf(stdout, "%s - %-8s - ", time_buf, log_level_names[level]); vprintf(msg, temp_args); fwrite("\n", 1, 1, stdout); fflush(stdout); va_end(temp_args); } if (logging.file_on) { va_copy(temp_args, args); fprintf(logging.log_file, "%s - %-8s - ", time_buf, log_level_names[level]); vfprintf(logging.log_file, msg, temp_args); fwrite("\n", 1, 1, logging.log_file); fflush(logging.log_file); va_end(temp_args); } va_end(args); //If the message is FATAL, we should die after logging if (level >= FATAL) exit(1); return 0; } /** * Reads a file from disk * @param filename - The filename of the file to read * @param buffer - A pointer to a character buffer that will be assigned a newly allocated * buffer to hold the file contents. The caller should free this buffer. 
* @return - -1 on failure, otherwise the number of bytes read from the file */ UTILS_API int read_file(char * filename, char **buffer) { FILE *fp; long fsize, total = 0, num_read; *buffer = NULL; fp = fopen(filename, "rb"); if (!fp) return -1; //Get the size fseek(fp, 0, SEEK_END); fsize = ftell(fp); fseek(fp, 0, SEEK_SET); *buffer = (char *)malloc(fsize + 1); if (!*buffer) { fclose(fp); return -1; } (*buffer)[fsize] = 0; //NULL terminate in case the caller wants to use it as a string while (total < fsize) { num_read = fread(*buffer + total, 1, fsize, fp); total += num_read; } fclose(fp); return fsize; } /** * This function prints a data buffer in hex * @param data - a char * data buffer * @param size - the size of the data buffer * @return none */ UTILS_API void print_hex(char * data, size_t size) { unsigned char *p = (unsigned char *)data; for (size_t i = 0; i 0) total_written += result; else if (result < 0 && errno != EAGAIN) //Error, then break break; } close(pipes[1]); // If the child stopped accepting input (write failed) if(total_written != input_length) { kill(child_pid, 9); wait(&status); return 1; } free(executable); for(i = 0; argv[i]; i++) free(argv[i]); free(argv); *process_out = child_pid; return 0; } #endif //!_WIN32 ================================================ FILE: utils/utils.h ================================================ #pragma once #ifdef _WIN32 #include #else #include #include #include #ifdef __APPLE__ #include #else #include #endif #endif #include #include #ifdef _WIN32 #if defined(UTILS_EXPORTS) #define UTILS_API __declspec(dllexport) #elif defined(UTILS_NO_IMPORT) #define UTILS_API #elif defined(__cplusplus) #define UTILS_API extern "C" __declspec(dllimport) #else #define UTILS_API #endif #else //_WIN32 #define UTILS_API #endif #ifndef MAX_PATH #define MAX_PATH PATH_MAX #endif #define FUZZ_ERROR -1 #define FUZZ_NONE 0 #define FUZZ_RUNNING 1 #define FUZZ_CRASH 2 #define FUZZ_HANG 3 #ifdef _WIN32 typedef HANDLE mutex_t; typedef HANDLE 
semaphore_t; #else typedef pthread_mutex_t * mutex_t; typedef sem_t * semaphore_t; #endif #ifdef _WIN32 UTILS_API int start_process_and_write_to_stdin(char * cmd_line, char * input, size_t input_length, HANDLE * process_out); UTILS_API int start_process_and_write_to_stdin_flags(char * cmd_line, char * input, size_t input_length, HANDLE * process_out, DWORD creation_flags); UTILS_API int start_process_and_write_to_stdin_and_save_pipes_timeout(char * cmd_line, char * input, size_t input_length, HANDLE * process_out, HANDLE * pipe_rd_ptr, HANDLE * pipe_wr_ptr, DWORD timeout_ms); UTILS_API int WriteToPipe(HANDLE process, HANDLE pipe_wr, HANDLE pipe_rd, char * input, size_t input_length, DWORD timeout_ms); UTILS_API int FlushPipe(HANDLE pipe_rd); UTILS_API wchar_t * convert_char_array_to_wchar(char * string, wchar_t * out_buffer); UTILS_API char * convert_wchar_array_to_char(wchar_t * string, char * out_buffer); UTILS_API int get_process_status(HANDLE process); #else UTILS_API int get_process_status(pid_t process); #endif UTILS_API char * get_temp_filename(char * suffix); UTILS_API int file_exists(char * path); UTILS_API int write_buffer_to_file(char * filename, char * buffer, size_t length); UTILS_API char * filename_relative_to_binary_dir(char * relative_path); UTILS_API int read_file(char * filename, char **buffer); UTILS_API void print_hex(char * data, size_t size); UTILS_API void md5(uint8_t *initial_msg, size_t initial_len, char * output, size_t output_size); UTILS_API void * memdup(void * src, size_t length); UTILS_API mutex_t create_mutex(void); UTILS_API int take_mutex(mutex_t mutex); UTILS_API int release_mutex(mutex_t mutex); UTILS_API void destroy_mutex(mutex_t mutex); UTILS_API semaphore_t create_semaphore(int initial, int max); UTILS_API int take_semaphore(semaphore_t semaphore); UTILS_API int release_semaphore(semaphore_t semaphore); UTILS_API void destroy_semaphore(semaphore_t semaphore); #ifndef _WIN32 UTILS_API int split_command_line(char * cmd_line, 
char ** executable, char ***argv); UTILS_API int start_process_and_write_to_stdin(char * cmd_line, char * input, size_t input_length, pid_t * process_out); #endif //Logging enum LOG_LEVEL { DEBUG, INFO, WARNING, ERROR_LEVEL, //ERROR is already taken CRITICAL, FATAL, MAX_LOG_LEVEL, }; #if defined(_DEBUG) #define DEBUG_MSG(msg, ...) log_msg(DEBUG, msg, ##__VA_ARGS__) #else #define DEBUG_MSG(msg, ...) #endif #define INFO_MSG(msg, ...) log_msg(INFO, msg, ##__VA_ARGS__) #define WARNING_MSG(msg, ...) log_msg(WARNING, msg, ##__VA_ARGS__) #define ERROR_MSG(msg, ...) log_msg(ERROR_LEVEL, msg, ##__VA_ARGS__) #define CRITICAL_MSG(msg, ...) log_msg(CRITICAL, msg, ##__VA_ARGS__) #define FATAL_MSG(msg, ...) log_msg(FATAL, msg, ##__VA_ARGS__) UTILS_API char * logging_help(void); UTILS_API int setup_logging(const char * log_options); UTILS_API int log_msg(enum LOG_LEVEL level, const char * msg, ...); //Argument parser helpers #define IF_ARG_OPTION(x, y) \ if(!strcmp(argv[i], x) && i+1 < argc) \ { \ y = argv[i + 1]; \ i++; \ } #define IF_ARGINT_OPTION(x, y) \ if(!strcmp(argv[i], x) && i+1 < argc) \ { \ y = atoi(argv[i + 1]); \ i++; \ } #define IF_ARGDOUBLE_OPTION(x, y) \ if(!strcmp(argv[i], x) && i+1 < argc) \ { \ y = atof(argv[i + 1]); \ i++; \ } #define IF_ARG_SET_TRUE(x, y) \ if(!strcmp(argv[i], x)) \ { \ y = 1; \ } #define ELSE_IF_ARG_OPTION(x, y) else IF_ARG_OPTION(x,y) #define ELSE_IF_ARGINT_OPTION(x, y) else IF_ARGINT_OPTION(x,y) #define ELSE_IF_ARGDOUBLE_OPTION(x, y) else IF_ARGDOUBLE_OPTION(x,y) #define ELSE_IF_ARG_SET_TRUE(x, y) else IF_ARG_SET_TRUE(x,y) /** * Get the number of items in an array */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*x)) ================================================ FILE: vagrant/README.md ================================================ These are configuration files and scripts for [Vagrant](https://www.vagrantup.com/). 
The scripts will install all necessary dependencies, checkout and build Killerbeez, and then run some basic tests to make sure it's working as expected. They are organized by Linux distribution and version. To use them, copy the shell scripts into the distribution you want to run, `cd` into that directory and run `vagrant up` (assuming Vagrant is installed and configured, obviously). These should work with any hypervisor, but they have been tested using VirtualBox as a back end. For example, to get Killerbeez running on Ubuntu 16.04 (xenial): ``` cp *.sh ubuntu/xenial cd ubuntu/xenial vagrant up ``` If you don't want to use Vagrant, the scripts in this directory should still help get you up and running on your own VM or on a bare-metal installation. The dependencies.sh script should be run as root, as it'll install the dependencies. The setup.sh script should be run as a normal user. ================================================ FILE: vagrant/ci_runner/Dockerfile ================================================ ARG SOURCE_IMG FROM ${SOURCE_IMG} COPY dependencies.sh /usr/bin/dependencies.sh RUN /usr/bin/dependencies.sh ================================================ FILE: vagrant/ci_runner/Vagrantfile ================================================ # -*- mode: ruby -*- # vi: set ft=ruby : # The following platforms are listed as having supported packages in the # gitlab-runner documentation # (https://docs.gitlab.com/runner/install/linux-repository.html). Thus we can # build runners for these platforms just by starting from the appropriate box # and installing the package. LINUX_BOXES = [ ["ubuntu", "16.04", "ubuntu/xenial64"], ["ubuntu", "18.04", "ubuntu/bionic64"], ["fedora", "29", "generic/fedora29"], ["debian", "8", "debian/jessie64"], ["debian", "9", "debian/stretch64"], ] # The following platforms do not have supported gitlab-runner packages # according to the webpage, so we build on them using the docker executor # instead. 
DOCKER_IMAGES = [ ["ubuntu", "14.04", "ubuntu:14.04"], ["ubuntu", "19.04", "ubuntu:19.04"], ["fedora", "30", "fedora:30"], ["debian", "10", "debian:10"], ] WINDOWS_BOXES = [ ["10", "x64", "2017", "Windows10x64en-noup"], ["10", "x64", "2019", "Windows10x64en-noup"], ] Vagrant.configure("2") do |config| LINUX_BOXES.each do |(distro, version, boxname)| hostname = "kb-#{distro}-#{version}" config.vm.define hostname do |config| config.vm.box = boxname config.vm.hostname = hostname.gsub(".", "-") # Settings for Virtualbox config.vm.provider "virtualbox" do |vb| vb.linked_clone = true end config.vm.provision "shell", inline: "mkdir -p /killerbeez; chown vagrant:vagrant /killerbeez" config.vm.provision "file", source: ".", destination: "/killerbeez" config.vm.provision "shell", path: "../dependencies.sh", name: "dependencies" config.vm.provision "shell", path: "runner.sh", privileged: false end end # Creates a VM for each of the docker platforms, containing a runner that # defaults to the configured docker image. Thus we can tag these runners # with the ${distro}-${version} like the above ones, and they will # work the same way from the perspective of the .gitlab-ci.yml file. Under # the hood they will run the configured script in a docker container of the # configured OS rather than on the host filesystem, though. 
DOCKER_IMAGES.each do |(distro, version, docker_image)| hostname = "kb-docker-#{distro}-#{version}" config.vm.define hostname do |config| config.vm.box = "ubuntu/bionic64" config.vm.hostname = hostname.gsub(".", "-") # Settings for Virtualbox config.vm.provider "virtualbox" do |vb| vb.linked_clone = true end config.vm.provision "shell", inline: "mkdir -p /killerbeez; chown vagrant:vagrant /killerbeez" config.vm.provision "file", source: ".", destination: "/killerbeez" config.vm.provision "file", source: "../dependencies.sh", destination: "/killerbeez/dependencies.sh" config.vm.provision "docker" do |d| d.build_image "/killerbeez", args: "-t kb-#{distro}-#{version} --build-arg SOURCE_IMG=#{docker_image}" d.run "gitlab-runner", image: "gitlab/gitlab-runner", args: "-v /srv/gitlab-runner/config:/etc/gitlab-runner \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /killerbeez:/killerbeez" end config.vm.provision "shell", path: "register_docker_runner.sh", privileged: false, args: [distro, version] end end WINDOWS_BOXES.each do |(version, bitness, vs_version, boxname)| hostname = "kb-windows-#{version}-#{bitness}-vs#{vs_version}" config.vm.define hostname do | config| config.vm.box = boxname config.vm.hostname = hostname.gsub(".", "-") # Settings for Virtualbox config.vm.provider "virtualbox" do |vb| vb.linked_clone = true end config.vm.communicator = "winrm" config.vm.guest = :windows config.windows.halt_timeout = 15 # RDP port for debugging config.vm.network :forwarded_port, guest: 3389, host: 3389, auto_correct: true config.vm.provision "file", source: ".", destination: "C:\\killerbeez" config.vm.provision "shell", path: "../../tools/setup_build_env.ps1", args: ["-build_env", "C:\\killerbeez", "-vs_version", vs_version] config.vm.provision "shell", path: "runner.ps1", args: ["-version", version, "-bitness", bitness, "-vs_version", vs_version] end end end ================================================ FILE: vagrant/ci_runner/register_docker_runner.sh 
================================================
#!/bin/bash
# Registers a docker-executor gitlab-runner for the given distro/version,
# unless a runner with the same name is already registered.
DISTRO=$1
VERSION=$2

# Register the runner
RUNNER_NAME=kb-docker-${DISTRO}-${VERSION}
RUNNER_TAG_LIST=${DISTRO},${DISTRO}-${VERSION}
DOCKER_IMAGE=kb-${DISTRO}-${VERSION}

# Check if already registered (both output streams of `list` are searched)
sudo docker run \
  --rm \
  -v /srv/gitlab-runner/config:/etc/gitlab-runner \
  -v /var/run/docker.sock:/var/run/docker.sock \
  -v /killerbeez:/killerbeez \
  gitlab/gitlab-runner list |& grep -qF "$RUNNER_NAME"
if [[ $? -eq 0 ]]; then
  echo "$RUNNER_NAME already registered, skipping registration"
  exit 0
fi

sudo docker run \
  --rm \
  -e RUNNER_NAME="$RUNNER_NAME" \
  -e RUNNER_TAG_LIST="$RUNNER_TAG_LIST" \
  -e RUNNER_EXECUTOR=docker \
  -e DOCKER_IMAGE="$DOCKER_IMAGE" \
  -e DOCKER_PULL_POLICY=never \
  --env-file /killerbeez/runner_vars \
  -v /srv/gitlab-runner/config:/etc/gitlab-runner \
  -v /var/run/docker.sock:/var/run/docker.sock \
  -v /killerbeez:/killerbeez \
  gitlab/gitlab-runner register

================================================
FILE: vagrant/ci_runner/runner.ps1
================================================
param($version, $bitness, $vs_version)
Set-PSDebug -Trace 1

$name = "kb-windows-$version-$bitness-vs$vs_version"

pushd C:\Gitlab-Runner
# Quit if already registered. Both output streams are searched, mirroring the
# `|&` used by register_docker_runner.sh, and the name is matched literally.
if (& C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe list 2>&1 | Select-String -Quiet -SimpleMatch $name) {
    exit 0
}

# Export every KEY=VALUE line of runner_vars into the environment
foreach($line in (Get-Content /killerbeez/runner_vars)) {
    # Skip blank lines and anything that is not KEY=VALUE
    if ($line -notmatch "=") { continue }
    # Split on the first "=" only, so a value may itself contain "="
    $key, $val = $line.Split("=", 2)
    New-Item -Name $key -Value $val -ItemType Variable -Path Env: -Force
}
$env:RUNNER_EXECUTOR = "shell"
$env:RUNNER_SHELL = "cmd" # Workaround for https://gitlab.com/gitlab-org/gitlab-runner/issues/4814
$env:RUNNER_NAME = $name
$env:RUNNER_TAG_LIST = "windows,windows-$version,$bitness,vs$vs_version"
& C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe register
& C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe install
& C:\Gitlab-Runner\gitlab-runner-windows-amd64.exe start
popd

================================================
FILE:
vagrant/ci_runner/runner.sh ================================================ distro=`grep '^ID=' /etc/os-release | sed -e 's/.*=//g'` version=`grep 'VERSION_ID=' /etc/os-release | sed -e 's/"$//g' -e 's/.*"//g' -e 's/^VERSION_ID=//g'` # Set up repository if [[ "$distro" == "debian" || "$distro" == "ubuntu" ]]; then # For Debian/Ubuntu/Mint curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-runner/script.deb.sh | sudo bash elif [[ "$distro" == "fedora" ]]; then # For RHEL/CentOS/Fedora curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-runner/script.rpm.sh | sudo bash fi # Create apt pin for debian if [[ "$distro" == "debian" ]]; then cat <
/* we use direct map cache to avoid locking */
/* Stores entry in the slot selected by its id modulo NUM_GLOBAL_MODULE_CACHE,
 * replacing whatever entry occupied that slot before.
 */
static inline void
global_module_cache_add(module_entry_t **cache, module_entry_t *entry)
{
    cache[entry->id % NUM_GLOBAL_MODULE_CACHE] = entry;
}

/* Maintains LRU order in thread-private caches. A new/recent entry is moved to
 * the front, and all other entries are shifted back to make place. For new
 * entries, shifting results in the oldest entry being discarded.
 *
 * pos is the slot that gets overwritten by the shift: slots 0..pos-1 move to
 * 1..pos, then entry is written to slot 0. max_pos is only used by the
 * assertion.
 */
static inline void
thread_module_cache_adjust(module_entry_t **cache,
                           module_entry_t *entry,
                           uint pos,
                           uint max_pos)
{
    uint i;
    ASSERT(pos < max_pos, "wrong pos");
    for (i = pos; i > 0; i--)
        cache[i] = cache[i-1];
    cache[0] = entry;
}

/* Inserts a new entry at the front of a thread-private cache of cache_size
 * slots; the entry in the last slot is the one that is dropped.
 */
static inline void
thread_module_cache_add(module_entry_t **cache, uint cache_size,
                        module_entry_t *entry)
{
    thread_module_cache_adjust(cache, entry, cache_size - 1, cache_size);
}

/* Releases one table entry: the copied module data first, then the entry
 * itself. Registered with the vector in module_table_create().
 */
static void
module_table_entry_free(void *entry)
{
    dr_free_module_data(((module_entry_t *)entry)->data);
    dr_global_free(entry, sizeof(module_entry_t));
}

/* Records a module load in the table. An entry previously marked as unloaded
 * is revived when it describes the same module (same bounds, entry point and
 * preferred name, plus checksum and timestamp on WINDOWS); otherwise a new
 * entry holding a copy of data is appended. The resulting entry is also put
 * into the global cache.
 */
void
module_table_load(module_table_t *table, const module_data_t *data)
{
    module_entry_t *entry = NULL;
    module_data_t *mod;
    int i;
    /* Some apps repeatedly unload and reload the same module,
     * so we will try to re-use the old one.
     */
    ASSERT(data != NULL, "data must not be NULL");
    drvector_lock(&table->vector);
    /* Assuming most recently loaded entries are most likely to be unloaded,
     * we iterate the module table in a backward way for better performance.
     */
    for (i = table->vector.entries-1; i >= 0; i--) {
        entry = drvector_get_entry(&table->vector, i);
        mod = entry->data;
        if (entry->unload &&
            /* If the same module is re-loaded at the same address,
             * we will try to use the existing entry.
             */
            mod->start == data->start &&
            mod->end == data->end &&
            mod->entry_point == data->entry_point &&
#ifdef WINDOWS
            mod->checksum == data->checksum &&
            mod->timestamp == data->timestamp &&
#endif
            /* If a module w/ no name (there are some) is loaded, we will
             * keep making new entries.
             */
            dr_module_preferred_name(data) != NULL &&
            dr_module_preferred_name(mod) != NULL &&
            strcmp(dr_module_preferred_name(data),
                   dr_module_preferred_name(mod)) == 0) {
            entry->unload = false;
            break;
        }
        /* no match: reset so that a NULL entry after the loop means "not found" */
        entry = NULL;
    }
    if (entry == NULL) {
        entry = dr_global_alloc(sizeof(*entry));
        /* the id is the index the entry is about to get in the vector */
        entry->id = table->vector.entries;
        entry->unload = false;
        entry->data = dr_copy_module_data(data);
        drvector_append(&table->vector, entry);
    }
    drvector_unlock(&table->vector);
    global_module_cache_add(table->cache, entry);
}

/* Returns true when entry is non-NULL, not marked unloaded, has module data,
 * and pc lies in the half-open range [start, end) of that module.
 */
static inline bool
pc_is_in_module(module_entry_t *entry, app_pc pc)
{
    if (entry != NULL && !entry->unload && entry->data != NULL) {
        module_data_t *mod = entry->data;
        if (pc >= mod->start && pc < mod->end)
            return true;
    }
    return false;
}

/* Finds the loaded module containing pc. The search order is: the caller's
 * thread-private cache (skipped when cache is NULL), the table's global
 * cache, and finally the whole table, newest entry first, under the vector
 * lock. A hit in the table is added to both caches. Returns the entry, or
 * NULL when no loaded module contains pc.
 */
module_entry_t *
module_table_lookup(module_entry_t **cache, int cache_size,
                    module_table_t *table, app_pc pc)
{
    module_entry_t *entry;
    int i;

    /* We assume we never change an entry's data field, even on unload,
     * and thus it is ok to check its value without a lock.
     */
    /* lookup thread module cache */
    if (cache != NULL) {
        for (i = 0; i < cache_size; i++) {
            entry = cache[i];
            if (pc_is_in_module(entry, pc)) {
                /* move the hit to the front unless it is already there */
                if (i > 0)
                    thread_module_cache_adjust(cache, entry, i, cache_size);
                return entry;
            }
        }
    }

    /* lookup global module cache */
    /* we use a direct map cache, so it is ok to access it without lock */
    for (i = 0; i < NUM_GLOBAL_MODULE_CACHE; i++) {
        entry = table->cache[i];
        if (pc_is_in_module(entry, pc))
            return entry;
    }

    /* lookup module table */
    entry = NULL;
    drvector_lock(&table->vector);
    for (i = table->vector.entries - 1; i >= 0; i--) {
        entry = drvector_get_entry(&table->vector, i);
        ASSERT(entry != NULL, "fail to get module entry");
        if (pc_is_in_module(entry, pc)) {
            global_module_cache_add(table->cache, entry);
            if (cache != NULL)
                thread_module_cache_add(cache, cache_size, entry);
            break;
        }
        /* no match: reset so that NULL is returned when nothing is found */
        entry = NULL;
    }
    drvector_unlock(&table->vector);
    return entry;
}

/* Marks the module that contains data->start as unloaded. The entry stays in
 * the table so that module_table_load() can revive it later.
 */
void
module_table_unload(module_table_t *table, const module_data_t *data)
{
    module_entry_t *entry = module_table_lookup(NULL, 0, table, data->start);
    if (entry != NULL) {
        entry->unload = true;
    } else {
        ASSERT(false, "fail to find the module to be unloaded");
    }
}

/* assuming caller holds the lock */
/* Writes one line describing entry to log. With print_all_info the line holds
 * id, start, end, entry point, preferred name and full path (plus checksum and
 * timestamp on WINDOWS); otherwise it holds id, module size and full path.
 */
void
module_table_entry_print(module_entry_t *entry, file_t log, bool print_all_info)
{
    const char *name;
    module_data_t *data;
    const char *full_path = "";
    data = entry->data;
    name = dr_module_preferred_name(data);
    if (data->full_path != NULL && data->full_path[0] != '\0')
        full_path = data->full_path;

    if (print_all_info) {
        dr_fprintf(log, "%3u, "PFX", "PFX", "PFX", %s, %s",
                   entry->id, data->start, data->end, data->entry_point,
                   (name == NULL || name[0] == '\0') ? "" : name,
                   full_path);
#ifdef WINDOWS
        dr_fprintf(log, ", 0x%08x, 0x%08x", data->checksum, data->timestamp);
#endif /* WINDOWS */
        dr_fprintf(log, "\n");
    } else {
        dr_fprintf(log, " %u, %llu, %s\n", entry->id,
                   (uint64)(data->end - data->start), full_path);
    }
}

/* Prints the entry count, an optional column header and then one line per
 * entry, all while holding the vector lock. Does nothing when log is
 * INVALID_FILE.
 *
 * NOTE(review): the column header names an "unload" column, but
 * module_table_entry_print() emits no such field -- confirm which of the two
 * is intended before relying on the column positions.
 */
void
module_table_print(module_table_t *table, file_t log, bool print_all_info)
{
    uint i;
    module_entry_t *entry;
    if (log == INVALID_FILE) {
        /* It is possible that failure on log file creation is caused by the
         * running process not having enough privilege, so this is not a
         * release-build fatal error
         */
        ASSERT(false, "invalid log file");
        return;
    }
    drvector_lock(&table->vector);
    dr_fprintf(log, "Module Table: %u\n", table->vector.entries);

    if (print_all_info) {
        dr_fprintf(log, "Module Table: id, base, end, entry, unload, name, path");
#ifdef WINDOWS
        dr_fprintf(log, ", checksum, timestamp");
#endif
        dr_fprintf(log, "\n");
    }

    for (i = 0; i < table->vector.entries; i++) {
        entry = drvector_get_entry(&table->vector, i);
        module_table_entry_print(entry, log, print_all_info);
    }
    drvector_unlock(&table->vector);
}

/* Allocates a table with an empty (all NULL) global cache and a vector that
 * frees its entries through module_table_entry_free().
 */
module_table_t *
module_table_create()
{
    module_table_t *table = dr_global_alloc(sizeof(*table));
    memset(table->cache, 0, sizeof(table->cache));
    drvector_init(&table->vector, 16, false, module_table_entry_free);
    return table;
}

/* Deletes the vector of entries and then frees the table itself. */
void
module_table_destroy(module_table_t *table)
{
    drvector_delete(&table->vector);
    dr_global_free(table, sizeof(*table));
}

================================================ FILE: winafl/modules.h ================================================ /* *************************************************************************** * Copyright (c) 2012-2013 Google, Inc. All rights reserved.
* ***************************************************************************/ /* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * * Neither the name of Google, Inc. nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, INC. OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * This file has been modified from the original to suit the purposes of this * project. */ /* Utilities for keeping track of (un)loaded modules in DynamoRIO clients. 
Copied from the DynamoRIO project, http://dynamorio.org/ */

#ifndef CLIENTS_COMMON_MODULES_H_
#define CLIENTS_COMMON_MODULES_H_

/* NOTE(review): the header names of the two #include directives below are
 * missing in this copy of the file - restore them from the upstream source.
 */
#include
#include

/* Number of entries in the table-wide lookup cache (module_table_t.cache). */
#define NUM_GLOBAL_MODULE_CACHE 8

/* One tracked module. */
typedef struct _module_entry_t {
    int id;
    bool unload; /* if the module is unloaded */
    module_data_t *data;
} module_entry_t;

/* The set of tracked modules plus a small cache of recently used entries. */
typedef struct _module_table_t {
    drvector_t vector;
    /* for quick query without lock, assuming pointer-aligned */
    module_entry_t *cache[NUM_GLOBAL_MODULE_CACHE];
} module_table_t;

void
module_table_load(module_table_t *table, const module_data_t *data);

/* To avoid data race, proper synchronization on module table is required for
 * accessing module table entry.
 */
module_entry_t *
module_table_lookup(module_entry_t **cache, int cache_size,
                    module_table_t *table, app_pc pc);

/* To avoid data race, proper synchronization on module table is required for
 * accessing module table entry.
 */
void
module_table_entry_print(module_entry_t *entry, file_t log, bool print_all_info);

void
module_table_unload(module_table_t *table, const module_data_t *data);

/* Dumps every entry of the table to `log`. */
void
module_table_print(module_table_t *table, file_t log, bool print_all_info);

/* Allocates and initializes an empty table; pair with module_table_destroy(). */
module_table_t *
module_table_create();

/* Frees a table returned by module_table_create(). */
void
module_table_destroy(module_table_t *table);

#endif /* CLIENTS_COMMON_MODULES_H_ */

================================================
FILE: winafl/utils.h
================================================
/* ***************************************************************************
 * Copyright (c) 2012-2013 Google, Inc. All rights reserved.
 * ***************************************************************************/

/*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
* * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * * Neither the name of Google, Inc. nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, INC. OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * This file has been modified from the original to suit the purposes of this * project. */ /* DynamoRIO utility macros. Copied from the DyanmoRIO project, http://dynamorio.org/ */ #ifndef CLIENTS_COMMON_UTILS_H_ #define CLIENTS_COMMON_UTILS_H_ #include #ifdef DEBUG # define ASSERT(x, msg) DR_ASSERT_MSG(x, msg) # define IF_DEBUG(x) x #else # define ASSERT(x, msg) /* nothing */ # define IF_DEBUG(x) /* nothing */ #endif /* XXX: should be moved to DR API headers? 
*/
/* Size helpers; these only give correct results for true arrays, not pointers. */
#define BUFFER_SIZE_BYTES(buf) sizeof(buf)
#define BUFFER_SIZE_ELEMENTS(buf) (BUFFER_SIZE_BYTES(buf) / sizeof((buf)[0]))
#define BUFFER_LAST_ELEMENT(buf) (buf)[BUFFER_SIZE_ELEMENTS(buf) - 1]
/* Writes 0 into the last element of the array. */
#define NULL_TERMINATE_BUFFER(buf) BUFFER_LAST_ELEMENT(buf) = 0
/* True when x is a multiple of alignment; the mask trick requires alignment
 * to be a power of two. */
#define ALIGNED(x, alignment) ((((ptr_uint_t)x) & ((alignment)-1)) == 0)
/* True when any bit of mask is set in var. */
#define TESTANY(mask, var) (((mask) & (var)) != 0)
#define TEST TESTANY

/* Platform selection helpers. */
#ifdef WINDOWS
# define IF_WINDOWS(x) x
# define IF_UNIX_ELSE(x,y) y
#else
# define IF_WINDOWS(x)
# define IF_UNIX_ELSE(x,y) x
#endif

/* Checks for both debug and release builds: */
#define USAGE_CHECK(x, msg) DR_ASSERT_MSG(x, msg)

/* Casts a data pointer to generic_func_t.  On Windows, compiler warning 4055
 * is disabled around the cast. */
static inline generic_func_t
cast_to_func(void *p)
{
#ifdef WINDOWS
# pragma warning(push)
# pragma warning(disable : 4055)
#endif
    return (generic_func_t) p;
#ifdef WINDOWS
# pragma warning(pop)
#endif
}

#endif /* CLIENTS_COMMON_UTILS_H_ */

================================================
FILE: winafl/winafl.c
================================================
/*
   WinAFL - DynamoRIO client (instrumentation) code
   ------------------------------------------------

   Written and maintained by Ivan Fratric

   Copyright 2016 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

   This file has been modified from the original to suit the purposes of this
   project.
*/

/* NOTE(review): the header names of the bare #include directives below are
 * missing in this copy of the file - restore them from the upstream source. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "modules.h"
#include "utils.h"
#include

/* Prints to STDERR when the global `verbose` level is at least `level`. */
#define NOTIFY(level, fmt, ...) do {          \
    if (verbose >= (level))                   \
        dr_fprintf(STDERR, fmt, __VA_ARGS__); \
} while (0)

//////////////////////////////////////////////////////////////////////////////////////
// Enums and Struct Definitions //////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////

#define UNKNOWN_MODULE_ID USHRT_MAX
#define OPTION_MAX_LENGTH MAXIMUM_PATH

/* Values for winafl_option_t.coverage_kind */
#define COVERAGE_BB 0
#define COVERAGE_EDGE 1

/* Number of entries in winafl_data_t.cache */
#define NUM_THREAD_MODULE_CACHE 8

/* Node of the singly linked list of modules to collect coverage for. */
typedef struct _target_module_t {
    char module_name[MAXIMUM_PATH]; //compared case-insensitively by find_target_module
    int index;                      //slot number; the module's pointer lives at TLS slot index + 1
    struct _target_module_t *next;
    unsigned char *afl_area;        //coverage area used for this module in per-module coverage mode
} target_module_t;

typedef struct _winafl_option_t {
    /* Use nudge to notify the process for termination so that
     * event_exit will be called.
     */
    bool verbose_edges;       //record from/to edge pairs; the shm area is then EDGES_SHM_SIZE bytes
    bool nudge_kills;
    bool debug_mode;          //when set, the comms pipe is not read from or written to
    bool write_log;           //when set, progress messages are written to winafl_data.log
    int coverage_kind;        //COVERAGE_BB or COVERAGE_EDGE
    char logdir[MAXIMUM_PATH];
    target_module_t *target_modules; //head of the list of modules to instrument
    char fuzz_module[MAXIMUM_PATH];  //if empty, instrumentation is enabled from thread init
    char fuzz_method[MAXIMUM_PATH];
    char pipe_name[MAXIMUM_PATH];
    char shm_name[MAXIMUM_PATH];
    unsigned long fuzz_offset;
    int fuzz_iterations;      //the process exits once this many iterations have run
    void **func_args;         //fuzz target arguments saved on the first iteration
    int num_fuz_args;         //number of entries in func_args
    drwrap_callconv_t callconv;
    bool thread_coverage;     //coverage is written through a per-thread pointer held in TLS
    bool per_module_coverage; //each target module uses its own afl_area
} winafl_option_t;

typedef struct _winafl_data_t {
    module_entry_t *cache[NUM_THREAD_MODULE_CACHE];
    file_t log;
    bool instrumentation_enabled;
    bool exception_hit;

    //Because we instrument the code once, and multiple threads
    //all access that code. We point the instrumented code at this area
    //for threads we don't care to record the coverage info for (when
    //thread_coverage is on).
    unsigned char *fake_afl_area;

    //The real coverage info area (when per-module coverage is off)
    unsigned char *afl_area;
} winafl_data_t;

/* Call counters that are only incremented in debug mode. */
typedef struct _debug_data_t {
    int pre_hanlder_called;
    int post_handler_called;
} debug_data_t;

/* State saved on entry to the fuzz target so each iteration can be restarted. */
typedef struct _fuzz_target_t {
    reg_t xsp; /* stack level at entry to the fuzz target */
    app_pc func_pc;
    int iteration;
} fuzz_target_t;

/* Values carried in the low 32 bits of a nudge argument (see event_nudge). */
enum {
    NUDGE_TERMINATE_PROCESS = 1,
    NUDGE_DONE_PROCESSING_INPUT = 2,
};

//////////////////////////////////////////////////////////////////////////////////////
// Global Variables //////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////

static uint verbose;
static winafl_option_t options;
static winafl_data_t winafl_data;

static int winafl_tls_field;

static fuzz_target_t fuzz_target;
static debug_data_t debug_data;

static module_table_t *module_table;
static client_id_t client_id;

static volatile bool go_native;

/* Handle of the comms pipe shared with the fuzzer */
static HANDLE pipe = NULL;

//////////////////////////////////////////////////////////////////////////////////////
// Function Prototypes ///////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////

static void event_exit(void);
static void event_thread_exit(void *drcontext);
static void setup_shm_and_tls_regions_for_coverage(void *drcontext);
static void read_start_fuzz_command();

//////////////////////////////////////////////////////////////////////////////////////
// Function Definitions //////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////

/**
 * This function calculates the number of modules in a linked list of modules.
* @param target_module - a linked list of modules * @return - the number of modules in the specified linked list */ static int get_target_modules_length(target_module_t * target_module) { int count = 0; while (target_module) { count++; target_module = target_module->next; } return count; } /** * This function is the registered nudge handler. It will handle DynamoRIO's nudges. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. * @param argument - The argument that the nudging process gave */ static void event_nudge(void *drcontext, uint64 argument) { int nudge_arg = (int)argument; int exit_arg = (int)(argument >> 32); char buffer[200]; if (nudge_arg == NUDGE_TERMINATE_PROCESS) { static int nudge_term_count; /* handle multiple from both NtTerminateProcess and NtTerminateJobObject */ uint count = dr_atomic_add32_return_sum(&nudge_term_count, 1); if (count == 1) { dr_exit_process(exit_arg); } } else if (nudge_arg == NUDGE_DONE_PROCESSING_INPUT) { } else { snprintf(buffer, sizeof(buffer), "Unknown nudge argument: %d", nudge_arg); DR_ASSERT_MSG(false, buffer); } } /** * This function cleans up the winafl instrumentation prior to the process being killed * @param pid - the pid of the process about to be killed * @param exit_code - The exit code of the process about to be killed * @return - whether to skip the termination action by the application: i.e., true indicates * to skip it (the usual case) and false indicates to continue with the application action */ static bool event_soft_kill(process_id_t pid, int exit_code) { /* we pass [exit_code, NUDGE_TERMINATE_PROCESS] to target process */ dr_config_status_t res; res = dr_nudge_client_ex(pid, client_id, NUDGE_TERMINATE_PROCESS | (uint64)exit_code << 32, 0); if (res == DR_SUCCESS) { /* skip syscall since target will terminate itself */ return true; } /* else failed b/c target not 
under DR control or maybe some other * error: let syscall go through */ return false; } /** * This function writes the afl bitmap of edges to the log file */ static void dump_winafl_data() { dr_write_file(winafl_data.log, winafl_data.afl_area, MAP_SIZE); } /** * This function is the registered exception handler. It will handle exceptions passed to it by DynamoRIO, * whenever the instrumented application throws an exception. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. * @param excpt - A DynamoRIO exceptions struct that contains details of the exception was generated. * @return - whether the exception should be passed on to the client */ static bool onexception(void *drcontext, dr_exception_t *excpt) { DWORD num_written; DWORD exception_code = excpt->record->ExceptionCode; if (options.debug_mode || options.write_log) dr_fprintf(winafl_data.log, "Exception caught: %x\n", exception_code); if ((exception_code == EXCEPTION_ACCESS_VIOLATION) || (exception_code == EXCEPTION_ILLEGAL_INSTRUCTION) || (exception_code == EXCEPTION_PRIV_INSTRUCTION) || (exception_code == STATUS_HEAP_CORRUPTION) || (exception_code == EXCEPTION_STACK_OVERFLOW)) { if (options.debug_mode) { dr_fprintf(winafl_data.log, "crashed\n"); } else { winafl_data.exception_hit = true; WriteFile(pipe, "C", 1, &num_written, NULL); } dr_exit_process(1); } return true; } /** * This function is the callback for a new thread being created by the instrumented application. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. 
*/ static void event_thread_init(void *drcontext) { void **thread_data; int i, num_modules, size; //Calculate size of tls data num_modules = get_target_modules_length(options.target_modules); size = (num_modules + 1) * sizeof(void *); //Allocate tls data thread_data = (void **)dr_thread_alloc(drcontext, size); memset(thread_data, 0, size); if (options.thread_coverage) { if (options.per_module_coverage) { for (i = 0; i < num_modules; i++) thread_data[i + 1] = winafl_data.fake_afl_area; } else thread_data[1] = winafl_data.fake_afl_area; } drmgr_set_tls_field(drcontext, winafl_tls_field, thread_data); //If we haven't set a target module, then just enable instrumentation now if (!options.fuzz_module[0]) { if (!winafl_data.instrumentation_enabled) { winafl_data.instrumentation_enabled = true; read_start_fuzz_command(); } setup_shm_and_tls_regions_for_coverage(drcontext); } } /** * This function is the callback for a thread exiting in the instrumented application. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. 
*/ static void event_thread_exit(void *drcontext) { int num_modules, size; void *data; //Calculate size of tls data num_modules = get_target_modules_length(options.target_modules); size = (num_modules + 1) * sizeof(void *); //Free tls data data = drmgr_get_tls_field(drcontext, winafl_tls_field); dr_thread_free(drcontext, data, size); } /** * This function looks up a module in the target module linked list by the module name * @param module_name - the module to find in the target module linked list * @return - A pointer to the target_module_t struct describing the requested module, or NULL if the * module was not found */ static target_module_t * find_target_module(const char * module_name) { target_module_t * target_module = options.target_modules; while (target_module) { if (_stricmp(module_name, target_module->module_name) == 0) return target_module; target_module = target_module->next; } return NULL; } /** * This function adds a module to the linked list of target modules. * @param the name of the module to add */ static void add_target_module(const char * name) { target_module_t *target_modules; target_modules = options.target_modules; options.target_modules = (target_module_t *)dr_global_alloc(sizeof(target_module_t)); options.target_modules->index = get_target_modules_length(target_modules); options.target_modules->next = target_modules; strncpy(options.target_modules->module_name, name, BUFFER_SIZE_ELEMENTS(options.target_modules->module_name)); } /** * This function is a callback for DynamoRIO's instrumentation insertion phase. Depending on the module of the * passed in instruction, it will instrument the application's code to track the hit count for each * basic block. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. 
* @param tag - tag is a unique identifier for the basic block fragment * @param bb - tag is a unique identifier for the basic block fragment * @param inst - The current instruction being instrumented * @param for_trace - for_trace indicates whether this callback is for a new basic block (false) or for adding a * basic block to a trace being created (true). The client has the opportunity to either include the same modifications * made to the standalone basic block, or to use different modifications, for the code in the trace. * @param translating - whether this callback is for basic block creation (false) or is for address translation (true). * @param user_data - User data passed from the previous DynamoRIO instrumentation phase, currently unused * @return - emit flags that control the behavior of basic blocks and traces when emitted into the code cache */ static dr_emit_flags_t instrument_bb_coverage(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst, bool for_trace, bool translating, void *user_data) { app_pc start_pc; module_entry_t *mod_entry; const char *module_name; uint offset; target_module_t *target_module; unsigned char *afl_map; if (!drmgr_is_first_instr(drcontext, inst)) return DR_EMIT_DEFAULT; //Find the module start_pc = dr_fragment_app_pc(tag); mod_entry = module_table_lookup(winafl_data.cache, NUM_THREAD_MODULE_CACHE, module_table, start_pc); if (mod_entry == NULL || mod_entry->data == NULL) return DR_EMIT_DEFAULT; //Find the module in our list of target modules module_name = dr_module_preferred_name(mod_entry->data); target_module = find_target_module(module_name); if (!target_module) return DR_EMIT_DEFAULT; offset = (uint)(start_pc - mod_entry->data->start); if (options.write_log) dr_fprintf(winafl_data.log, "Instrumenting module %s for bb coverage at offset %lx\n", module_name, offset); offset &= MAP_SIZE - 1; drreg_reserve_aflags(drcontext, bb, inst); if (options.thread_coverage) { reg_id_t reg; opnd_t opnd1, opnd2; instr_t *new_instr; 
drreg_reserve_register(drcontext, bb, inst, NULL, ®); drmgr_insert_read_tls_field(drcontext, winafl_tls_field, bb, inst, reg); opnd1 = opnd_create_reg(reg); if (options.per_module_coverage) opnd2 = OPND_CREATE_MEMPTR(reg, (target_module->index + 1) * sizeof(void *)); else opnd2 = OPND_CREATE_MEMPTR(reg, sizeof(void *)); new_instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); opnd1 = OPND_CREATE_MEM8(reg, offset); new_instr = INSTR_CREATE_inc(drcontext, opnd1); instrlist_meta_preinsert(bb, inst, new_instr); drreg_unreserve_register(drcontext, bb, inst, reg); } else { afl_map = winafl_data.afl_area; if (options.per_module_coverage) afl_map = target_module->afl_area; instrlist_meta_preinsert(bb, inst, INSTR_CREATE_inc(drcontext, OPND_CREATE_ABSMEM (&(afl_map[offset]), OPSZ_1))); } drreg_unreserve_aflags(drcontext, bb, inst); return DR_EMIT_DEFAULT; } /** * This function is a callback for DynamoRIO's instrumentation insertion phase. Depending on the module of the * passed in instruction, it will instrument the application's code to track the hit count for each * basic block edge. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. * @param tag - tag is a unique identifier for the basic block fragment * @param bb - tag is a unique identifier for the basic block fragment * @param inst - The current instruction being instrumented * @param for_trace - for_trace indicates whether this callback is for a new basic block (false) or for adding a * basic block to a trace being created (true). The client has the opportunity to either include the same modifications * made to the standalone basic block, or to use different modifications, for the code in the trace. * @param translating - whether this callback is for basic block creation (false) or is for address translation (true). 
* @param user_data - User data passed from the previous DynamoRIO instrumentation phase, currently unused * @return - emit flags that control the behavior of basic blocks and traces when emitted into the code cache */ static dr_emit_flags_t instrument_edge_coverage(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst, bool for_trace, bool translating, void *user_data) { static bool debug_information_output = false; app_pc start_pc; module_entry_t *mod_entry; reg_id_t reg, reg2, reg3; opnd_t opnd1, opnd2; instr_t *new_instr; const char *module_name; uint offset; target_module_t *target_module; if (!drmgr_is_first_instr(drcontext, inst)) return DR_EMIT_DEFAULT; //Find the module start_pc = dr_fragment_app_pc(tag); mod_entry = module_table_lookup(winafl_data.cache, NUM_THREAD_MODULE_CACHE, module_table, start_pc); if (mod_entry == NULL || mod_entry->data == NULL) return DR_EMIT_DEFAULT; //Find the module in our list of target modules module_name = dr_module_preferred_name(mod_entry->data); target_module = find_target_module(module_name); if (!target_module) return DR_EMIT_DEFAULT; offset = (uint)(start_pc - mod_entry->data->start); if (options.write_log) dr_fprintf(winafl_data.log, "Instrumenting module %s for edge coverage at offset %lx\n", module_name, offset); offset &= MAP_SIZE - 1; drreg_reserve_aflags(drcontext, bb, inst); drreg_reserve_register(drcontext, bb, inst, NULL, ®); drreg_reserve_register(drcontext, bb, inst, NULL, ®2); drreg_reserve_register(drcontext, bb, inst, NULL, ®3); //reg2 stores AFL area, reg 3 stores previous offset //load the pointer to previous offset in reg3 drmgr_insert_read_tls_field(drcontext, winafl_tls_field, bb, inst, reg3); //load address of shm into reg2 if (options.thread_coverage) { opnd1 = opnd_create_reg(reg2); if (options.per_module_coverage) opnd2 = OPND_CREATE_MEMPTR(reg3, (target_module->index + 1) * sizeof(void *)); else opnd2 = OPND_CREATE_MEMPTR(reg3, sizeof(void *)); new_instr = INSTR_CREATE_mov_ld(drcontext, 
opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); } else { opnd1 = opnd_create_reg(reg2); if(options.per_module_coverage) opnd2 = OPND_CREATE_INTPTR((uint64)target_module->afl_area); else opnd2 = OPND_CREATE_INTPTR((uint64)winafl_data.afl_area); new_instr = INSTR_CREATE_mov_imm(drcontext, opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); } //load previous offset into register opnd1 = opnd_create_reg(reg); opnd2 = OPND_CREATE_MEMPTR(reg3, 0); new_instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); //xor register with the new offset opnd1 = opnd_create_reg(reg); opnd2 = OPND_CREATE_INT32(offset); new_instr = INSTR_CREATE_xor(drcontext, opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); //increase the counter at reg (offset ^ previous) + reg2 (afl area) opnd1 = opnd_create_base_disp(reg2, reg, 1, 0, OPSZ_1); new_instr = INSTR_CREATE_inc(drcontext, opnd1); instrlist_meta_preinsert(bb, inst, new_instr); //store the new previous offset value offset = (offset >> 1) & (MAP_SIZE - 1); opnd1 = OPND_CREATE_MEMPTR(reg3, 0); opnd2 = OPND_CREATE_INT32(offset); new_instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2); instrlist_meta_preinsert(bb, inst, new_instr); drreg_unreserve_register(drcontext, bb, inst, reg3); drreg_unreserve_register(drcontext, bb, inst, reg2); drreg_unreserve_register(drcontext, bb, inst, reg); drreg_unreserve_aflags(drcontext, bb, inst); return DR_EMIT_DEFAULT; } static dr_emit_flags_t instrument_verbose_edge_coverage(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst, bool for_trace, bool translating, void *user_data) { static bool debug_information_output = false; app_pc start_pc; module_entry_t *mod_entry; reg_id_t index_register, shm_reg, tls_reg, previous_reg; opnd_t opnd1, opnd2; instr_t *new_instr; const char *module_name; uint offset; target_module_t *target_module; if (!drmgr_is_first_instr(drcontext, inst)) return DR_EMIT_DEFAULT; //Find the module 
start_pc = dr_fragment_app_pc(tag);
    mod_entry = module_table_lookup(winafl_data.cache, NUM_THREAD_MODULE_CACHE, module_table, start_pc);
    if (mod_entry == NULL || mod_entry->data == NULL)
        return DR_EMIT_DEFAULT;

    //Find the module in our list of target modules
    module_name = dr_module_preferred_name(mod_entry->data);
    target_module = find_target_module(module_name);
    if (!target_module)
        return DR_EMIT_DEFAULT;

    //Unlike the bb/edge coverage modes, the offset is recorded in full (it is not masked with MAP_SIZE - 1)
    offset = (uint)(start_pc - mod_entry->data->start);
    if(options.write_log)
        dr_fprintf(winafl_data.log, "Instrumenting module %s for verbose edge recording at offset %lx\n", module_name, offset);

    drreg_reserve_aflags(drcontext, bb, inst);
    drreg_reserve_register(drcontext, bb, inst, NULL, &index_register); //used to hold the offset to the current index in the from/to array
    drreg_reserve_register(drcontext, bb, inst, NULL, &previous_reg);   //used to hold the previous basic block's offset
    drreg_reserve_register(drcontext, bb, inst, NULL, &shm_reg);        //used to hold the pointer to the shm region
    drreg_reserve_register(drcontext, bb, inst, NULL, &tls_reg);        //used to hold the pointer to the tls structure

    //shm_reg stores AFL area, previous_reg stores previous offset

    //the thread local area when in non-per module coverage mode is of the form:
    // void * previous_block_address;
    // void * shm pointer;
    //
    //the thread local area when in per module coverage mode is of the form:
    // void * previous_block_address;
    // void * shm pointer for first target module;
    // void * shm pointer for second target module;
    // ...
    //
    //And the shm area is of the form (depending on whether we're compiled to 32/64-bit):
    // uint32_t/uint64_t num_items
    // void * from1
    // void * to1
    // void * from2
    // void * to2
    // ...

    //load the address of the thread local storage in tls_reg
    drmgr_insert_read_tls_field(drcontext, winafl_tls_field, bb, inst, tls_reg);

    //Get the address of the shm region into shm_reg
    opnd1 = opnd_create_reg(shm_reg);
    if (options.per_module_coverage)
        opnd2 = OPND_CREATE_MEMPTR(tls_reg, (target_module->index + 1) * sizeof(void *));
    else
        opnd2 = OPND_CREATE_MEMPTR(tls_reg, sizeof(void *));
    new_instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //load index to write to into a register and increment it atomically
    //First we set index_register to 1
    opnd1 = opnd_create_reg(index_register);
    opnd2 = OPND_CREATE_INTPTR(1);
    new_instr = INSTR_CREATE_mov_imm(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //Then we use xadd with a lock to atomically read the index and increment it
    //(the counter being updated is the num_items field at the start of the shm area)
    //NOTE(review): no check of the index against the size of the shm region is emitted
    //here - confirm that the region cannot be overrun by the writes below.
    opnd1 = OPND_CREATE_MEMPTR(shm_reg, 0);
    opnd2 = opnd_create_reg(index_register);
    new_instr = LOCK(INSTR_CREATE_xadd(drcontext, opnd1, opnd2));
    instrlist_meta_preinsert(bb, inst, new_instr);

    //load previous offset into register
    opnd1 = opnd_create_reg(previous_reg);
    opnd2 = OPND_CREATE_MEMPTR(tls_reg, 0);
    new_instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //times the index register by 2 (each record is a from/to pair, i.e. two pointer-sized slots)
    opnd1 = opnd_create_reg(index_register);
    opnd2 = OPND_CREATE_INT8(1);
    new_instr = INSTR_CREATE_shl(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //Now index_register contains the offset in the array to write to and previous_reg contains the previous basic block's offset

    //Let's write the previous basic block's edge to the array
    //(the displacement of one pointer size skips the num_items field)
#ifdef _M_X64
    opnd1 = opnd_create_base_disp(shm_reg, index_register, 8, 8, OPSZ_8);
#else
    opnd1 = opnd_create_base_disp(shm_reg, index_register, 4, 4, OPSZ_4);
#endif
    opnd2 = opnd_create_reg(previous_reg);
    new_instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //Now let's write the current basic block's offset to the array
    //(one pointer size after the "from" slot written above)
#ifdef _M_X64
    opnd1 = opnd_create_base_disp(shm_reg, index_register, 8, 0x10, OPSZ_8);
    opnd2 = OPND_CREATE_INT64(offset);
#else
    opnd1 = opnd_create_base_disp(shm_reg, index_register, 4, 8, OPSZ_4);
    opnd2 = OPND_CREATE_INT32(offset);
#endif
    new_instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //store the current basic block's offset to the previous slot
    opnd1 = OPND_CREATE_MEMPTR(tls_reg, 0);
#ifdef _M_X64
    opnd2 = OPND_CREATE_INT64(offset);
#else
    opnd2 = OPND_CREATE_INT32(offset);
#endif
    new_instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2);
    instrlist_meta_preinsert(bb, inst, new_instr);

    //Release the registers in the reverse order of their reservation
    drreg_unreserve_register(drcontext, bb, inst, tls_reg);
    drreg_unreserve_register(drcontext, bb, inst, shm_reg);
    drreg_unreserve_register(drcontext, bb, inst, previous_reg);
    drreg_unreserve_register(drcontext, bb, inst, index_register);
    drreg_unreserve_aflags(drcontext, bb, inst);

    return DR_EMIT_DEFAULT;
}

/**
 * This function is called prior to the application's function being fuzzed. It records the application's state
 * (rsp and rip values), so that post_fuzz_handler can snap the application state back and continue fuzzing
 * without restarting the process. It also initializes the afl_area edges bitmap that will be used by the
 * basic block instrumentations.
 * @param wrapcxt - An opaque pointer to the DynamoRIO context that this function is called in. Should only
 * be passed to the DynamoRIO API routines.
 * @param user_data - User data field that can be assigned and passed on to the post_fuzz_handler. Currently unused.
*/ static void pre_fuzz_handler(void *wrapcxt, INOUT void **user_data) { int i; void *drcontext; if (options.debug_mode || options.write_log) dr_fprintf(winafl_data.log, "pre_fuzz_handler started\n"); app_pc target_to_fuzz = drwrap_get_func(wrapcxt); dr_mcontext_t *mc = drwrap_get_mcontext_ex(wrapcxt, DR_MC_ALL); drcontext = drwrap_get_drcontext(wrapcxt); //Save the PC and stack fuzz_target.xsp = mc->xsp; fuzz_target.func_pc = target_to_fuzz; //save or restore arguments if (fuzz_target.iteration == 0) { for (i = 0; i < options.num_fuz_args; i++) { options.func_args[i] = drwrap_get_arg(wrapcxt, i); } } else { for (i = 0; i < options.num_fuz_args; i++) { drwrap_set_arg(wrapcxt, i, options.func_args[i]); } } //Wait for the fuzzer to tell us to start read_start_fuzz_command(); //Setup the SHM and TLS regions before we start tracking coverage setup_shm_and_tls_regions_for_coverage(drcontext); if (options.debug_mode || options.write_log) dr_fprintf(winafl_data.log, "pre_fuzz_handler finished\n"); } static void read_start_fuzz_command() { char command = 0; DWORD num_read; char buffer[256]; //Wait for orders from the fuzzer if (!options.debug_mode) { DR_ASSERT_MSG(ReadFile(pipe, &command, 1, &num_read, NULL), "Failed to read from comms pipe"); dr_fprintf(winafl_data.log, "Got %c from pipe\n", command); if (command != 'F') { if (command == 'Q') { dr_exit_process(0); } else if (command != 0) { memset(buffer, 0, sizeof(buffer)); snprintf(buffer, sizeof(buffer) - 1, "unrecognized command received over pipe: %02x (%c)", command, command); DR_ASSERT_MSG(false, buffer); } } } else { debug_data.pre_hanlder_called++; } } static void setup_shm_and_tls_regions_for_coverage(void *drcontext) { target_module_t * cur; if (options.write_log) dr_fprintf(winafl_data.log, "Initializing shm area\n"); //Zeroize the shm memory area if (options.per_module_coverage) { for (cur = options.target_modules; cur; cur = cur->next) { if (cur->afl_area) memset(cur->afl_area, 0, options.verbose_edges ? 
EDGES_SHM_SIZE : MAP_SIZE); } } else if (winafl_data.afl_area) memset(winafl_data.afl_area, 0, options.verbose_edges ? EDGES_SHM_SIZE : MAP_SIZE); if(options.write_log) dr_fprintf(winafl_data.log, "initializing thread local data\n"); //If needed fill in the thread local storage if (options.coverage_kind == COVERAGE_EDGE || options.thread_coverage || options.verbose_edges) { void **thread_data = (void **)drmgr_get_tls_field(drcontext, winafl_tls_field); thread_data[0] = 0; //previous basic block offset if (options.per_module_coverage) { for (cur = options.target_modules; cur; cur = cur->next) thread_data[cur->index + 1] = cur->afl_area; } else thread_data[1] = winafl_data.afl_area; } } /** * This function is called after to the application's function being fuzzed. It snaps the application state back * and continue fuzzing without restarting the process. However, if the fuzz_iterations count has been hit, the process * will be ended. * @param wrapcxt - An opaque pointer to the DynamoRIO context that this function is called in. Should only * be passed to the DynamoRIO API routines. * @param user_data - User data from the post_fuzz_handler. Currently unused. 
*/
static void post_fuzz_handler(void *wrapcxt, void *user_data)
{
    DWORD bytes_written;
    dr_mcontext_t *context;

    if (options.debug_mode || options.write_log)
        dr_fprintf(winafl_data.log, "post_fuzz_handler started\n");

    //Report a clean finish to the fuzzer, or just count the call when debugging
    if (options.debug_mode) {
        debug_data.post_handler_called++;
    } else {
        WriteFile(pipe, "K", 1, &bytes_written, NULL);
    }

    //Stop once the requested number of iterations has been run
    fuzz_target.iteration++;
    if (fuzz_target.iteration == options.fuzz_iterations) {
        if (options.debug_mode || options.write_log)
            dr_fprintf(winafl_data.log, "Exiting due to iteration count (iteration %d max %d)",
                fuzz_target.iteration, options.fuzz_iterations);
        dr_exit_process(0);
    }

    //Rewind the stack pointer and program counter to the values saved by pre_fuzz_handler
    context = drwrap_get_mcontext(wrapcxt);
    context->xsp = fuzz_target.xsp;
    context->pc = fuzz_target.func_pc;
    drwrap_redirect_execution(wrapcxt);

    if (options.debug_mode || options.write_log)
        dr_fprintf(winafl_data.log , "post_fuzz_handler finished\n");
}

/**
 * This function is called prior to the CreateFileW Windows API function to help determine what
 * files are being opened by the target application.
 * @param wrapcxt - An opaque pointer to the DynamoRIO context that this function is called in. Should only
 * be passed to the DynamoRIO API routines.
 * @param user_data - User data pointer. Currently unused.
 */
static void createfilew_interceptor(void *wrapcxt, INOUT void **user_data)
{
    //The first argument of the wrapped call is the wide-character file name
    wchar_t *wide_path = (wchar_t *)drwrap_get_arg(wrapcxt, 0);

    if (!options.debug_mode && !options.write_log)
        return;

    dr_fprintf(winafl_data.log, "In OpenFileW, reading %ls\n", wide_path);
}

/**
 * This function is called prior to the CreateFileA Windows API function to help determine what
 * files are being opened by the target application.
 * @param wrapcxt - An opaque pointer to the DynamoRIO context that this function is called in. Should only
 * be passed to the DynamoRIO API routines.
 * @param user_data - User data pointer. Currently unused.
*/ static void createfilea_interceptor(void *wrapcxt, INOUT void **user_data) { char *filename = (char *)drwrap_get_arg(wrapcxt, 0); if (options.debug_mode || options.write_log) dr_fprintf(winafl_data.log, "In OpenFileA, reading %s\n", filename); } static void verfierstopmessage_interceptor_pre(void *wrapctx, INOUT void **user_data) { EXCEPTION_RECORD exception_record = { 0 }; dr_exception_t dr_exception = { 0 }; dr_exception.record = &exception_record; exception_record.ExceptionCode = STATUS_HEAP_CORRUPTION; onexception(NULL, &dr_exception); } /** * This function is a callback for when a module is unloaded. We remove it from the module_table so * we don't try to instrument it anymore. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. * @param info - a module_data_t structure describing the module that was unloaded */ static void event_module_unload(void *drcontext, const module_data_t *info) { module_table_unload(module_table, info); } /** * This function is a callback for when a module is loaded. We remove it from the module_table so * we don't try to instrument it anymore. * @param drcontext - a pointer to the input program's machine context. This parameter should not be * inspected or modified, and should only be used to be passed to the DynamoRIO API routines. * @param info - a module_data_t structure describing the module that was unloaded * @param loaded - whether the module is fully initialized by the loader or in the process of being loaded */ static void event_module_load(void *drcontext, const module_data_t *info, bool loaded) { const char *module_name = info->names.exe_name; app_pc to_wrap = 0; if (module_name == NULL) { // In case exe_name is not defined, we will fall back on the preferred name. 
module_name = dr_module_preferred_name(info); } if (options.debug_mode) dr_fprintf(winafl_data.log, "Module loaded, %s\n", module_name); if (options.fuzz_module[0]) { if (strcmp(module_name, options.fuzz_module) == 0) { if (options.fuzz_offset) { to_wrap = info->start + options.fuzz_offset; } else { //first try exported symbols to_wrap = (app_pc)dr_get_proc_address(info->handle, options.fuzz_method); if (!to_wrap) { //if that fails, try with the symbol access library drsym_init(0); drsym_lookup_symbol(info->full_path, options.fuzz_method, (size_t *)(&to_wrap), 0); drsym_exit(); DR_ASSERT_MSG(to_wrap, "Can't find specified method in fuzz_module"); to_wrap += (size_t)info->start; } } drwrap_wrap_ex(to_wrap, pre_fuzz_handler, post_fuzz_handler, NULL, options.callconv); } if (options.debug_mode && (_stricmp(module_name, "KERNEL32.dll") == 0)) { to_wrap = (app_pc)dr_get_proc_address(info->handle, "CreateFileW"); drwrap_wrap(to_wrap, createfilew_interceptor, NULL); to_wrap = (app_pc)dr_get_proc_address(info->handle, "CreateFileA"); drwrap_wrap(to_wrap, createfilea_interceptor, NULL); } } if (_stricmp(module_name, "verifier.dll") == 0) { to_wrap = (app_pc)dr_get_proc_address(info->handle, "VerifierStopMessage"); drwrap_wrap(to_wrap, verfierstopmessage_interceptor_pre, NULL); } module_table_load(module_table, info); } /** * This function is called prior to the application exiting. It cleans up the DynamoRIO releated resources. */ static void event_exit(void) { DWORD num_written; if (options.debug_mode) { if (debug_data.pre_hanlder_called == 0) { dr_fprintf(winafl_data.log, "WARNING: Target function was never called. Incorrect target_offset?\n"); } else if (debug_data.post_handler_called == 0) { dr_fprintf(winafl_data.log, "WARNING: Post-fuzz handler was never reached. 
Did the target function return normally?\n"); } else { dr_fprintf(winafl_data.log, "Everything appears to be running normally.\n"); } dr_fprintf(winafl_data.log, "Coverage map follows:\n"); dump_winafl_data(); dr_close_file(winafl_data.log); } if (!options.fuzz_module[0] && !winafl_data.exception_hit) { //if we're not using the pre/post fuzz handler functions, we should let the fuzzer know we didn't crash WriteFile(pipe, "K", 1, &num_written, NULL); } /* destroy module table */ module_table_destroy(module_table); drx_exit(); drmgr_exit(); } /** * This function is called at the time DynamoRIO is started. It initializes the AFL bitmaps and sets up the instrumentation. */ static void event_init(void) { char buffer[MAXIMUM_PATH]; if (options.debug_mode || options.write_log) { debug_data.pre_hanlder_called = 0; debug_data.post_handler_called = 0; winafl_data.log = drx_open_unique_appid_file(options.logdir, dr_get_process_id(), "afl", "proc.log", DR_FILE_ALLOW_LARGE, buffer, BUFFER_SIZE_ELEMENTS(buffer)); if (winafl_data.log != INVALID_FILE) { dr_log(NULL, LOG_ALL, 1, "winafl: log file is %s\n", buffer); NOTIFY(1, "\n", buffer); } } module_table = module_table_create(); memset(winafl_data.cache, 0, sizeof(winafl_data.cache)); if (options.per_module_coverage) { target_module_t * target_module; for (target_module = options.target_modules; target_module; target_module = target_module->next) { DR_ASSERT_MSG(target_module->afl_area != NULL, "afl_area not properly setup"); memset(target_module->afl_area, 0, options.verbose_edges ? EDGES_SHM_SIZE : MAP_SIZE); } } else { DR_ASSERT_MSG(winafl_data.afl_area != NULL, "afl_area not properly setup"); memset(winafl_data.afl_area, 0, options.verbose_edges ? EDGES_SHM_SIZE : MAP_SIZE); } fuzz_target.iteration = 0; } /** * This function sets up a pipe to communicate to the main fuzzing process. 
* @param pipe_name - the name of the pipe to setup * @param access - the type of access required to the pipe * @return - a HANDLE to the pipe that was setup */ static HANDLE setup_pipe(const char * pipe_name, DWORD access) { char buffer[512]; HANDLE pipe_handle; pipe_handle = CreateFile( pipe_name, // pipe name access, 0, // no sharing NULL, // default security attributes OPEN_EXISTING, // opens existing pipe 0, // default attributes NULL); // no template file if (pipe_handle == INVALID_HANDLE_VALUE) { snprintf(buffer, sizeof(buffer) - 1, "Error connecting to pipe '%s'", pipe_name); buffer[sizeof(buffer) - 1] = 0; DR_ASSERT_MSG(false, buffer); } return pipe_handle; } /** * Sets up the pipe used for communication to the main fuzzing process */ static void setup_comms_pipe() { pipe = setup_pipe(options.pipe_name, GENERIC_READ | GENERIC_WRITE); } /** * This function maps a shared memory region and returns a pointer it * @param name - the name of the shared memory region to map * @param for_edges - whether the shm region is for the full edge recording or not * @return - a pointer to the shared memory region */ static unsigned char * get_shmem_region(char * name, int for_edges) { HANDLE map_file; char buffer[512]; char * ret; DWORD size; if (for_edges) size = EDGES_SHM_SIZE; else size = MAP_SIZE; map_file = OpenFileMapping( FILE_MAP_ALL_ACCESS, // read/write access FALSE, // do not inherit the name name); // name of mapping object if (map_file == NULL) { snprintf(buffer, sizeof(buffer) - 1, "OpenFileMapping Failed for shm_name %s (GLE=%d)", name, GetLastError()); DR_ASSERT_MSG(false, buffer); } ret = (unsigned char *)MapViewOfFile(map_file, // handle to map object FILE_MAP_ALL_ACCESS, // read/write permission 0, 0, size); if (ret == NULL) { snprintf(buffer, sizeof(buffer) - 1, "MapViewOfFile Failed for shm_name %s (GLE=%d)", name, GetLastError()); DR_ASSERT_MSG(false, buffer); } return ret; } /** * Sets up the shared memory regions for all of the target modules being 
tracked. */ static void setup_per_module_shmem() { target_module_t * cur; char name[512]; DR_ASSERT_MSG(options.per_module_coverage, "setup_per_module_shmem should only be called when options.per_module_coverage is true"); for (cur = options.target_modules; cur; cur = cur->next) { memset(name, 0, sizeof(name)); snprintf(name, sizeof(name) - 1, "%s_%d", options.shm_name, cur->index); cur->afl_area = get_shmem_region(name, options.verbose_edges); } } /** * Sets up the shared memory region when not tracking per module coverage. */ static void setup_shmem() { DR_ASSERT_MSG(!options.per_module_coverage, "setup_shmem should only be called when options.per_module_coverage is false"); winafl_data.afl_area = get_shmem_region(options.shm_name, options.verbose_edges); } /** * Opens the module file and adds the modules listed in it to the target_module linked list */ static void read_module_file(const char * filename) { FILE *fp; char line[1024]; fp = fopen(filename, "rb"); USAGE_CHECK(fp, "Couldn't open module file"); while (fgets(line, sizeof(line), fp)) { if (line[strlen(line) - 1] == '\n') line[strlen(line) - 1] = 0; if (line[strlen(line) - 1] == '\r') line[strlen(line) - 1] = 0; add_target_module(line); } fclose(fp); } /** * Parses the command line arguments passed to DynamoRIO and sets up the global options struct * @param id - The client_id assigned by DynamoRIO to this instrance * @param argc - the number of arguments in the argv parameter * @param argv - the command line arguments passed to DynamoRIO */ static void options_init(client_id_t id, int argc, const char *argv[]) { int i; const char *token; char buffer[512]; /* default values */ options.nudge_kills = true; options.verbose_edges = false; options.debug_mode = false; options.write_log = false; options.thread_coverage = true; options.per_module_coverage = false; options.coverage_kind = COVERAGE_EDGE; options.target_modules = NULL; options.fuzz_module[0] = 0; options.fuzz_method[0] = 0; options.fuzz_offset = 0; 
options.fuzz_iterations = 1; options.func_args = NULL; options.num_fuz_args = 0; options.callconv = DRWRAP_CALLCONV_DEFAULT; dr_snprintf(options.logdir, BUFFER_SIZE_ELEMENTS(options.logdir), "."); strcpy(options.pipe_name, "\\\\.\\pipe\\afl_pipe_default"); strcpy(options.shm_name, "afl_shm_default"); for (i = 1/*skip client*/; i < argc; i++) { token = argv[i]; if (strcmp(token, "-no_nudge_kills") == 0) options.nudge_kills = false; else if (strcmp(token, "-verbose_edges") == 0) { options.verbose_edges = true; options.coverage_kind = COVERAGE_EDGE; options.thread_coverage = true; } else if (strcmp(token, "-nudge_kills") == 0) options.nudge_kills = true; else if (strcmp(token, "-no_thread_coverage") == 0) options.thread_coverage = false; else if (strcmp(token, "-per_module_coverage") == 0) options.per_module_coverage = true; else if (strcmp(token, "-debug") == 0) options.debug_mode = true; else if (strcmp(token, "-write_log") == 0) options.write_log = true; else if (strcmp(token, "-logdir") == 0) { USAGE_CHECK((i + 1) < argc, "missing logdir path"); strncpy(options.logdir, argv[++i], BUFFER_SIZE_ELEMENTS(options.logdir)); } else if (strcmp(token, "-fuzzer_id") == 0) { USAGE_CHECK((i + 1) < argc, "missing fuzzer id"); strcpy(options.pipe_name, "\\\\.\\pipe\\afl_pipe_"); strcat(options.pipe_name, argv[i + 1]); strcpy(options.shm_name, "afl_shm_"); strcat(options.shm_name, argv[i + 1]); i++; } else if (strcmp(token, "-covtype") == 0) { USAGE_CHECK((i + 1) < argc, "missing coverage type"); token = argv[++i]; if (strcmp(token, "bb") == 0) options.coverage_kind = COVERAGE_BB; else if (strcmp(token, "edge") == 0) options.coverage_kind = COVERAGE_EDGE; else USAGE_CHECK(false, "invalid coverage type"); } else if (strcmp(token, "-coverage_module") == 0) { USAGE_CHECK((i + 1) < argc, "missing module"); add_target_module(argv[++i]); } else if (strcmp(token, "-coverage_module_file") == 0) { USAGE_CHECK((i + 1) < argc, "missing module file"); read_module_file(argv[++i]); } else if 
(strcmp(token, "-target_module") == 0) { USAGE_CHECK((i + 1) < argc, "missing module"); strncpy(options.fuzz_module, argv[++i], BUFFER_SIZE_ELEMENTS(options.fuzz_module)); } else if (strcmp(token, "-target_method") == 0) { USAGE_CHECK((i + 1) < argc, "missing method"); strncpy(options.fuzz_method, argv[++i], BUFFER_SIZE_ELEMENTS(options.fuzz_method)); } else if (strcmp(token, "-fuzz_iterations") == 0) { USAGE_CHECK((i + 1) < argc, "missing number of iterations"); options.fuzz_iterations = atoi(argv[++i]); } else if (strcmp(token, "-nargs") == 0) { USAGE_CHECK((i + 1) < argc, "missing number of arguments"); options.num_fuz_args = atoi(argv[++i]); } else if (strcmp(token, "-target_offset") == 0) { USAGE_CHECK((i + 1) < argc, "missing offset"); options.fuzz_offset = strtoul(argv[++i], NULL, 0); } else if (strcmp(token, "-verbose") == 0) { USAGE_CHECK((i + 1) < argc, "missing -verbose number"); token = argv[++i]; if (dr_sscanf(token, "%u", &verbose) != 1) { USAGE_CHECK(false, "invalid -verbose number"); } } else if (strcmp(token, "-call_convention") == 0) { USAGE_CHECK((i + 1) < argc, "missing calling convention"); ++i; if (strcmp(argv[i], "stdcall") == 0) options.callconv = DRWRAP_CALLCONV_CDECL; else if (strcmp(argv[i], "fastcall") == 0) options.callconv = DRWRAP_CALLCONV_FASTCALL; else if (strcmp(argv[i], "thiscall") == 0) options.callconv = DRWRAP_CALLCONV_THISCALL; else if (strcmp(argv[i], "ms64") == 0) options.callconv = DRWRAP_CALLCONV_MICROSOFT_X64; else NOTIFY(0, "Unknown calling convention, using default value instead.\n"); } else { NOTIFY(0, "UNRECOGNIZED OPTION: \"%s\"\n", token); memset(buffer, 0, sizeof(buffer)); snprintf(buffer, sizeof(buffer) - 1, "Invalid option: %s", token); USAGE_CHECK(false, buffer); } } if (options.verbose_edges && (options.coverage_kind != COVERAGE_EDGE || options.thread_coverage != true)) { USAGE_CHECK(false, "If verbose_edges is specified, then the coverage kind must be edge and thread coverage must be on"); } if 
(options.fuzz_module[0] && (options.fuzz_offset == 0) && (options.fuzz_method[0] == 0)) { USAGE_CHECK(false, "If fuzz_module is specified, then either fuzz_method or fuzz_offset must be as well"); } if (options.num_fuz_args) { options.func_args = (void **)dr_global_alloc(options.num_fuz_args * sizeof(void *)); } if (strlen(options.fuzz_module) == 0 && strlen(options.fuzz_method) == 0 && options.fuzz_offset == 0 && options.fuzz_iterations != 1) { USAGE_CHECK(false, "If fuzz_module is specified, then either fuzz_method or fuzz_offset must be as well"); } } /** * The main entrypoint from DynamoRIO * @param id - The client_id assigned by DynamoRIO to this instrance * @param argc - the number of command line arguments in the argv parameter * @param argv - the command line arguments passed to DynamoRIO */ DR_EXPORT void dr_client_main(client_id_t id, int argc, const char *argv[]) { target_module_t * cur; drreg_options_t ops = { sizeof(ops), 2 /*max slots needed: aflags*/, false }; size_t size; dr_set_client_name("WinAFL", ""); drmgr_init(); drx_init(); drreg_init(&ops); drwrap_init(); options_init(id, argc, argv); dr_register_exit_event(event_exit); drmgr_register_exception_event(onexception); if (options.verbose_edges) { drmgr_register_bb_instrumentation_event(NULL, instrument_verbose_edge_coverage, NULL); } else if (options.coverage_kind == COVERAGE_BB) { drmgr_register_bb_instrumentation_event(NULL, instrument_bb_coverage, NULL); } else if (options.coverage_kind == COVERAGE_EDGE) { drmgr_register_bb_instrumentation_event(NULL, instrument_edge_coverage, NULL); } drmgr_register_module_load_event(event_module_load); drmgr_register_module_unload_event(event_module_unload); dr_register_nudge_event(event_nudge, id); client_id = id; if (options.nudge_kills) drx_register_soft_kills(event_soft_kill); winafl_data.instrumentation_enabled = false; winafl_data.exception_hit = false; if (options.thread_coverage || options.coverage_kind == COVERAGE_EDGE) { size = MAP_SIZE; if 
(options.verbose_edges) size = EDGES_SHM_SIZE; winafl_data.fake_afl_area = (unsigned char *)dr_global_alloc(size); memset(winafl_data.fake_afl_area, 0, size); } //Allocate the afl area if (!options.debug_mode) { setup_comms_pipe(); if (options.per_module_coverage) setup_per_module_shmem(); else setup_shmem(); } else { if (options.per_module_coverage) { for (cur = options.target_modules; cur; cur = cur->next) cur->afl_area = (unsigned char *)dr_global_alloc(MAP_SIZE); } else winafl_data.afl_area = (unsigned char *)dr_global_alloc(MAP_SIZE); } if (options.coverage_kind == COVERAGE_EDGE || options.thread_coverage) { winafl_tls_field = drmgr_register_tls_field(); if (winafl_tls_field == -1) { DR_ASSERT_MSG(false, "error reserving TLS field"); } drmgr_register_thread_init_event(event_thread_init); drmgr_register_thread_exit_event(event_thread_exit); } event_init(); if(options.write_log) dr_fprintf(winafl_data.log, "Done with dr_client_main\n"); }