From 87f4c93005a35bd8171d779df3b23d7be26bbad8 Mon Sep 17 00:00:00 2001 From: Dmitriy Nesterov <dim.nesterov2015@gmail.com> Date: Sun, 25 Sep 2022 23:10:13 +0300 Subject: [PATCH] test/fuzz: add grammar-based LuaJIT fuzzer Patch adds a LuaJIT fuzzer based on libprotobuf-mutator and LibFuzzer. Grammar is described via messages in protobuf format, serializer is applied to convert .proto format to string. For displaying generated code on the screen during fuzzing set the environment variable 'LPM_DUMP_NATIVE_INPUT'. For displaying error messages from lua functions set the environment variable 'LUA_FUZZER_VERBOSE'. Note: UndefinedBehaviourSanitizer is unsupported by LuaJIT (see #8473), so fuzzing test is disabled when CMake option ENABLE_UB_SANITIZER is passed. Closes #4823 NO_DOC=<fuzzing testing of LuaJIT> NO_TEST=<fuzzing testing of LuaJIT> (cherry picked from commit a287c853aa2ef6cf260c6b9d35ac99108f0e5483) --- .../unreleased/luajit-grammar-fuzzer.md | 3 + test/fuzz/CMakeLists.txt | 8 + test/fuzz/luaL_loadbuffer/CMakeLists.txt | 32 + .../luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc | 72 ++ test/fuzz/luaL_loadbuffer/lua_grammar.proto | 402 ++++++++++ test/fuzz/luaL_loadbuffer/serializer.cc | 700 ++++++++++++++++++ test/fuzz/luaL_loadbuffer/serializer.h | 123 +++ 7 files changed, 1340 insertions(+) create mode 100644 changelogs/unreleased/luajit-grammar-fuzzer.md create mode 100644 test/fuzz/luaL_loadbuffer/CMakeLists.txt create mode 100644 test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc create mode 100644 test/fuzz/luaL_loadbuffer/lua_grammar.proto create mode 100644 test/fuzz/luaL_loadbuffer/serializer.cc create mode 100644 test/fuzz/luaL_loadbuffer/serializer.h diff --git a/changelogs/unreleased/luajit-grammar-fuzzer.md b/changelogs/unreleased/luajit-grammar-fuzzer.md new file mode 100644 index 0000000000..59e119c856 --- /dev/null +++ b/changelogs/unreleased/luajit-grammar-fuzzer.md @@ -0,0 +1,3 @@ +## feature/test/fuzz + +* LuaJIT now can be fuzzed using 
grammar-based fuzzer (gh-4823). diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt index 94b4987b99..a082b735af 100644 --- a/test/fuzz/CMakeLists.txt +++ b/test/fuzz/CMakeLists.txt @@ -93,6 +93,14 @@ create_fuzz_test(PREFIX mp_datetime LIBRARIES core fuzzer_config ) +include(ProtobufMutator) + +# UndefinedBehaviorSanitizer is not supported in LuaJIT. +# See https://github.com/tarantool/tarantool/issues/8473 +if (NOT ENABLE_UB_SANITIZER) + add_subdirectory(luaL_loadbuffer) +endif() + add_custom_target(fuzzers DEPENDS ${FUZZ_TEST_TARGETS} COMMENT "Build fuzzers") diff --git a/test/fuzz/luaL_loadbuffer/CMakeLists.txt b/test/fuzz/luaL_loadbuffer/CMakeLists.txt new file mode 100644 index 0000000000..23271186ec --- /dev/null +++ b/test/fuzz/luaL_loadbuffer/CMakeLists.txt @@ -0,0 +1,32 @@ +add_executable(luaL_loadbuffer_fuzzer + luaL_loadbuffer_fuzzer.cc + serializer.cc) + +add_library(lua_grammar-proto) + +foreach(lib ${LPM_LIBRARIES}) + find_library(${lib} REQUIRED_FILES) +endforeach(lib) + +protobuf_generate(LANGUAGE cpp + TARGET lua_grammar-proto + PROTOS lua_grammar.proto) + +target_link_libraries(lua_grammar-proto + ${PROTOBUF_LIBRARIES}) + +target_include_directories(luaL_loadbuffer_fuzzer PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) + +target_link_libraries(luaL_loadbuffer_fuzzer + PUBLIC + lua_grammar-proto + ${LPM_LIBRARIES} + libluajit_static + fuzzer_config) + +add_dependencies(luaL_loadbuffer_fuzzer + libluajit_static + ${LPM_LIBRARIES} + lua_grammar-proto) + +set(FUZZ_TEST_TARGETS "${FUZZ_TEST_TARGETS};luaL_loadbuffer_fuzzer" PARENT_SCOPE) diff --git a/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc b/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc new file mode 100644 index 0000000000..94ebd5bad9 --- /dev/null +++ b/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc @@ -0,0 +1,72 @@ +extern "C" +{ +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> +} + +#include "lua_grammar.pb.h" +#include "serializer.h" + +#include 
<libprotobuf-mutator/port/protobuf.h> +#include <libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h> + +/** + * Get an error message from the top of the stack, and report it to + * std::cerr. The message is removed from the stack only when reporting + * is enabled via LUA_FUZZER_VERBOSE; otherwise the stack is left + * untouched. + */ +static inline void +report_error(lua_State *L, const std::string &prefix) +{ + const char *verbose = ::getenv("LUA_FUZZER_VERBOSE"); + if (!verbose) + return; + + /* + * The error object is on top of the stack (index -1); index 1 + * is the same slot only while nothing else is left below it. + * lua_tostring() returns NULL for values that are neither + * strings nor numbers. + */ + const char *msg = lua_tostring(L, -1); + std::string err_str = msg ? msg : "(error object is not a string)"; + /* Pop error message from stack. */ + lua_pop(L, 1); + std::cerr << prefix << " error: " << err_str << std::endl; +} + +DEFINE_PROTO_FUZZER(const lua_grammar::Block &message) +{ + lua_State *L = luaL_newstate(); + if (!L) + return; + + std::string code = BlockToString(message); + + if (::getenv("LPM_DUMP_NATIVE_INPUT") && code.size() != 0) { + std::cout << "-------------------------" << std::endl; + std::cout << code << std::endl; + } + + luaL_openlibs(L); + + /* + * See https://luajit.org/running.html. + */ + luaL_dostring(L, "jit.opt.start('hotloop=1')"); + luaL_dostring(L, "jit.opt.start('hotexit=1')"); + luaL_dostring(L, "jit.opt.start('recunroll=1')"); + luaL_dostring(L, "jit.opt.start('callunroll=1')"); + + if (luaL_loadbuffer(L, code.c_str(), code.size(), "fuzz") != LUA_OK) { + report_error(L, "luaL_loadbuffer()"); + goto end; + } + + /* + * Using lua_pcall (protected call) to catch errors due to + * wrong semantics of some generated code chunks. + * Mostly, generated code is not semantically correct, so it is + * needed to describe Lua semantics for more interesting + * results and fuzzer tests. 
+ */ + if (lua_pcall(L, 0, 0, 0) != LUA_OK) + report_error(L, "lua_pcall()"); + +end: + lua_settop(L, 0); + lua_close(L); +} diff --git a/test/fuzz/luaL_loadbuffer/lua_grammar.proto b/test/fuzz/luaL_loadbuffer/lua_grammar.proto new file mode 100644 index 0000000000..6a4fbfad26 --- /dev/null +++ b/test/fuzz/luaL_loadbuffer/lua_grammar.proto @@ -0,0 +1,402 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file. + */ + +/* + * Grammar is for Lua 5.1. + * Comments around message definitions are in eBNF notation. + * Strings defined using a long format enclosed by long brackets are NIY. + */ +syntax = "proto2"; +package lua_grammar; + +/* block ::= chunk */ +message Block { + required Chunk chunk = 1; +} + +/* + * chunk ::= {stat [`;`]} [laststat [`;`]] + * Semicolon encapsulated in Statement and LastStatement. + */ +message Chunk { + repeated Statement stat = 1; + optional LastStatement laststat = 2; +} + +/* + * stat ::= assignmentlist | + * functioncall | + * doblock | + * whilecycle | + * repeatcycle | + * ifstat | + * forcyclename | + * forcyclelist | + * function | + * localfunc | + * localnames + */ +message Statement { + oneof stat_oneof { + AssignmentList list = 1; + FunctionCall call = 2; + DoBlock block = 3; + WhileCycle whilecycle = 4; + RepeatCycle repeatcycle = 5; + IfStatement ifstat = 6; + ForCycleName forcyclename = 7; + ForCycleList forcyclelist = 8; + Function func = 9; + LocalFunc localfunc = 10; + LocalNames localnames = 11; + } + optional bool semicolon = 12; +} + +/* assignmentlist ::= varlist `=` explist */ +message AssignmentList { + message VariableList { + required Variable var = 1; + repeated Variable vars = 2; + } + + required VariableList varlist = 1; + required ExpressionList explist = 2; +} + +/* functioncall ::= prefixexp args | prefixexp `:` Name args */ +message FunctionCall { + /* args ::= `(` [explist] `)` | tableconstructor | String */ + message Args { + oneof args_oneof { 
+ OptionalExpressionList explist = 1; + TableConstructor tableconstructor = 2; + string str = 3; + } + } + + /* prefixexp args */ + message PrefixArgs { + required PrefixExpression prefixexp = 1; + required Args args = 2; + } + + /* prefixexp `:` Name args */ + message PrefixNamedArgs { + required PrefixExpression prefixexp = 1; + required Name name = 2; + required Args args = 3; + } + + oneof call_oneof { + PrefixArgs prefArgs = 1; + PrefixNamedArgs namedArgs = 2; + } +} + +/* doblock ::= `do` block `end` */ +message DoBlock { + required Block block = 1; +} + +/* whilecycle ::= `while` exp `do` block `end` */ +message WhileCycle { + required Expression condition = 1; + required DoBlock doblock = 2; +} + +/* repeatcycle ::= `repeat` block `until` exp */ +message RepeatCycle { + required Block block = 1; + required Expression condition = 2; +} + +/* + * ifstat ::= `if` exp `then` block {`elseif` exp `then` block} + * [`else` block] `end` + */ +message IfStatement { + message ElseIfBlock { + required Expression condition = 1; + required Block block = 2; + } + + required Expression condition = 1; + required Block first = 2; + repeated ElseIfBlock clauses = 3; + optional Block last = 4; +} + +/* forcyclename ::= `for` Name `=` exp `,` exp [`,` exp] doblock */ +message ForCycleName { + required Name name = 1; + required Expression startexp = 2; + required Expression stopexp = 3; + optional Expression stepexp = 4; + required DoBlock doblock = 5; +} + +/* forcyclelist ::= `for` namelist `in` explist doblock */ +message ForCycleList { + required NameList names = 1; + required ExpressionList expressions = 2; + required DoBlock doblock = 3; +} + +/* function ::= `function` funcname funcbody */ +message Function { + /* funcname ::= Name {`.` Name} [`:` Name] */ + message FuncName { + required Name firstname = 1; + repeated Name names = 2; + optional Name lastname = 3; + } + + required FuncName name = 1; + required FuncBody body = 2; +} + +/* funcbody ::= `(` [parlist] `)` 
block `end` */ +message FuncBody { + /* namelistwithellipsis ::= namelist [`,` `...`] */ + message NameListWithEllipsis { + required NameList namelist = 1; + optional string ellipsis = 2; + } + + /* parlist ::= namelistwithellipsis | `...` */ + message ParList { + oneof parlist_oneof { + NameListWithEllipsis namelist = 1; + string ellipsis = 2; + } + } + + optional ParList parlist = 1; + required Block block = 2; +} + +/* namelist ::= Name {`,` Name} */ +message NameList { + required Name firstname = 1; + repeated Name names = 2; +} + +/* localfunc ::= `local` `function` Name funcbody */ +message LocalFunc { + required Name name = 1; + required FuncBody funcbody = 2; +} + +/* localnames ::= `local` namelist [`=` explist] */ +message LocalNames { + required NameList namelist = 1; + optional ExpressionList explist = 2; +} + +/* laststat ::= `return` [explist] | `break` */ +message LastStatement { + message ReturnOptionalExpressionList { + optional ExpressionList explist = 1; + } + + oneof last_oneof { + ReturnOptionalExpressionList explist = 1; + uint32 break = 2; + } + optional bool semicolon = 3; +} + +/* explist ::= {exp `,`} exp */ +message ExpressionList { + repeated Expression expressions = 1; + required Expression explast = 2; +} + +message OptionalExpressionList { + optional ExpressionList explist = 1; +} + +/* var ::= Name | prefixexp `[` exp `]` | prefixexp `.` Name */ +message Variable { + /* prefixexp `[` exp `]` */ + message IndexWithExpression { + required PrefixExpression prefixexp = 1; + required Expression exp = 2; + } + + /* prefixexp `.` Name */ + message IndexWithName { + required PrefixExpression prefixexp = 1; + required string Name = 2; + } + + oneof var_oneof { + Name name = 1; + IndexWithExpression indexexpr = 2; + IndexWithName indexname = 3; + } +} + +/* prefixexp ::= var | functioncall | `(` exp `)` */ +message PrefixExpression { + oneof prefix_oneof { + Variable var = 1; + FunctionCall functioncall = 2; + Expression exp = 3; + } +} + +/* 
+ * exp ::= nil | + * false | + * true | + * Number | + * String | + * `...` | + * anonfunction | + * prefixexp | + * tableconstructor | + * exp binop exp | + * unop exp + */ +message Expression { + /* anonfunction ::= `function` funcbod */ + message AnonFunc { + required FuncBody body = 1; + } + + /* exp binop exp */ + message ExpBinaryOpExp { + required Expression leftexp = 1; + required BinaryOperator binop = 2; + required Expression rightexp = 3; + } + + /* unop exp */ + message UnaryOpExp { + required UnaryOperator unop = 1; + required Expression exp = 2; + } + + oneof expr_oneof { + uint32 nil = 1; + uint32 false = 2; + uint32 true = 3; + double number = 4; + string str = 5; + string ellipsis = 6; + AnonFunc function = 7; + PrefixExpression prefixexp = 8; + TableConstructor tableconstructor = 9; + ExpBinaryOpExp binary = 10; + UnaryOpExp unary = 11; + } +} + +/* tableconstructor ::= `{` [fieldlist] `}` */ +message TableConstructor { + optional FieldList fieldlist = 1; +} + +/* fieldlist ::= field {fieldsep field} [fieldsep] */ +message FieldList { + /* fieldsep field */ + message FieldWithFieldSep { + required Field field = 1; + required FieldSep sep = 2; + } + + required Field firstField = 1; + repeated FieldWithFieldSep fields = 2; + optional FieldSep lastSep = 3; +} + +/* field ::= `[` exp `]` `=` exp | Name `=` exp | exp */ +message Field { + /* `[` exp `]` `=` exp */ + message ExpressionAssignment { + required Expression key = 1; + required Expression value = 2; + } + + /* Name `=` exp */ + message NameAssignment { + required Name name = 1; + required Expression value = 2; + } + + oneof field_oneof { + ExpressionAssignment exprassign = 1; + NameAssignment namedassign = 2; + Expression expression = 3; + } +} + +/* fieldsep ::= `,` | `;` */ +message FieldSep { + oneof sep_oneof { + uint32 comma = 1; + uint32 semicolon = 2; + } +} + +/* + * binop ::= `+` | + * `-` | + * `*` | + * `/` | + * `^` | + * `%` | + * `..` | + * `<` | + * `<=` | + * `>` | + * `>=` | 
+ * `==` | + * `~=` | + * `and` | + * `or` + */ +message BinaryOperator { + oneof binary_oneof { + /* Arithmetic operators. */ + uint32 add = 1; + uint32 sub = 2; + uint32 mult = 3; + uint32 div = 4; + uint32 exp = 5; + uint32 mod = 6; + + /* Concatenation. */ + uint32 concat = 7; + + /* Logical operators. */ + uint32 less = 8; + uint32 lessEqual = 9; + uint32 greater = 10; + uint32 greaterEqual = 11; + uint32 equal = 12; + uint32 notEqual = 13; + uint32 and = 14; + uint32 or = 15; + } +} + +/* unop ::= `-` | `not` | `#` */ +message UnaryOperator { + oneof unary_oneof { + uint32 negate = 1; + uint32 not = 2; + uint32 length = 3; + } +} + +/* Name ::= <correct Lua Identifier> */ +message Name { + required string name = 1; + required uint32 num = 2; +} diff --git a/test/fuzz/luaL_loadbuffer/serializer.cc b/test/fuzz/luaL_loadbuffer/serializer.cc new file mode 100644 index 0000000000..8a54077b9a --- /dev/null +++ b/test/fuzz/luaL_loadbuffer/serializer.cc @@ -0,0 +1,700 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file. + */ +#include "serializer.h" + +#include <cctype> + +/** + * Strip the leading run of decimal digits from s. + * Returns an empty string when s consists of digits only. + */ +static inline std::string +RemoveLeadingNumbers(const std::string &s) +{ + for (size_t i = 0; i < s.length(); ++i) + /* <cctype> functions require an unsigned char value. */ + if (!std::isdigit(static_cast<unsigned char>(s[i]))) + return s.substr(i); + return ""; +} + +/** + * Keep only the characters of s that may form a Lua identifier: + * letters, digits and '_'. Digits are dropped until the first + * letter or '_' has been kept, so the result never starts with + * a digit. The result may be empty. + */ +static inline std::string +ClearNonIdentifierSymbols(const std::string &s) +{ + std::string cleared; + + for (size_t i = 0; i < s.length(); ++i) { + const unsigned char c = static_cast<unsigned char>(s[i]); + const bool is_start = std::isalpha(c) || s[i] == '_'; + if (is_start || (!cleared.empty() && std::isdigit(c))) + cleared += s[i]; + } + + return cleared; +} + +static inline std::string +clamp(std::string s, size_t maxSize = kMaxStrLength) +{ + if (s.size() > maxSize) + s.resize(maxSize); + return s; +} + +static inline double +clamp(double number, double upper, double lower) +{ + return number <= lower ? lower : + number >= upper ? 
upper : number; +} + +static inline std::string +ConvertToStringDefault(const std::string &s) +{ + std::string ident = RemoveLeadingNumbers(s); + ident = clamp(ClearNonIdentifierSymbols(ident)); + if (ident.empty()) + return std::string(kDefaultIdent); + return ident; +} + +PROTO_TOSTRING(Block, block) +{ + return ChunkToString(block.chunk()); +} + +PROTO_TOSTRING(Chunk, chunk) +{ + std::string chunk_str; + for (int i = 0; i < chunk.stat_size(); ++i) + chunk_str += StatementToString(chunk.stat(i)) + "\n"; + + if (chunk.has_laststat()) + chunk_str += LastStatementToString(chunk.laststat()) + "\n"; + + return chunk_str; +} + +/** + * LastStatement and nested types. + */ +PROTO_TOSTRING(LastStatement, laststat) +{ + std::string laststat_str; + using LastStatType = LastStatement::LastOneofCase; + /* + * Each case ends with 'break'; without it control falls through + * to 'default' and a Lua 'break' statement is never produced. + */ + switch (laststat.last_oneof_case()) { + case LastStatType::kExplist: + laststat_str = ReturnOptionalExpressionListToString( + laststat.explist()); + break; + case LastStatType::kBreak: + laststat_str = "break"; + break; + default: + /* Chosen as default in order to decrease number of 'break's. */ + laststat_str = ReturnOptionalExpressionListToString( + laststat.explist()); + break; + } + + if (laststat.has_semicolon()) + laststat_str += "; "; + + return laststat_str; +} + +NESTED_PROTO_TOSTRING(ReturnOptionalExpressionList, explist, LastStatement) +{ + std::string explist_str = "return"; + if (explist.has_explist()) { + explist_str += " " + ExpressionListToString(explist.explist()); + explist_str += " "; + } + return explist_str; +} + +/** + * Statement and statement options. 
+ */ +PROTO_TOSTRING(Statement, stat) +{ + std::string stat_str; + using StatType = Statement::StatOneofCase; + /* + * Each case ends with 'break'; without it control falls through + * to 'default' and every statement is serialized as an + * assignment list. + */ + switch (stat.stat_oneof_case()) { + case StatType::kList: + stat_str = AssignmentListToString(stat.list()); + break; + case StatType::kCall: + stat_str = FunctionCallToString(stat.call()); + break; + case StatType::kBlock: + stat_str = DoBlockToString(stat.block()); + break; + /** + * TODO: + * Commented due to possible generation of infinite loops. + * In that case, fuzzer will drop only by timeout. + * Example: 'while true do end'. + */ + /* + * case StatType::kWhilecycle: + * stat_str = WhileCycleToString(stat.whilecycle()); + * break; + * case StatType::kRepeatcycle: + * stat_str = RepeatCycleToString(stat.repeatcycle()); + * break; + */ + case StatType::kIfstat: + stat_str = IfStatementToString(stat.ifstat()); + break; + case StatType::kForcyclename: + stat_str = ForCycleNameToString(stat.forcyclename()); + break; + case StatType::kForcyclelist: + stat_str = ForCycleListToString(stat.forcyclelist()); + break; + case StatType::kFunc: + stat_str = FunctionToString(stat.func()); + break; + case StatType::kLocalfunc: + stat_str = LocalFuncToString(stat.localfunc()); + break; + case StatType::kLocalnames: + stat_str = LocalNamesToString(stat.localnames()); + break; + default: + /** + * Chosen arbitrarily more for simplicity. + * TODO: Choose "more interesting" defaults. + */ + stat_str = AssignmentListToString(stat.list()); + break; + } + + if (stat.has_semicolon()) + stat_str += "; "; + + return stat_str; +} + +/** + * AssignmentList and nested types. 
+ */ +PROTO_TOSTRING(AssignmentList, assignmentlist) +{ + std::string list_str = VariableListToString(assignmentlist.varlist()); + list_str += " = " + ExpressionListToString(assignmentlist.explist()); + return list_str; +} + +NESTED_PROTO_TOSTRING(VariableList, varlist, AssignmentList) +{ + std::string varlist_str = VariableToString(varlist.var()); + for (int i = 0; i < varlist.vars_size(); ++i) { + varlist_str += ", " + VariableToString(varlist.vars(i)); + varlist_str += " "; + } + return varlist_str; +} + +/** + * FunctionCall and nested types. + */ +PROTO_TOSTRING(FunctionCall, call) +{ + using FuncCallType = FunctionCall::CallOneofCase; + switch (call.call_oneof_case()) { + case FuncCallType::kPrefArgs: + return PrefixArgsToString(call.prefargs()); + case FuncCallType::kNamedArgs: + return PrefixNamedArgsToString(call.namedargs()); + default: + /* Chosen for more variability of generated programs. */ + return PrefixNamedArgsToString(call.namedargs()); + } +} + +NESTED_PROTO_TOSTRING(Args, args, FunctionCall) +{ + using ArgsType = FunctionCall::Args::ArgsOneofCase; + switch (args.args_oneof_case()) { + case ArgsType::kExplist: + return "(" + OptionalExpressionListToString(args.explist()) + + ")"; + case ArgsType::kTableconstructor: + return TableConstructorToString(args.tableconstructor()); + case ArgsType::kStr: + return "'" + ConvertToStringDefault(args.str()) + "'"; + default: + /* For more variability. 
*/ + return TableConstructorToString(args.tableconstructor()); + } +} + +NESTED_PROTO_TOSTRING(PrefixArgs, prefixargs, FunctionCall) +{ + std::string prefixargs_str = PrefixExpressionToString( + prefixargs.prefixexp()); + prefixargs_str += " " + ArgsToString(prefixargs.args()); + return prefixargs_str; +} + +NESTED_PROTO_TOSTRING(PrefixNamedArgs, prefixnamedargs, FunctionCall) +{ + std::string predixnamedargs_str = PrefixExpressionToString( + prefixnamedargs.prefixexp()); + predixnamedargs_str += ":" + NameToString(prefixnamedargs.name()); + predixnamedargs_str += " " + ArgsToString(prefixnamedargs.args()); + return predixnamedargs_str; +} + +/** + * DoBlock clause. + */ +PROTO_TOSTRING(DoBlock, block) +{ + return "do\n" + BlockToString(block.block()) + "end\n"; +} + +/** + * WhileCycle clause. + */ +PROTO_TOSTRING(WhileCycle, whilecycle) +{ + std::string whilecycle_str = "while " + ExpressionToString( + whilecycle.condition()); + whilecycle_str += " " + DoBlockToString(whilecycle.doblock()); + return whilecycle_str; +} + +/** + * RepeatCycle clause. + */ +PROTO_TOSTRING(RepeatCycle, repeatcycle) +{ + std::string repeatcycle_str = "repeat " + BlockToString( + repeatcycle.block()); + repeatcycle_str += "until " + ExpressionToString( + repeatcycle.condition()); + return repeatcycle_str; +} + +/** + * IfStatement and nested types. 
+ */ +PROTO_TOSTRING(IfStatement, statement) +{ + std::string statement_str = "if " + + ExpressionToString(statement.condition()); + statement_str += " then\n\t" + BlockToString(statement.first()); + + for (int i = 0; i < statement.clauses_size(); ++i) + statement_str += ElseIfBlockToString(statement.clauses(i)); + + if (statement.has_last()) + statement_str += "else\n\t" + BlockToString(statement.last()); + + statement_str += "end\n"; + return statement_str; +} + +NESTED_PROTO_TOSTRING(ElseIfBlock, elseifblock, IfStatement) +{ + /* + * The Lua keyword is 'elseif'. 'else if' would open a nested + * 'if' statement that requires its own 'end', which the + * enclosing IfStatement serializer does not emit. + */ + std::string elseifblock_str = "elseif "; + elseifblock_str += ExpressionToString(elseifblock.condition()); + elseifblock_str += " then\n\t"; + elseifblock_str += BlockToString(elseifblock.block()); + return elseifblock_str; +} + +/** + * ForCycleName clause. + * TODO: In 'for i = start, stop, step' construction start, stop, step + * should be numbers. So results of the corresponding expressions + * should be number. + */ +PROTO_TOSTRING(ForCycleName, forcyclename) +{ + std::string forcyclename_str = "for " + NameToString( + forcyclename.name()); + forcyclename_str += " = " + ExpressionToString(forcyclename.startexp()); + forcyclename_str += ", " + ExpressionToString(forcyclename.stopexp()); + + if (forcyclename.has_stepexp()) + forcyclename_str += ", " + ExpressionToString( + forcyclename.stepexp()); + + forcyclename_str += " " + DoBlockToString(forcyclename.doblock()); + return forcyclename_str; +} + +/** + * ForCycleList clause. + */ +PROTO_TOSTRING(ForCycleList, forcyclelist) +{ + std::string forcyclelist_str = "for " + NameListToString( + forcyclelist.names()); + forcyclelist_str += " in " + ExpressionListToString( + forcyclelist.expressions()); + forcyclelist_str += " " + DoBlockToString(forcyclelist.doblock()); + return forcyclelist_str; +} + +/** + * Function and nested types. 
+ */ +PROTO_TOSTRING(Function, func) +{ + std::string func_str = "function " + FuncNameToString(func.name()); + func_str += FuncBodyToString(func.body()); + return func_str; +} + +NESTED_PROTO_TOSTRING(FuncName, funcname, Function) +{ + std::string funcname_str = NameToString(funcname.firstname()); + + for (int i = 0; i < funcname.names_size(); ++i) + funcname_str += "." + NameToString(funcname.names(i)); + + if (funcname.has_lastname()) + funcname_str += ":" + NameToString(funcname.lastname()); + + return funcname_str; +} + +PROTO_TOSTRING(NameList, namelist) +{ + std::string namelist_str = NameToString(namelist.firstname()); + for (int i = 0; i < namelist.names_size(); ++i) + namelist_str += ", " + NameToString(namelist.names(i)); + return namelist_str; +} + +PROTO_TOSTRING(FuncBody, body) +{ + std::string body_str = "( "; + if (body.has_parlist()) + body_str += ParListToString(body.parlist()); + body_str += " )\n\t"; + body_str += BlockToString(body.block()); + body_str += "end\n"; + return body_str; +} + +NESTED_PROTO_TOSTRING(NameListWithEllipsis, namelist, FuncBody) +{ + std::string namelist_str = NameListToString(namelist.namelist()); + if (namelist.has_ellipsis()) + namelist_str += ", ..."; + return namelist_str; +} + +NESTED_PROTO_TOSTRING(ParList, parlist, FuncBody) +{ + using ParListType = FuncBody::ParList::ParlistOneofCase; + switch (parlist.parlist_oneof_case()) { + case ParListType::kNamelist: + return NameListWithEllipsisToString(parlist.namelist()); + case ParListType::kEllipsis: + return "..."; + default: + /* Chosen as default in order to decrease number of ellipses. */ + return NameListWithEllipsisToString(parlist.namelist()); + } +} + +/** + * LocalFunc clause. + */ +PROTO_TOSTRING(LocalFunc, localfunc) +{ + std::string localfunc_str = "local function " + NameToString( + localfunc.name()); + localfunc_str += " " + FuncBodyToString(localfunc.funcbody()); + return localfunc_str; +} + +/** + * LocalNames clause. 
+ */ +PROTO_TOSTRING(LocalNames, localnames) +{ + std::string localnames_str = "local "; + localnames_str += NameListToString(localnames.namelist()); + + if (localnames.has_explist()) + localnames_str += " = " + ExpressionListToString( + localnames.explist()); + return localnames_str; +} + +/** + * Expressions and variables. + */ + +/** + * Expressions clauses. + */ +PROTO_TOSTRING(ExpressionList, explist) +{ + std::string explist_str; + for (int i = 0; i < explist.expressions_size(); ++i) + explist_str += ExpressionToString(explist.expressions(i)) + + ", "; + explist_str += ExpressionToString(explist.explast()) + " "; + return explist_str; +} + +PROTO_TOSTRING(OptionalExpressionList, explist) +{ + if (explist.has_explist()) + return ExpressionListToString(explist.explist()); + return ""; +} + +PROTO_TOSTRING(PrefixExpression, prefixexp) +{ + using PrefExprType = PrefixExpression::PrefixOneofCase; + switch (prefixexp.prefix_oneof_case()) { + case PrefExprType::kVar: + return VariableToString(prefixexp.var()); + case PrefExprType::kFunctioncall: + return FunctionCallToString(prefixexp.functioncall()); + case PrefExprType::kExp: + return "(" + ExpressionToString(prefixexp.exp()) + ")"; + default: + /* + * Can be generated too nested expressions with other options, + * though they can be enabled for more variable fuzzing. + */ + return VariableToString(prefixexp.var()); + } +} + +/** + * Variable and nested types. + */ +PROTO_TOSTRING(Variable, var) +{ + using VarType = Variable::VarOneofCase; + switch (var.var_oneof_case()) { + case VarType::kName: + return NameToString(var.name()); + case VarType::kIndexexpr: + return IndexWithExpressionToString(var.indexexpr()); + case VarType::kIndexname: + return IndexWithNameToString(var.indexname()); + default: + /* + * Can be generated too nested expressions with other options, + * though they can be enabled for more variable fuzzing. 
+ */ + return NameToString(var.name()); + } +} + +NESTED_PROTO_TOSTRING(IndexWithExpression, indexexpr, Variable) +{ + std::string indexexpr_str = PrefixExpressionToString( + indexexpr.prefixexp()); + indexexpr_str += "[" + ExpressionToString(indexexpr.exp()) + "]"; + return indexexpr_str; +} + +NESTED_PROTO_TOSTRING(IndexWithName, indexname, Variable) +{ + std::string indexname_str = PrefixExpressionToString( + indexname.prefixexp()); + indexname_str += "." + ConvertToStringDefault(indexname.name()); + return indexname_str; +} + +/** + * Expression and nested types. + */ +PROTO_TOSTRING(Expression, expr) +{ + using ExprType = Expression::ExprOneofCase; + switch (expr.expr_oneof_case()) { + case ExprType::kNil: + return "nil"; + case ExprType::kFalse: + return "false"; + case ExprType::kTrue: + return "true"; + case ExprType::kNumber: { + /* Clamp number between given boundaries. */ + double number = clamp(expr.number(), kMaxNumber, kMinNumber); + return std::to_string(number); + } + case ExprType::kStr: + return "'" + ConvertToStringDefault(expr.str()) + "'"; + case ExprType::kEllipsis: + return " ... "; + case ExprType::kFunction: + return AnonFuncToString(expr.function()); + case ExprType::kPrefixexp: + return PrefixExpressionToString(expr.prefixexp()); + case ExprType::kTableconstructor: + return TableConstructorToString(expr.tableconstructor()); + case ExprType::kBinary: + return ExpBinaryOpExpToString(expr.binary()); + case ExprType::kUnary: + return UnaryOpExpToString(expr.unary()); + default: + /** + * Arbitrary choice. + * TODO: Choose "more interesting" defaults. 
+ */ + return "'" + ConvertToStringDefault(expr.str()) + "'"; + } +} + +NESTED_PROTO_TOSTRING(AnonFunc, func, Expression) +{ + return "function " + FuncBodyToString(func.body()); +} + +NESTED_PROTO_TOSTRING(ExpBinaryOpExp, binary, Expression) +{ + std::string binary_str = ExpressionToString(binary.leftexp()); + binary_str += " " + BinaryOperatorToString(binary.binop()) + " "; + binary_str += ExpressionToString(binary.rightexp()); + return binary_str; +} + +NESTED_PROTO_TOSTRING(UnaryOpExp, unary, Expression) +{ + std::string unary_str = UnaryOperatorToString(unary.unop()); + /* + * Separate the operator from its operand with a space: a unary + * minus directly followed by another minus (a negative number + * or a nested negation) would form a double hyphen, which + * starts a Lua comment and hides the rest of the line. + */ + unary_str += " " + ExpressionToString(unary.exp()); + return unary_str; +} + +/** + * Tables and fields. + */ +PROTO_TOSTRING(TableConstructor, table) +{ + std::string table_str = "{ "; + if (table.has_fieldlist()) + table_str += FieldListToString(table.fieldlist()); + table_str += " }"; + return table_str; +} + +PROTO_TOSTRING(FieldList, fieldlist) +{ + std::string fieldlist_str = FieldToString(fieldlist.firstfield()); + for (int i = 0; i < fieldlist.fields_size(); ++i) + fieldlist_str += FieldWithFieldSepToString(fieldlist.fields(i)); + if (fieldlist.has_lastsep()) + fieldlist_str += FieldSepToString(fieldlist.lastsep()); + return fieldlist_str; +} + +NESTED_PROTO_TOSTRING(FieldWithFieldSep, field, FieldList) +{ + std::string field_str = FieldSepToString(field.sep()); + field_str += " " + FieldToString(field.field()); + return field_str; +} + +/** + * Field and nested types. + */ +PROTO_TOSTRING(Field, field) +{ + using FieldType = Field::FieldOneofCase; + switch (field.field_oneof_case()) { + case FieldType::kExprassign: + return ExpressionAssignmentToString(field.exprassign()); + case FieldType::kNamedassign: + return NameAssignmentToString(field.namedassign()); + case FieldType::kExpression: + return ExpressionToString(field.expression()); + default: + /* More common case of using fields. 
*/ + return NameAssignmentToString(field.namedassign()); + } +} + +NESTED_PROTO_TOSTRING(ExpressionAssignment, assignment, Field) +{ + std::string assignment_str = "[ " + + ExpressionToString(assignment.key()) + " ]"; + assignment_str += " = " + ExpressionToString(assignment.value()); + return assignment_str; +} + +NESTED_PROTO_TOSTRING(NameAssignment, assignment, Field) +{ + std::string assignment_str = NameToString(assignment.name()); + assignment_str += " = " + ExpressionToString(assignment.value()); + return assignment_str; +} + +PROTO_TOSTRING(FieldSep, sep) +{ + using FieldSepType = FieldSep::SepOneofCase; + switch (sep.sep_oneof_case()) { + case FieldSepType::kComma: + return ","; + case FieldSepType::kSemicolon: + return ";"; + default: + return ","; + } +} + +/** + * Operators. + */ +PROTO_TOSTRING(BinaryOperator, op) +{ + using BinopType = BinaryOperator::BinaryOneofCase; + switch (op.binary_oneof_case()) { + case BinopType::kAdd: + return "+"; + case BinopType::kSub: + return "-"; + case BinopType::kMult: + return "*"; + case BinopType::kDiv: + return "/"; + case BinopType::kExp: + return "^"; + case BinopType::kMod: + return "%"; + + case BinopType::kConcat: + return ".."; + + case BinopType::kLess: + return "<"; + case BinopType::kLessEqual: + return "<="; + case BinopType::kGreater: + return ">"; + case BinopType::kGreaterEqual: + return ">="; + case BinopType::kEqual: + return "=="; + case BinopType::kNotEqual: + return "~="; + case BinopType::kAnd: + return "and"; + case BinopType::kOr: + return "or"; + default: + /* Works in most cases. */ + return "=="; + } +} + +PROTO_TOSTRING(UnaryOperator, op) +{ + using UnaryopType = UnaryOperator::UnaryOneofCase; + switch (op.unary_oneof_case()) { + case UnaryopType::kNegate: + return "-"; + case UnaryopType::kNot: + return "not "; + case UnaryopType::kLength: + return "#"; + default: + /* Works in most cases. */ + return "not "; + } +} + +/** + * Identifier (Name). 
+ */
+PROTO_TOSTRING(Name, name)
+{
+	std::string ident = ConvertToStringDefault(name.name());
+	return ident + std::to_string(name.num() % kMaxIdentifiers);
+}
diff --git a/test/fuzz/luaL_loadbuffer/serializer.h b/test/fuzz/luaL_loadbuffer/serializer.h
new file mode 100644
index 0000000000..97812c919a
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/serializer.h
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#pragma once
+
+#include "lua_grammar.pb.h"
+
+using namespace lua_grammar;
+
+#define PROTO_TOSTRING(TYPE, VAR_NAME) \
+	std::string TYPE##ToString(const TYPE & (VAR_NAME))
+
+/* PROTO_TOSTRING variant for messages nested one level inside a parent. */
+#define NESTED_PROTO_TOSTRING(TYPE, VAR_NAME, PARENT_MESSAGE) \
+	std::string TYPE##ToString \
+	(const PARENT_MESSAGE::TYPE & (VAR_NAME))
+
+/**
+ * Fuzzing parameters:
+ * kMaxNumber - upper bound for all generated numbers.
+ * kMinNumber - lower bound for all generated numbers.
+ * kMaxStrLength - upper bound for generating string literals and identifiers.
+ * kMaxIdentifiers - identifier numeric suffixes are taken modulo this value.
+ * kDefaultIdent - default name for identifier.
+ * Default values are arbitrary, but kept small for better readability
+ * of generated code samples.
+ */
+constexpr double kMaxNumber = 1000.0;
+constexpr double kMinNumber = -1000.0;
+constexpr size_t kMaxStrLength = 20;
+constexpr size_t kMaxIdentifiers = 10;
+constexpr char kDefaultIdent[] = "Name";
+
+PROTO_TOSTRING(Block, block);
+PROTO_TOSTRING(Chunk, chunk);
+
+PROTO_TOSTRING(Statement, stat);
+
+/** LastStatement and nested types. */
+PROTO_TOSTRING(LastStatement, laststat);
+NESTED_PROTO_TOSTRING(ReturnOptionalExpressionList, explist, LastStatement);
+
+/**
+ * Statement options.
+ */
+
+/** AssignmentList and nested types.
*/
+PROTO_TOSTRING(AssignmentList, assignmentlist);
+NESTED_PROTO_TOSTRING(VariableList, varlist, AssignmentList);
+
+/** FunctionCall and nested types. */
+PROTO_TOSTRING(FunctionCall, call);
+NESTED_PROTO_TOSTRING(Args, args, FunctionCall);
+NESTED_PROTO_TOSTRING(PrefixArgs, prefixargs, FunctionCall);
+NESTED_PROTO_TOSTRING(PrefixNamedArgs, prefixnamedargs, FunctionCall);
+
+/** DoBlock, WhileCycle and RepeatCycle clauses. */
+PROTO_TOSTRING(DoBlock, block);
+PROTO_TOSTRING(WhileCycle, whilecycle);
+PROTO_TOSTRING(RepeatCycle, repeatcycle);
+
+/** IfStatement and nested types. */
+PROTO_TOSTRING(IfStatement, statement);
+NESTED_PROTO_TOSTRING(ElseIfBlock, elseifblock, IfStatement);
+
+/** ForCycleName and ForCycleList clauses. */
+PROTO_TOSTRING(ForCycleName, forcyclename);
+PROTO_TOSTRING(ForCycleList, forcyclelist);
+
+/** Function and nested types. */
+PROTO_TOSTRING(Function, func);
+NESTED_PROTO_TOSTRING(FuncName, funcname, Function);
+
+PROTO_TOSTRING(NameList, namelist);
+PROTO_TOSTRING(FuncBody, body);
+NESTED_PROTO_TOSTRING(NameListWithEllipsis, namelist, FuncBody);
+NESTED_PROTO_TOSTRING(ParList, parlist, FuncBody);
+
+/** LocalFunc and LocalNames clauses. */
+PROTO_TOSTRING(LocalFunc, localfunc);
+PROTO_TOSTRING(LocalNames, localnames);
+
+/**
+ * Expressions and variables.
+ */
+
+/** Expression clauses. */
+PROTO_TOSTRING(ExpressionList, explist);
+PROTO_TOSTRING(OptionalExpressionList, explist);
+PROTO_TOSTRING(PrefixExpression, prefExpr);
+
+/** Variable and nested types. */
+PROTO_TOSTRING(Variable, var);
+NESTED_PROTO_TOSTRING(IndexWithExpression, indexexpr, Variable);
+NESTED_PROTO_TOSTRING(IndexWithName, indexname, Variable);
+
+/** Expression and nested types. */
+PROTO_TOSTRING(Expression, expr);
+NESTED_PROTO_TOSTRING(AnonFunc, function, Expression);
+NESTED_PROTO_TOSTRING(ExpBinaryOpExp, binary, Expression);
+NESTED_PROTO_TOSTRING(UnaryOpExp, unary, Expression);
+
+/**
+ * Tables and fields.
+ */
+PROTO_TOSTRING(TableConstructor, table);
+PROTO_TOSTRING(FieldList, fieldlist);
+NESTED_PROTO_TOSTRING(FieldWithFieldSep, field, FieldList);
+
+/** Field, its nested assignment types and the field separator. */
+PROTO_TOSTRING(Field, field);
+NESTED_PROTO_TOSTRING(ExpressionAssignment, assignment, Field);
+NESTED_PROTO_TOSTRING(NameAssignment, assignment, Field);
+PROTO_TOSTRING(FieldSep, sep);
+
+/** Binary and unary operators. */
+PROTO_TOSTRING(BinaryOperator, op);
+PROTO_TOSTRING(UnaryOperator, op);
+
+/** Identifier (Name): name string followed by num % kMaxIdentifiers. */
+PROTO_TOSTRING(Name, name);
-- 
GitLab