37 bool parse()
override;
72 error() <<
"invalid constant pool index (" << index <<
")" <<
eom;
107 void rbytecode(std::vector<instructiont> &);
119 for(std::size_t i=0; i<bytes; i++)
123 error() <<
"unexpected end of bytecode file" <<
eom;
130 template <
typename T>
134 std::is_unsigned<T>::value,
"T should be an unsigned integer");
135 const constexpr
size_t bytes =
sizeof(T);
137 for(
size_t i = 0; i < bytes; i++)
141 error() <<
"unexpected end of bytecode file" <<
eom;
147 return narrow_cast<T>(
result);
151 size_t bootstrap_method_index,
152 std::vector<u2> u2_values);
// Tag values that identify the kind of a constant pool entry. The tag of an
// entry is obtained with read<u1>() and stored in `tag`; these names are then
// used as `case` labels when the entries are turned into expressions.
// The numbers follow the constant pool tags of the JVM class file format
// (JVMS section 4.4), which is why they are neither consecutive nor listed in
// numeric order.
155 #define CONSTANT_Class 7
156 #define CONSTANT_Fieldref 9
157 #define CONSTANT_Methodref 10
158 #define CONSTANT_InterfaceMethodref 11
159 #define CONSTANT_String 8
160 #define CONSTANT_Integer 3
161 #define CONSTANT_Float 4
162 #define CONSTANT_Long 5
163 #define CONSTANT_Double 6
164 #define CONSTANT_NameAndType 12
165 #define CONSTANT_Utf8 1
166 #define CONSTANT_MethodHandle 15
167 #define CONSTANT_MethodType 16
168 #define CONSTANT_InvokeDynamic 18
// Tag values for verification type info entries (VTYPE_INFO_* names).
// NOTE(review): the JVM specification (JVMS section 4.7.4) numbers
// ITEM_Double as 3 and ITEM_Long as 4, whereas below LONG is 3 and DOUBLE
// is 4. The code that consumes these tags is not next to these definitions;
// confirm whether the swap is intentional before relying on the LONG/DOUBLE
// distinction.
170 #define VTYPE_INFO_TOP 0
171 #define VTYPE_INFO_INTEGER 1
172 #define VTYPE_INFO_FLOAT 2
173 #define VTYPE_INFO_LONG 3
174 #define VTYPE_INFO_DOUBLE 4
175 #define VTYPE_INFO_ITEM_NULL 5
176 #define VTYPE_INFO_UNINIT_THIS 6
177 #define VTYPE_INFO_OBJECT 7
178 #define VTYPE_INFO_UNINIT 8
287 "name_and_typeindex did not correspond to a name_and_type in the "
391 catch(
const std::string &
message)
// Access flag bit masks. Every value has exactly one bit set, so a flag is
// tested with a bitwise AND against an access_flags word, e.g.
// `(inner_class_access_flags & ACC_PRIVATE) != 0`.
// The `u` suffix makes each constant unsigned.
406 #define ACC_PUBLIC 0x0001u
407 #define ACC_PRIVATE 0x0002u
408 #define ACC_PROTECTED 0x0004u
409 #define ACC_STATIC 0x0008u
410 #define ACC_FINAL 0x0010u
411 #define ACC_SYNCHRONIZED 0x0020u
412 #define ACC_BRIDGE 0x0040u
413 #define ACC_NATIVE 0x0100u
414 #define ACC_INTERFACE 0x0200u
415 #define ACC_ABSTRACT 0x0400u
416 #define ACC_STRICT 0x0800u
417 #define ACC_SYNTHETIC 0x1000u
418 #define ACC_ANNOTATION 0x2000u
419 #define ACC_ENUM 0x4000u
421 #define UNUSED_u2(x) \
423 const u2 x = read<u2>(); \
432 const u4 magic = read<u4>();
434 const u2 major_version = read<u2>();
436 if(magic!=0xCAFEBABE)
444 error() <<
"unexpected major version" <<
eom;
452 const u2 access_flags = read<u2>();
453 const u2 this_class = read<u2>();
454 const u2 super_class = read<u2>();
481 const u2 attributes_count = read<u2>();
483 for(std::size_t j=0; j<attributes_count; j++)
517 if(field.signature.has_value())
539 for(
const auto ¶meter_annotations : method.parameter_annotations)
542 if(method.signature.has_value())
557 for(
const auto &var : method.local_variable_table)
559 if(var.signature.has_value())
579 if(src.
id()==ID_code)
587 else if(src.
id() == ID_struct_tag)
595 else if(src.
id()==ID_struct)
601 else if(src.
id()==ID_pointer)
608 const std::vector<annotationt> &annotations)
610 for(
const auto &annotation : annotations)
613 for(
const auto &element_value_pair : annotation.element_value_pairs)
623 if(
const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
625 const irep_idt &value_id = symbol_expr->get_identifier();
628 else if(
const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
641 const u2 constant_pool_count = read<u2>();
642 if(constant_pool_count==0)
644 error() <<
"invalid constant_pool_count" <<
eom;
653 it->tag = read<u1>();
658 it->ref1 = read<u2>();
666 it->ref1 = read<u2>();
667 it->ref2 = read<u2>();
672 it->ref1 = read<u2>();
677 it->number = read<u4>();
682 it->number = read<u8>();
686 error() <<
"invalid double entry" <<
eom;
695 const u2 bytes = read<u2>();
705 it->ref1 = read<u1>();
706 it->ref2 = read<u2>();
710 error() <<
"unknown constant pool entry (" << it->tag <<
")"
720 [&](constant_poolt::value_type &entry) {
725 const std::string &s = id2string(pool_entry(entry.ref1).s);
726 entry.expr = type_exprt(java_classname(s));
730 case CONSTANT_Fieldref:
732 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
733 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
734 const pool_entryt &class_entry = pool_entry(entry.ref1);
735 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
736 typet type=type_entry(nameandtype_entry.ref2);
738 auto class_tag = java_classname(id2string(class_name_entry.s));
740 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
742 entry.expr = fieldref;
746 case CONSTANT_Methodref:
747 case CONSTANT_InterfaceMethodref:
749 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
750 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
751 const pool_entryt &class_entry = pool_entry(entry.ref1);
752 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
753 typet type=type_entry(nameandtype_entry.ref2);
755 auto class_tag = java_classname(id2string(class_name_entry.s));
757 irep_idt mangled_method_name =
758 id2string(name_entry.s) +
":" +
759 id2string(pool_entry(nameandtype_entry.ref2).s);
761 irep_idt class_id = class_tag.get_identifier();
763 entry.expr = class_method_descriptor_exprt{
764 type, mangled_method_name, class_id, name_entry.s};
768 case CONSTANT_String:
771 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
775 case CONSTANT_Integer:
776 entry.expr = from_integer(entry.number, java_int_type());
781 ieee_floatt value(ieee_float_spect::single_precision());
782 value.unpack(entry.number);
783 entry.expr = value.to_expr();
788 entry.expr = from_integer(entry.number, java_long_type());
791 case CONSTANT_Double:
793 ieee_floatt value(ieee_float_spect::double_precision());
794 value.unpack(entry.number);
795 entry.expr = value.to_expr();
799 case CONSTANT_NameAndType:
801 entry.expr.id(
"nameandtype");
805 case CONSTANT_MethodHandle:
807 entry.expr.id(
"methodhandle");
811 case CONSTANT_MethodType:
813 entry.expr.id(
"methodtype");
817 case CONSTANT_InvokeDynamic:
819 entry.expr.id(
"invokedynamic");
820 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
821 typet type=type_entry(nameandtype_entry.ref2);
822 type.set(ID_java_lambda_method_handle_index, entry.ref1);
823 entry.expr.type() = type;
832 const u2 interfaces_count = read<u2>();
834 for(std::size_t i=0; i<interfaces_count; i++)
836 constant(read<u2>()).type().get(ID_C_base_name));
841 const u2 fields_count = read<u2>();
843 for(std::size_t i=0; i<fields_count; i++)
847 const u2 access_flags = read<u2>();
848 const u2 name_index = read<u2>();
849 const u2 descriptor_index = read<u2>();
850 const u2 attributes_count = read<u2>();
861 const auto flags = (field.
is_public ? 1 : 0) +
864 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
866 for(std::size_t j=0; j<attributes_count; j++)
882 const u4 code_length = read<u4>();
885 size_t bytecode_index=0;
887 for(address=0; address<code_length; address++)
889 bool wide_instruction=
false;
890 u4 start_of_instruction=address;
892 u1 bytecode = read<u1>();
896 wide_instruction=
true;
898 bytecode = read<u1>();
905 std::string(
"Unexpected wide instruction: ") +
909 instructions.emplace_back();
912 instruction.
address=start_of_instruction;
941 const s1 c = read<u1>();
949 const s2 offset = read<u2>();
952 instruction.
args.push_back(
960 const s4 offset = read<u4>();
963 instruction.
args.push_back(
973 const u2 v = read<u2>();
979 const u1 v = read<u1>();
991 const u2 v = read<u2>();
993 const s2 c = read<u2>();
999 const u1 v = read<u1>();
1001 const s1 c = read<u1>();
1009 const u2 c = read<u2>();
1011 const u1 b1 = read<u1>();
1013 const u1 b2 = read<u1>();
1021 u4 base_offset=address;
1024 while(((address + 1u) & 3u) != 0)
1031 const s4 default_value = read<u4>();
1034 instruction.
args.push_back(
1039 const u4 npairs = read<u4>();
1042 for(std::size_t i=0; i<npairs; i++)
1044 const s4 match = read<u4>();
1045 const s4 offset = read<u4>();
1046 instruction.
args.push_back(
1050 instruction.
args.push_back(
1059 size_t base_offset=address;
1062 while(((address + 1u) & 3u) != 0)
1069 const s4 default_value = read<u4>();
1070 instruction.
args.push_back(
1075 const s4 low_value = read<u4>();
1079 const s4 high_value = read<u4>();
1083 for(
s4 i=low_value; i<=high_value; i++)
1085 s4 offset = read<u4>();
1089 instruction.
args.push_back(
1098 const u2 c = read<u2>();
1100 const u1 dimensions = read<u1>();
1101 instruction.
args.push_back(
1118 case T_INT: t.
id(ID_int);
break;
1129 const s2 s = read<u2>();
1136 throw "unknown JVM bytecode instruction";
1141 if(address!=code_length)
1143 error() <<
"bytecode length mismatch" <<
eom;
1150 const u2 attribute_name_index = read<u2>();
1151 const u4 attribute_length = read<u4>();
1155 if(attribute_name ==
"Code")
1165 const u2 exception_table_length = read<u2>();
1172 for(std::size_t e = 0; e < exception_table_length; e++)
1174 const u2 start_pc = read<u2>();
1175 const u2 end_pc = read<u2>();
1181 "The start_pc must be less than the end_pc as this is the range the "
1182 "exception is active");
1184 const u2 handler_pc = read<u2>();
1185 const u2 catch_type = read<u2>();
1195 u2 attributes_count = read<u2>();
1197 for(std::size_t j=0; j<attributes_count; j++)
1210 if(!instruction.source_location.get_line().empty())
1211 line_number = instruction.source_location.get_line();
1212 else if(!line_number.
empty())
1213 instruction.source_location.set_line(line_number);
1214 instruction.source_location.set_function(
1219 const auto it = std::find_if(
1223 return !instruction.source_location.get_line().empty();
1228 else if(attribute_name==
"Signature")
1230 const u2 signature_index = read<u2>();
1233 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1234 attribute_name==
"RuntimeVisibleAnnotations")
1239 attribute_name ==
"RuntimeInvisibleParameterAnnotations" ||
1240 attribute_name ==
"RuntimeVisibleParameterAnnotations")
1242 const u1 parameter_count = read<u1>();
1250 for(
u2 param_no = 0; param_no < parameter_count; ++param_no)
1253 else if(attribute_name ==
"Exceptions")
1263 const u2 attribute_name_index = read<u2>();
1264 const u4 attribute_length = read<u4>();
1268 if(attribute_name==
"Signature")
1270 const u2 signature_index = read<u2>();
1273 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1274 attribute_name==
"RuntimeVisibleAnnotations")
1284 const u2 attribute_name_index = read<u2>();
1285 const u4 attribute_length = read<u4>();
1289 if(attribute_name==
"LineNumberTable")
1291 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1293 instruction_map.emplace(instruction.address, instruction);
1295 const u2 line_number_table_length = read<u2>();
1297 for(std::size_t i=0; i<line_number_table_length; i++)
1299 const u2 start_pc = read<u2>();
1300 const u2 line_number = read<u2>();
1303 auto it = instruction_map.find(start_pc);
1305 if(it!=instruction_map.end())
1306 it->second.get().source_location.set_line(line_number);
1309 else if(attribute_name==
"LocalVariableTable")
1311 const u2 local_variable_table_length = read<u2>();
1315 for(std::size_t i=0; i<local_variable_table_length; i++)
1317 const u2 start_pc = read<u2>();
1318 const u2 length = read<u2>();
1319 const u2 name_index = read<u2>();
1320 const u2 descriptor_index = read<u2>();
1321 const u2 index = read<u2>();
1331 else if(attribute_name==
"LocalVariableTypeTable")
1335 else if(attribute_name==
"StackMapTable")
1337 const u2 stack_map_entries = read<u2>();
1341 for(
size_t i=0; i<stack_map_entries; i++)
1343 const u1 frame_type = read<u1>();
1350 else if(64<=frame_type && frame_type<=127)
1360 else if(frame_type==247)
1367 const u2 offset_delta = read<u2>();
1372 else if(248<=frame_type && frame_type<=250)
1377 const u2 offset_delta = read<u2>();
1380 else if(frame_type==251)
1386 const u2 offset_delta = read<u2>();
1389 else if(252<=frame_type && frame_type<=254)
1391 size_t new_locals = frame_type - 251;
1395 const u2 offset_delta = read<u2>();
1397 for(
size_t k=0; k<new_locals; k++)
1406 else if(frame_type==255)
1409 const u2 offset_delta = read<u2>();
1411 const u2 number_locals = read<u2>();
1413 for(
size_t k=0; k<(size_t) number_locals; k++)
1421 const u2 number_stack_items = read<u2>();
1423 for(
size_t k=0; k<(size_t) number_stack_items; k++)
1433 throw "error: unknown stack frame type encountered";
1443 const u1 tag = read<u1>();
1476 throw "error: unknown verification type info encountered";
1481 std::vector<annotationt> &annotations)
1483 const u2 num_annotations = read<u2>();
1485 for(
u2 number=0; number<num_annotations; number++)
1489 annotations.push_back(annotation);
1496 const u2 type_index = read<u2>();
1504 const u2 num_element_value_pairs = read<u2>();
1505 element_value_pairs.resize(num_element_value_pairs);
1507 for(
auto &element_value_pair : element_value_pairs)
1509 const u2 element_name_index = read<u2>();
1510 element_value_pair.element_name=
pool_entry(element_name_index).
s;
1523 const u1 tag = read<u1>();
1537 const u2 class_info_index = read<u2>();
1552 const u2 num_values = read<u2>();
1554 values.reserve(num_values);
1555 for(std::size_t i=0; i<num_values; i++)
1564 const u2 const_value_index = read<u2>();
1570 const u2 const_value_index = read<u2>();
1571 return constant(const_value_index);
1588 const u4 &attribute_length)
1591 std::string name = parsed_class.
name.
c_str();
1592 const u2 number_of_classes = read<u2>();
1593 const u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1595 number_of_bytes_to_be_read == attribute_length,
1596 "The number of bytes to be read for the InnerClasses attribute does not "
1597 "match the attribute length.");
1599 const auto pool_entry_lambda = [
this](
u2 index) ->
pool_entryt & {
1602 const auto remove_separator_char = [](std::string str,
char ch) {
1603 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1607 for(
int i = 0; i < number_of_classes; i++)
1609 const u2 inner_class_info_index = read<u2>();
1610 const u2 outer_class_info_index = read<u2>();
1611 const u2 inner_name_index = read<u2>();
1612 const u2 inner_class_access_flags = read<u2>();
1614 std::string inner_class_info_name =
1617 bool is_private = (inner_class_access_flags &
ACC_PRIVATE) != 0;
1618 bool is_public = (inner_class_access_flags &
ACC_PUBLIC) != 0;
1619 bool is_protected = (inner_class_access_flags &
ACC_PROTECTED) != 0;
1620 bool is_static = (inner_class_access_flags &
ACC_STATIC) != 0;
1625 bool is_inner_class = remove_separator_char(
id2string(parsed_class.
name),
'.') ==
1626 remove_separator_char(inner_class_info_name,
'/');
1632 if(inner_name_index == 0)
1635 parsed_class.
inner_name = pool_entry_lambda(inner_name_index).s;
1638 if(outer_class_info_index == 0)
1646 std::string outer_class_info_name =
1665 const u2 number_of_exceptions = read<u2>();
1667 std::vector<irep_idt> exceptions;
1668 for(
size_t i = 0; i < number_of_exceptions; i++)
1670 const u2 exception_index_table = read<u2>();
1673 exceptions.push_back(exception_name);
1682 const u2 attribute_name_index = read<u2>();
1683 const u4 attribute_length = read<u4>();
1687 if(attribute_name==
"SourceFile")
1689 const u2 sourcefile_index = read<u2>();
1693 size_t last_index = fqn.find_last_of(
'.');
1694 if(last_index==std::string::npos)
1698 std::string package_name=fqn.substr(0, last_index+1);
1699 std::replace(package_name.begin(), package_name.end(),
'.',
'/');
1700 const std::string &full_file_name=
1702 sourcefile_name=full_file_name;
1705 for(
auto &method : parsed_class.
methods)
1707 method.source_location.set_file(sourcefile_name);
1708 for(
auto &instruction : method.instructions)
1710 if(!instruction.source_location.get_line().empty())
1711 instruction.source_location.set_file(sourcefile_name);
1715 else if(attribute_name==
"Signature")
1717 const u2 signature_index = read<u2>();
1723 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1724 attribute_name==
"RuntimeVisibleAnnotations")
1728 else if(attribute_name ==
"BootstrapMethods")
1734 "only one BootstrapMethods argument is allowed in a class file");
1740 else if(attribute_name ==
"InnerClasses")
1750 const u2 methods_count = read<u2>();
1752 for(std::size_t j=0; j<methods_count; j++)
// Second list of access flag bit masks. The names that already occur in the
// earlier ACC_* list (PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, INTERFACE,
// ABSTRACT, SYNTHETIC, ANNOTATION, ENUM) are repeated with identical
// replacement text, which the preprocessor accepts as a benign redefinition.
// Added here are ACC_VARARGS, ACC_SUPER, ACC_VOLATILE and ACC_TRANSIENT.
// Some masks share a bit: ACC_VARARGS and ACC_TRANSIENT are both 0x0080u,
// ACC_SUPER equals the earlier ACC_SYNCHRONIZED (0x0020u) and ACC_VOLATILE
// equals the earlier ACC_BRIDGE (0x0040u). Presumably the applicable name
// depends on whether the flags word belongs to a class, a field or a
// method -- verify at the use site.
1756 #define ACC_PUBLIC 0x0001u
1757 #define ACC_PRIVATE 0x0002u
1758 #define ACC_PROTECTED 0x0004u
1759 #define ACC_STATIC 0x0008u
1760 #define ACC_FINAL 0x0010u
1761 #define ACC_VARARGS 0x0080u
1762 #define ACC_SUPER 0x0020u
1763 #define ACC_VOLATILE 0x0040u
1764 #define ACC_TRANSIENT 0x0080u
1765 #define ACC_INTERFACE 0x0200u
1766 #define ACC_ABSTRACT 0x0400u
1767 #define ACC_SYNTHETIC 0x1000u
1768 #define ACC_ANNOTATION 0x2000u
1769 #define ACC_ENUM 0x4000u
1775 const u2 access_flags = read<u2>();
1776 const u2 name_index = read<u2>();
1777 const u2 descriptor_index = read<u2>();
1794 const auto flags = (method.
is_public ? 1 : 0) +
1797 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
1798 const u2 attributes_count = read<u2>();
1800 for(std::size_t j=0; j<attributes_count; j++)
1805 std::istream &istream,
1808 bool skip_instructions)
1811 java_bytecode_parser.
in=&istream;
1814 bool parser_result=java_bytecode_parser.
parse();
1823 return std::move(java_bytecode_parser.
parse_tree);
1827 const std::string &
file,
1830 bool skip_instructions)
1840 in, class_name, message_handler, skip_instructions);
1848 const u2 local_variable_type_table_length = read<u2>();
1852 "Local variable type table cannot have more elements "
1853 "than the local variable table.");
1854 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1856 const u2 start_pc = read<u2>();
1857 const u2 length = read<u2>();
1858 const u2 name_index = read<u2>();
1859 const u2 signature_index = read<u2>();
1860 const u2 index = read<u2>();
1866 if(lvar.index==index &&
1868 lvar.start_pc==start_pc &&
1869 lvar.length==length)
1878 "Entry in LocalVariableTypeTable must be present in LVT");
1899 std::string class_name = class_entry.
get_name(pool_entry_lambda);
1901 std::replace(class_name.begin(), class_name.end(),
'.',
'$');
1903 std::replace(class_name.begin(), class_name.end(),
'/',
'.');
1904 const std::string method_ref =
1905 class_name +
"." + name_and_type.
get_name(pool_entry_lambda) +
':' +
1917 name_and_type.
get_name(pool_entry_lambda);
1922 return lambda_method_handle;
1931 const u2 num_bootstrap_methods = read<u2>();
1932 for(
size_t bootstrap_method_index = 0;
1933 bootstrap_method_index < num_bootstrap_methods;
1934 ++bootstrap_method_index)
1936 const u2 bootstrap_methodhandle_ref = read<u2>();
1941 const u2 num_bootstrap_arguments = read<u2>();
1942 debug() <<
"INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1946 std::vector<u2> u2_values(num_bootstrap_arguments);
1947 for(
size_t i = 0; i < num_bootstrap_arguments; i++)
1948 u2_values[i] = read<u2>();
1980 if(num_bootstrap_arguments < 3)
1984 <<
"format of BootstrapMethods entry not recognized: too few arguments"
1989 u2 interface_type_index = u2_values[0];
1990 u2 method_handle_index = u2_values[1];
1991 u2 method_type_index = u2_values[2];
1997 bool recognized =
true;
1998 for(
size_t i = 3; i < num_bootstrap_arguments; i++)
2000 u2 skipped_argument = u2_values[i];
2006 debug() <<
"format of BootstrapMethods entry not recognized: extra "
2007 "arguments of wrong type"
2023 debug() <<
"format of BootstrapMethods entry not recognized: arguments "
2030 debug() <<
"INFO: parse lambda handle" <<
eom;
2034 if(!lambda_method_handle.has_value())
2036 debug() <<
"format of BootstrapMethods entry not recognized: method "
2037 "handle not recognised"
2047 lambda_method_handle->interface_type =
2049 lambda_method_handle->method_type =
pool_entry(method_type_argument.
ref1).
s;
2050 lambda_method_handle->u2_values = std::move(u2_values);
2051 debug() <<
"lambda function reference "
2052 <<
id2string(lambda_method_handle->lambda_method_name)
2054 <<
"\n interface type is "
2056 <<
"\n method type is "
2059 bootstrap_method_index, *lambda_method_handle);
2068 size_t bootstrap_method_index,
2069 std::vector<u2> u2_values)
2073 bootstrap_method_index, lambda_method_handle);