diff --git a/.travis.yml b/.travis.yml index f403b54..9b505d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ rust: - beta - nightly before_script: - - wget https://github.com/python/cpython/archive/3.5.zip -O cpython.zip + - wget https://github.com/python/cpython/archive/3.6.zip -O cpython.zip - unzip cpython.zip - cd cpython-* - ./configure --prefix=$HOME/.local/ diff --git a/README.md b/README.md index 76ed500..e28f808 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ A Python virtual machine, written in Rust. ## Dependencies -* CPython 3.6 (used as a parser and bytecode compiler). Older versions down to 3.4 should work, but their support is not tested. +* CPython 3.6 (used as a parser and bytecode compiler). * [Rust](https://www.rust-lang.org/downloads.html) * [Cargo](https://crates.io/install) diff --git a/src/marshal/mod.rs b/src/marshal/mod.rs index 11cd6ae..24155a8 100644 --- a/src/marshal/mod.rs +++ b/src/marshal/mod.rs @@ -18,6 +18,6 @@ pub fn check_magic(buf: &[u8]) -> bool { false } else { - 3310 <= version /* ≥ 3.4rc2 */ && version < 3390 /* < 3.7 */ + 3379 <= version /* ≥ 3.6rc1 */ && version < 3390 /* < 3.7 */ } } diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index 538db21..50da2c9 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -89,7 +89,9 @@ fn build_class(state: &mut State, call_stack: &mut Vec, let mut instructions: Vec = InstructionDecoder::new(code.code.iter()).collect(); // Hack to made the class' code return the class instead of None - assert_eq!(instructions.pop(), Some(Instruction::ReturnValue)); + let mut last_instruction; + while {last_instruction = instructions.pop(); last_instruction == Some(Instruction::Nop)} {}; + assert_eq!(last_instruction, Some(Instruction::ReturnValue)); instructions.pop(); // LoadConst None instructions.push(Instruction::PushImmediate(cls_ref.clone())); instructions.push(Instruction::ReturnValue); diff --git a/src/processor/instructions.rs b/src/processor/instructions.rs index 218e9a2..4d50da3 100644 --- a/src/processor/instructions.rs +++ b/src/processor/instructions.rs @@ -18,7 +18,7 @@ pub enum CmpOperator { } impl CmpOperator { - pub fn from_bytecode(n: u32) -> Self { + pub fn from_bytecode(n: usize) -> Self { match n { 0 => CmpOperator::Lt, 1 => CmpOperator::Leq, @@ -69,9 +69,10 @@ pub enum Instruction { LoadFast(usize), StoreFast(usize), LoadGlobal(usize), - CallFunction(usize, usize), // nb_args, nb_kwargs - RaiseVarargs(u16), - MakeFunction(usize, usize, usize), // nb_default_args, nb_default_kwargs, nb_annot + CallFunction(usize, bool), // nb_args + nb_kwargs, has_kwargs + RaiseVarargs(usize), + MakeFunction { has_defaults: bool, has_kwdefaults: bool, has_annotations: bool, has_closure: bool }, + BuildConstKeyMap(usize), } #[derive(Debug)] @@ -123,55 +124,65 @@ impl<'a, I> Iterator for InstructionDecoder where I: Iterator { self.pending_nops -= 1; return Some(Instruction::Nop) }; - self.bytestream.next().map(|opcode| { - match *opcode { - 1 => Instruction::PopTop, - 4 => Instruction::DupTop, - 25 => Instruction::BinarySubscr, - 68 => Instruction::GetIter, - 71 => Instruction::LoadBuildClass, - 83 => Instruction::ReturnValue, - 87 => Instruction::PopBlock, - 88 => Instruction::EndFinally, - 89 => Instruction::PopExcept, - 90 => Instruction::StoreName(self.read_argument() as usize), - 93 => Instruction::ForIter(self.read_argument() as usize), - 95 => Instruction::StoreAttr(self.read_argument() as usize), - 97 => Instruction::StoreGlobal(self.read_argument() as usize), - 100 => Instruction::LoadConst(self.read_argument() as usize), - 101 => Instruction::LoadName(self.read_argument() as usize), - 102 => Instruction::BuildTuple(self.read_argument() as usize), - 106 => Instruction::LoadAttr(self.read_argument() as usize), - 107 => Instruction::CompareOp(CmpOperator::from_bytecode(self.read_argument())), - 110 => Instruction::JumpForward(self.read_argument() as usize + 2), // +2, because JumpForward takes 3 bytes, and the relative address is computed from the next instruction. - 113 => Instruction::JumpAbsolute(self.read_argument() as usize), - 114 => Instruction::PopJumpIfFalse(self.read_argument() as usize), - 116 => Instruction::LoadGlobal(self.read_argument() as usize), - 120 => Instruction::SetupLoop(self.read_argument() as usize + 2), - 121 => Instruction::SetupExcept(self.read_argument() as usize + 2), - 124 => Instruction::LoadFast(self.read_argument() as usize), - 125 => Instruction::StoreFast(self.read_argument() as usize), - 130 => Instruction::RaiseVarargs(self.read_argument() as u16), - 131 => Instruction::CallFunction(self.read_byte() as usize, self.read_byte() as usize), - 132 => { - let arg = self.read_argument(); - let nb_pos = arg & 0xFF; - let nb_kw = (arg >> 8) & 0xFF; - //let nb_annot = (arg >> 16) & 0x7FF; // TODO - let nb_annot = 0; - Instruction::MakeFunction(nb_pos as usize, nb_kw as usize, nb_annot as usize) - }, - 144 => { self.arg_prefix = Some(self.read_argument()); Instruction::Nop }, - _ => panic!(format!("Opcode not supported: {}", opcode)), + let mut opcode = 144; + let mut oparg: usize = 0; + while opcode == 144 { + match self.bytestream.next() { + Some(op) => { opcode = *op }, + None => return None, } - }) + oparg = (oparg << 8) | (*self.bytestream.next().unwrap() as usize); + self.pending_nops += 1; + } + self.pending_nops -= 1; + let inst = match opcode { + 1 => Instruction::PopTop, + 4 => Instruction::DupTop, + 25 => Instruction::BinarySubscr, + 68 => Instruction::GetIter, + 71 => Instruction::LoadBuildClass, + 83 => Instruction::ReturnValue, + 87 => Instruction::PopBlock, + 88 => Instruction::EndFinally, + 89 => Instruction::PopExcept, + 90 => Instruction::StoreName(oparg), + 93 => Instruction::ForIter(oparg), + 95 => Instruction::StoreAttr(oparg), + 97 => Instruction::StoreGlobal(oparg), + 100 => Instruction::LoadConst(oparg), + 101 => Instruction::LoadName(oparg), + 102 => Instruction::BuildTuple(oparg), + 106 => Instruction::LoadAttr(oparg), + 107 => Instruction::CompareOp(CmpOperator::from_bytecode(oparg)), + 110 => Instruction::JumpForward(oparg), + 113 => Instruction::JumpAbsolute(oparg), + 114 => Instruction::PopJumpIfFalse(oparg), + 116 => Instruction::LoadGlobal(oparg), + 120 => Instruction::SetupLoop(oparg + 1), + 121 => Instruction::SetupExcept(oparg + 1), + 124 => Instruction::LoadFast(oparg), + 125 => Instruction::StoreFast(oparg), + 130 => Instruction::RaiseVarargs(oparg), + 131 => Instruction::CallFunction(oparg, false), + 132 => Instruction::MakeFunction { + has_defaults: oparg & 0x01 != 0, + has_kwdefaults: oparg & 0x02 != 0, + has_annotations: oparg & 0x04 != 0, + has_closure: oparg & 0x08 != 0, + }, + 141 => Instruction::CallFunction(oparg, true), + 156 => Instruction::BuildConstKeyMap(oparg), + 144 => panic!("The impossible happened."), + _ => panic!(format!("Opcode not supported: {:?}", (opcode, oparg))), + }; + Some(inst) } } #[test] fn test_load_read() { - let bytes: Vec = vec![124, 1, 0, 83]; + let bytes: Vec = vec![124, 1, 83, 0]; let reader = InstructionDecoder::new(bytes.iter()); let instructions: Vec = reader.collect(); - assert_eq!(vec![Instruction::LoadFast(1), Instruction::Nop, Instruction::Nop, Instruction::ReturnValue], instructions); + assert_eq!(vec![Instruction::LoadFast(1), Instruction::ReturnValue], instructions); } diff --git a/src/processor/mod.rs b/src/processor/mod.rs index d419599..0cfa4fa 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -15,6 +15,8 @@ use super::state::{State, PyResult, unwind, raise, return_value}; use super::sandbox::EnvProxy; use super::primitives; +const WORD_SIZE: usize = 2; + #[derive(Debug)] pub enum ProcessorError { CircularReference, @@ -233,11 +235,12 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> let instruction = py_unwrap!(state, frame.instructions.get(frame.program_counter), ProcessorError::InvalidProgramCounter); // Useful for debugging: /* - println!(""); + println!("======"); for r in frame.var_stack.iter() { println!("{}", r.repr(&state.store)); } - println!("{} {:?}", frame.program_counter, instruction); + println!("{} {:?}", frame.program_counter*WORD_SIZE, instruction); + println!("======"); */ frame.program_counter += 1; instruction.clone() @@ -362,7 +365,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> Instruction::ForIter(i) => { let iterator = { let frame = call_stack.last_mut().unwrap(); - frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i)); + frame.block_stack.push(Block::ExceptPopGoto(state.primitive_objects.stopiteration.clone(), 1, frame.program_counter+i/WORD_SIZE)); let iterator = top_stack!(state, frame.var_stack); iterator.clone() }; @@ -436,7 +439,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> } Instruction::SetupExcept(i) => { let frame = call_stack.last_mut().unwrap(); - frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i)) + frame.block_stack.push(Block::TryExcept(frame.program_counter, frame.program_counter+i/WORD_SIZE)) } Instruction::CompareOp(CmpOperator::Eq) => { let frame = call_stack.last_mut().unwrap(); @@ -465,11 +468,11 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> } Instruction::JumpAbsolute(target) => { let frame = call_stack.last_mut().unwrap(); - frame.program_counter = target + frame.program_counter = target / WORD_SIZE } Instruction::JumpForward(delta) => { let frame = call_stack.last_mut().unwrap(); - frame.program_counter += delta + frame.program_counter += delta / WORD_SIZE } Instruction::LoadFast(i) => { let frame = call_stack.last_mut().unwrap(); @@ -487,7 +490,7 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> let obj = state.store.deref(&pop_stack!(state, frame.var_stack)); match obj.content { ObjectContent::True => (), - ObjectContent::False => frame.program_counter = target, + ObjectContent::False => frame.program_counter = target / WORD_SIZE, _ => unimplemented!(), } } @@ -509,21 +512,32 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> panic!("Bad RaiseVarargs argument") // TODO: Raise an exception instead } - Instruction::CallFunction(nb_args, nb_kwargs) => { + Instruction::CallFunction(nb_args, has_kwargs) => { // See “Call constructs” at: // http://security.coverity.com/blog/2014/Nov/understanding-python-bytecode.html - let kwargs; + let kwargs: Vec<(ObjectRef, ObjectRef)>; let args; let func; { let frame = call_stack.last_mut().unwrap(); - kwargs = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_kwargs), ProcessorError::StackTooSmall); - args = py_unwrap!(state, frame.var_stack.pop_many(nb_args), ProcessorError::StackTooSmall); + if has_kwargs { + let ref obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content; + let names: Vec = match obj { + &ObjectContent::Tuple(ref v) => v.into_iter().cloned().collect(), + _ => panic!("Bad CallFunctionKw argument"), + }; + let values: Vec = frame.var_stack.pop_many(names.len()).unwrap(); + kwargs = names.into_iter().zip(values).collect(); + } + else { + kwargs = Vec::new(); + } + args = py_unwrap!(state, frame.var_stack.pop_many(nb_args - kwargs.len()), ProcessorError::StackTooSmall); func = pop_stack!(state, frame.var_stack); } call_function(state, call_stack, &func, args, kwargs) }, - Instruction::MakeFunction(0, nb_default_kwargs, 0) => { + Instruction::MakeFunction { has_defaults: false, has_kwdefaults, has_annotations: false, has_closure: false } => { // TODO: consume default arguments and annotations let obj = { let frame = call_stack.last_mut().unwrap(); @@ -540,18 +554,35 @@ fn run_code(state: &mut State, call_stack: &mut Vec) -> }; let frame = call_stack.last_mut().unwrap(); let code = pop_stack!(state, frame.var_stack); - let raw_kwdefaults = py_unwrap!(state, frame.var_stack.pop_n_pairs(nb_default_kwargs), ProcessorError::StackTooSmall); let mut kwdefaults: HashMap = HashMap::new(); - kwdefaults.reserve(nb_default_kwargs); - for (key, value) in raw_kwdefaults { - match state.store.deref(&key).content { - ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value); }, - _ => panic!("Defaults' keys must be strings."), + if has_kwdefaults { + let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary + let raw_kwdefaults = match obj { + ObjectContent::Dict(ref d) => d, + _ => panic!("bad type for default kwd"), + }; + kwdefaults.reserve(raw_kwdefaults.len()); + for &(ref key, ref value) in raw_kwdefaults { + match state.store.deref(&key).content { + ObjectContent::String(ref s) => { kwdefaults.insert(s.clone(), value.clone()); }, + _ => panic!("Defaults' keys must be strings."), + } } } let func = state.primitive_objects.new_function(func_name, frame.object.module(&state.store), code, kwdefaults); frame.var_stack.push(state.store.allocate(func)) }, + Instruction::BuildConstKeyMap(size) => { + let frame = call_stack.last_mut().unwrap(); + let obj = state.store.deref(&pop_stack!(state, frame.var_stack)).content.clone(); // TODO: clone only if necessary + let keys: Vec = match obj { + ObjectContent::Tuple(ref v) => v.clone(), + _ => panic!("bad BuildConstKeyMap keys argument."), + }; + let values: Vec = frame.var_stack.peek(size).unwrap().iter().map(|r| (*r).clone()).collect(); + let dict = state.primitive_objects.new_dict(keys.into_iter().zip(values).collect()); + frame.var_stack.push(state.store.allocate(dict)) + } _ => panic!(format!("todo: instruction {:?}", instruction)), } }; diff --git a/src/varstack.rs b/src/varstack.rs index 7bdff47..0e73843 100644 --- a/src/varstack.rs +++ b/src/varstack.rs @@ -9,6 +9,7 @@ pub trait VarStack : Debug { fn push(&mut self, value: Self::Item); fn pop_all_and_get_n_last(&mut self, nb: usize) -> Option>; fn pop_n_pairs(&mut self, nb: usize) -> Option>; + fn peek(&self, nb: usize) -> Option>; } #[derive(Debug)] @@ -75,4 +76,13 @@ impl VarStack for VectorVarStack where Item: Debug { pairs }) } + fn peek(&self, nb: usize) -> Option> { + if nb > self.vector.len() { + None + } + else { + let length = self.vector.len(); + Some(self.vector[(length-nb)..length].iter().collect()) + } + } } diff --git a/tests/test_basic.rs b/tests/test_basic.rs index 9150ad7..951dc51 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -6,14 +6,14 @@ use pythonvm::{MockEnvProxy, PyResult, run_file}; #[test] fn test_hello_world() { - let mut reader: &[u8] = b"\xee\x0c\r\n\xb0\x92\x0fW\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0e\x00\x00\x00e\x00\x00d\x00\x00\x83\x01\x00\x01d\x01\x00S)\x02z\x0bHello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08\x01\x00\x00\x00s\x00\x00\x00\x00"; + let mut reader: &[u8] = b"3\r\r\n\xe1\xc8\xf4Y\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0c\x00\x00\x00e\x00d\x00\x83\x01\x01\x00d\x01S\x00)\x02z\x0bhello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x16examples/helloworld.py\xda\x08\x01\x00\x00\x00s\x00\x00\x00\x00"; let mut path = PathBuf::new(); path.push(env::current_dir().unwrap()); path.push("pythonlib/"); let envproxy = MockEnvProxy::new(path); let (processor, result) = run_file(&mut reader, envproxy).unwrap(); if let PyResult::Return(_) = result { - assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"Hello world\n"); + assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"hello world\n"); } else { panic!(format!("Exited with: {:?}", result))