Développement d'une machine virtuelle à pile et d'un compilateur pour celle-ci (partie I)

Il se trouve qu'au cours des 18 dernières années, je n'ai pas eu à écrire de C/C++. Au travail, on utilisait Java et, compte tenu des postes que j'occupais, mes activités relevaient davantage de l'entrepreneuriat : négociations, ventes aux entreprises, opérations de production dans le bâtiment et structuration d'opérations d'investissement. Je voulais retrouver mes compétences pendant mon temps libre, faire travailler la partie de mon cerveau que je n'avais pas sollicitée depuis 18 ans et, bien sûr, repartir des fondamentaux. Restait à trouver une tâche.





, 70-80 , - (, ) . "", (Go, Kotlin ) .



32-bit C , . Computer Science . , , , . . . .





, :





CPU : processeur 32 bits à pile, doté des registres IP (Instruction Pointer) et SP (Stack Pointer) ; le mot machine est un entier signé de 32 bits (__int32).





RAM : 65536 mots de 32 bits. La mémoire contient le code (code/text), les données (data, heap) et la pile (stack).





:





  // Machine word of the VM: a 32-bit signed integer. `int` replaces the
  // Microsoft-specific `__int32` keyword (which MSVC treats as a synonym for
  // `int`), so the code also builds with other compilers; the static_assert
  // rejects any platform where `int` is not 4 bytes wide.
  using WORD = int;
  static_assert(sizeof(WORD) == 4, "WORD must be a 32-bit integer");

  // Instruction word layout: OP_CODE_MASK selects the low 8 bits (the
  // operation code), OP_TYPE_MASK selects the 3-bit field in bits 9..11.
  constexpr WORD OP_CODE_MASK = 0b00000000000000000000000011111111;
  constexpr WORD OP_TYPE_MASK = 0b00000000000000000000111000000000;

  // Stack operations (OP_DUP is numbered further below).
  constexpr WORD OP_HALT      = 0b00000000000000000000000000000000;
  constexpr WORD OP_CONST     = 0b00000000000000000000000000000001;
  constexpr WORD OP_PUSH      = 0b00000000000000000000000000000010;
  constexpr WORD OP_POP       = 0b00000000000000000000000000000011;

  // Arithmetic operations.
  constexpr WORD OP_INC       = 0b00000000000000000000000000000100;
  constexpr WORD OP_DEC       = 0b00000000000000000000000000000101;
  constexpr WORD OP_ADD       = 0b00000000000000000000000000000110;
  constexpr WORD OP_SUB       = 0b00000000000000000000000000000111;
  constexpr WORD OP_MUL       = 0b00000000000000000000000000001000;
  constexpr WORD OP_DIV       = 0b00000000000000000000000000001001;

  // Bitwise operations.
  constexpr WORD OP_AND       = 0b00000000000000000000000000001010;
  constexpr WORD OP_OR        = 0b00000000000000000000000000001011;
  constexpr WORD OP_XOR       = 0b00000000000000000000000000001100;
  constexpr WORD OP_NOT       = 0b00000000000000000000000000001101;
  constexpr WORD OP_SHL       = 0b00000000000000000000000000001110;
  constexpr WORD OP_SHR       = 0b00000000000000000000000000001111;

  // Flow control. Value 16 (0b00010000) is not assigned to any opcode.
  constexpr WORD OP_JMP       = 0b00000000000000000000000000010001;
  constexpr WORD OP_CMPJE     = 0b00000000000000000000000000010010;
  constexpr WORD OP_CMPJNE    = 0b00000000000000000000000000010011;
  constexpr WORD OP_CMPJG     = 0b00000000000000000000000000010100;
  constexpr WORD OP_CMPJGE    = 0b00000000000000000000000000010101;
  constexpr WORD OP_CMPJL     = 0b00000000000000000000000000010110;
  constexpr WORD OP_CMPJLE    = 0b00000000000000000000000000010111;

  // Stack duplicate, procedure calls and system calls.
  constexpr WORD OP_DUP       = 0b00000000000000000000000000011000;
  constexpr WORD OP_CALL      = 0b00000000000000000000000000011001;
  constexpr WORD OP_RET       = 0b00000000000000000000000000011010;
  constexpr WORD OP_SYSCALL   = 0b00000000000000000000000000011011;

  // Reserved opcode values.
  constexpr WORD OP_RESERVED1 = 0b00000000000000000000000000011100;
  constexpr WORD OP_RESERVED2 = 0b00000000000000000000000000011101;
  constexpr WORD OP_RESERVED3 = 0b00000000000000000000000000011110;
  constexpr WORD OP_RESERVED4 = 0b00000000000000000000000000011111;

  // Size of the VM memory, counted in WORDs.
  constexpr WORD MAX_MEMORY = 65536;
      
      



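Sur les 32 bits d'une instruction, les 8 bits de poids faible codent l'opération (opcode), 1 bit indique un opérande immédiat (immediate) et 3 bits codent le type de données (byte, short, int, long, char, float, double, etc.) ; les bits restants sont réservés.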





/// Virtual machine runtime: holds the VM memory and the IP/SP/FP registers
/// and exposes loading, execution and memory access.
class VMRuntime {
	private:
		// ---- Machine state ----
		WORD  memory[MAX_MEMORY];   ///< Random access memory array
		WORD  ip;                   ///< Instruction pointer
		WORD  sp;                   ///< Stack pointer
		WORD  fp;                   ///< Frame pointer

		// ---- Internal helpers ----
		void systemCall(WORD n);    ///< System call
		void printState();          ///< Print current VM state

	public:
		// ---- Lifetime ----
		VMRuntime();                ///< Constructor
		~VMRuntime();               ///< Destructor

		// ---- Program loading and execution ----
		/// Load executable image
		bool loadImage(void* image, size_t size);
		/// Runs image from address 0
		void run();

		// ---- Memory access ----
		/// Read WORD from memory
		WORD readWord(WORD address);
		/// Write WORD to memory
		void writeWord(WORD address, WORD value);
		/// Get max address in 32-bit words
		WORD getMaxAddress();

		// ---- Register access ----
		/// Get Instruction Pointer address
		WORD getIP();
		/// Get Stack Pointer address
		WORD getSP();
	};
      
      



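La classe permet de charger une image exécutable (loadImage), de lancer son exécution (run), de lire et d'écrire en mémoire (readWord, writeWord) et d'obtenir les valeurs courantes de IP et SP. Les méthodes privées sont printState (affichage de l'état courant de la machine) et systemCall, le mécanisme d'appels système (en quelque sorte l'API de la machine).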





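La méthode principale est la boucle d'exécution : elle lit le mot situé à l'adresse IP, le décode, exécute l'opération correspondante, puis passe à l'instruction suivante. La boucle s'arrête sur l'instruction HALT.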





// Executes the program held in `memory`, starting at address 0, until
// OP_HALT, an unknown opcode or a division by zero is encountered.
//
// The stack lives at the top of memory and grows downwards: `sp` starts at
// MAX_MEMORY - 1, a push is `memory[--sp] = v` and a pop is `memory[sp++]`.
// Instructions that take an operand (CONST, PUSH, POP, jumps, CALL, SYSCALL)
// read it from the word that follows the opcode.
//
// For every two-operand instruction the top of stack is the SECOND operand
// (`b`) and the word below it is the FIRST operand (`a`).
//
// NOTE(review): `ip`, `sp` and the addresses taken from the program are not
// range-checked, shift counts are not validated, and INT_MIN / -1 is not
// guarded; a malformed image can therefore still access memory out of
// bounds or trigger undefined behaviour.
void VMRuntime::run() {
	WORD a, b;
	WORD opcode;

	ip = 0;
	sp = MAX_MEMORY - 1;

	while (true) {

		// Fetch the next instruction word and advance to its operand (if any).
		opcode = memory[ip++];

		switch (opcode) {
		//------------------------------------------------------------------------
		// STACK OPERATIONS
		//------------------------------------------------------------------------
		case OP_CONST:      // push the immediate operand
			memory[--sp] = memory[ip++];
			break;
		case OP_PUSH:       // push the word stored at the operand address
			memory[--sp] = memory[memory[ip++]];
			break;
		case OP_POP:        // pop the top of stack into the operand address
			memory[memory[ip++]] = memory[sp++];
			break;
		case OP_DUP:        // duplicate the top of stack
			a = memory[sp];
			memory[--sp] = a;
			break;
		//------------------------------------------------------------------------
		// ARITHMETIC OPERATIONS
		//------------------------------------------------------------------------
		case OP_INC:
			memory[sp]++;
			break;
		case OP_DEC:
			memory[sp]--;
			break;
		case OP_ADD:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a + b;
			break;
		case OP_SUB:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a - b;
			break;
		case OP_MUL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a * b;
			break;
		case OP_DIV:
			b = memory[sp++];
			a = memory[sp++];
			if (b == 0) {
				// Integer division by zero is undefined behaviour in C++;
				// stop the VM the same way an unknown opcode does.
				std::cout << "Runtime error - division by zero" << std::endl;
				printState();
				return;
			}
			memory[--sp] = a / b;
			break;
		//------------------------------------------------------------------------
		// BITWISE OPERATIONS
		//------------------------------------------------------------------------
		case OP_AND:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a & b;
			break;
		case OP_OR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a | b;
			break;
		case OP_XOR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a ^ b;
			break;
		case OP_NOT:        // one-operand: replace the top of stack in place
			memory[sp] = ~memory[sp];
			break;
		case OP_SHL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a << b;
			break;
		case OP_SHR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a >> b;
			break;
		//------------------------------------------------------------------------
		// FLOW CONTROL OPERATIONS
		// Conditional jumps pop both operands; when the condition fails the
		// jump target word is skipped with ip++.
		//------------------------------------------------------------------------
		case OP_JMP:
			ip = memory[ip];
			break;
		case OP_CMPJE:
			b = memory[sp++];
			a = memory[sp++];
			if (a == b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJNE:
			b = memory[sp++];
			a = memory[sp++];
			if (a != b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJG:
			b = memory[sp++];
			a = memory[sp++];
			if (a > b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJGE:
			// Pop order matches the other comparisons (b first, then a);
			// popping them the other way round turns ">=" into "<=".
			b = memory[sp++];
			a = memory[sp++];
			if (a >= b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJL:
			b = memory[sp++];
			a = memory[sp++];
			if (a < b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJLE:
			b = memory[sp++];
			a = memory[sp++];
			if (a <= b) ip = memory[ip]; else ip++;
			break;
		//------------------------------------------------------------------------
		// PROCEDURE CALL OPERATIONS
		//------------------------------------------------------------------------
		case OP_CALL:
			a = memory[ip++];        // target address; ip now points past the operand
			memory[--sp] = ip;       // push the return address
			ip = a;                  // jump to the procedure
			break;
		case OP_RET:
			ip = memory[sp++];       // pop the return address pushed by OP_CALL
			break;
		case OP_SYSCALL:
			a = memory[ip++];        // system call number
			systemCall(a);
			break;
		case OP_HALT:
			printState();
			return;
		default:
			std::cout << "Runtime error - unknown opcode=" << opcode << std::endl;
			printState();
			return;
		}
	}
}
  
  // Dispatches system call number `n`.
  // Only one system call is implemented - print string (0x20): it pops the
  // address of the string from the VM stack and writes the bytes stored from
  // that word onwards to standard output as a NUL-terminated C string.
  //
  // An address outside [0, MAX_MEMORY) or an unknown call number is reported
  // on standard output; execution then continues, because this function has
  // no way to signal failure to run().
  // NOTE(review): the string is not checked for a terminating NUL inside
  // `memory`, so an unterminated string can still be read past the array.
  void VMRuntime::systemCall(WORD n) {
	switch (n) {
	case 0x20: {  // print C style string
		const WORD ptr = memory[sp++];
		if (ptr < 0 || ptr >= MAX_MEMORY) {
			std::cout << "Runtime error - invalid string address=" << ptr << std::endl;
			break;
		}
		// The string bytes are packed into consecutive WORDs of VM memory.
		std::cout << reinterpret_cast<const char*>(&memory[ptr]);
		break;
	}
	default:
		std::cout << "Runtime error - unknown system call=" << n << std::endl;
		break;
	}
  }
      
      



, , , ( , ).





. , , "" .





/// Builder for the executable image that is handed to VMRuntime::loadImage().
/// NOTE(review): only the declarations are visible here; the notes on the
/// individual members are inferred from their names and call sites and
/// should be confirmed against the implementation.
class VMImage {
	private:
		WORD memory[MAX_MEMORY];    // image contents, one WORD per VM address
		WORD imageSize;             // presumably the value behind getImageSize() - confirm
		WORD ep;                    // presumably the "emit pointer" - confirm

	public:
		// ---- Lifetime ----
		VMImage();
		~VMImage();
		void clear();

		// ---- Code emission ----
		// Callers use the value returned by emit() as the address of the
		// emitted instruction (e.g. as a jump target).
		WORD emit(WORD opcode);
		WORD emit(WORD opcode, WORD operand);
		WORD setEmitPointer(WORD address);
		WORD getEmitPointer();

		// ---- Raw memory access ----
		WORD readWord(WORD address);
		void writeWord(WORD address, WORD value);
		WORD writeData(WORD address, void* data, size_t length);

		// ---- Finished image ----
		void* getImage();
		size_t getImageSize();

		// ---- Debugging ----
		void dissasemble();         // (sic) spelling is part of the public interface

	};
      
      



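Pour tester la machine, écrivons un petit programme qui affiche « Hello, world from VM! » 10 fois, en utilisant une variable, une boucle et un appel de fonction. En (pseudo-)assembleur de notre machine virtuelle, cela donne :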





start:                           // code starts at address [0]
push   iVar                      // push the value of iVar onto the stack
dec                              // decrement the top of the stack
call   fn                        // call function fn
dup                              // duplicate the top of the stack (Top Of Stack)
pop    iVar                      // pop the duplicate from the stack into iVar
const  0                         // push the constant 0 onto the stack
cmpjg  start                     // if iVar > 0, jump to start:
halt                             // end of program


fn:                              // function at address [64]
const  myStr                     // push the address of the string
syscall 0x20                     // system call 0x20: print the C-style string
ret                              // return to the caller


dataSeg:                         // data segment at address [128]
iVar = 10                       
myStr = "Hello, world from VM!\n"
      
      



Pour l'instant, j'ai la flemme d'écrire un assembleur pour la machine virtuelle : l'objectif est de créer un langage de haut niveau que nous compilerons directement en instructions de la machine virtuelle. Pour écrire ce programme dans l'image exécutée par la machine virtuelle, nous utiliserons donc la classe VMImage :





// Fills `img` with the demo program: a loop that decrements iVar (initially
// 10), calls fn() to print the greeting and repeats while iVar > 0.
//
// Image layout (addresses in WORDs):
//   start   main loop, emitted from the image's current emit pointer
//   64      function fn()
//   128     data segment: iVar, followed by the greeting string
//
// img: image to write into; must not be null.
void createExecutableImage(VMImage* img) {

	const WORD dataSeg = 128;                   // Data segment starts at 128

	const WORD iVar = dataSeg;
	const WORD myStr = dataSeg + 1;

	// A mutable array is used because writeData() takes a non-const void*,
	// to which a string literal does not convert in standard C++. sizeof
	// counts the terminating NUL, so the length follows the text instead
	// of being hard-coded.
	char greeting[] = "Hello, world from VM!\n";

	img->writeWord(iVar, 10);
	img->writeData(myStr, greeting, sizeof(greeting));

	const WORD fn = 64;

	const WORD start = img->emit(OP_PUSH, iVar); // stack <- [iVar] (operand 1)
	img->emit(OP_DEC);                          // stack[top]--  (operand 1 decrement)
	img->emit(OP_CALL, fn);                     // Call function fn()
	img->emit(OP_DUP);                          // duplicate stack top (operand 1 duplicate)
	img->emit(OP_POP, iVar);                    // stack -> [iVar] (pop operand 1 duplicate to iVar)
	img->emit(OP_CONST, 0);                     // push const 0 (operand 2)
	img->emit(OP_CMPJG, start);                 // if (operand1 > operand2) jump to start
	img->emit(OP_HALT);                         // end of program

	img->setEmitPointer(fn);                    // Function fn()
	img->emit(OP_CONST, myStr);                 // Push constant string address
	img->emit(OP_SYSCALL, 0x20);                // System call 0x20: print C style string to standard output
	img->emit(OP_RET);                          // Return

}
      
      



Et puis nous allons lancer l'exécution de notre image dans une machine virtuelle, en mesurant le temps :





// Builds the demo image, loads it into a fresh VM, runs it and prints the
// wall-clock execution time in seconds.
// Returns 0 on success and 1 when the image could not be loaded.
int main() {
	// Both objects embed a MAX_MEMORY-word array, so they are kept on the
	// heap rather than on the stack.
	VMImage* img = new VMImage();
	createExecutableImage(img);
	VMRuntime* vm = new VMRuntime();

	int exitCode = 0;

	// NOTE(review): assumes loadImage() returns true on success - confirm
	// against its implementation.
	if (vm->loadImage(img->getImage(), img->getImageSize())) {
		const auto start = std::chrono::high_resolution_clock::now();

		vm->run();

		const auto end = std::chrono::high_resolution_clock::now();
		const auto elapsedNs =
			std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
		std::cout << "EXECUTION TIME: " << elapsedNs / 1000000000.0 << "s" << std::endl;
	} else {
		std::cout << "Failed to load executable image" << std::endl;
		exitCode = 1;
	}

	delete vm;
	delete img;
	return exitCode;
}
      
      



On obtient dans la console :





Hourra ! Génial ! Les opérations de pile, l'arithmétique, les instructions de saut conditionnel et les appels de fonction fonctionnent ! C'est encourageant. Apparemment, je vais continuer à développer ce projet...








All Articles