
Language Executors

Deep dive into the executor system that powers code execution in EiplGrader.

Overview

Language executors are responsible for:

  • Preparing code with test harnesses
  • Executing code in a safe environment
  • Handling language-specific type systems
  • Normalizing and returning results (see the lifecycle sketch just below)
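
A typical lifecycle ties these responsibilities together: prepare a harness, run it, compare results, and clean up. The sketch below is illustrative and assumes the PythonExecutor and test-case format described later on this page.

# Minimal lifecycle sketch (assumes the PythonExecutor defined below).
executor = PythonExecutor()
test_case = {
    "parameters": {"a": 2, "b": 3},
    "expected": 5,
    "function_name": "add",
}
code = "def add(a, b):\n    return a + b"

try:
    result = executor.execute_test(code, test_case, timeout=5)
    print(result["passed"])  # True when the generated function returns 5
finally:
    executor.cleanup()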

Executor Hierarchy

(Diagram: language executor class hierarchy)

Base Executor Classes

LanguageExecutor (Abstract Base)

import shutil
from abc import ABC, abstractmethod

class LanguageExecutor(ABC):
    """Base class for all language executors."""
    
    @abstractmethod
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare code with test harness for execution."""
        pass
    
    @abstractmethod
    def execute_test(self, code: str, test_case: dict, 
                    timeout: int = 5) -> dict:
        """Execute code and return results."""
        pass
    
    def cleanup(self):
        """Clean up temporary resources."""
        if hasattr(self, 'temp_dir') and self.temp_dir:
            shutil.rmtree(self.temp_dir, ignore_errors=True)

InterpretedLanguageExecutor

For languages with:

  • Runtime type inspection (Python, JavaScript)
  • Native JSON support
  • Direct script execution

from typing import Any, Tuple

class InterpretedLanguageExecutor(LanguageExecutor):
    """Base for interpreted languages with type inference."""
    
    def validate_or_infer_types(self, test_case: dict) -> Tuple[dict, Any]:
        """Validate provided types or infer from values."""
        parameters = test_case["parameters"]
        expected = test_case["expected"]
        
        # Check if types are provided
        if "parameter_types" in test_case:
            # Validate types match values
            self._validate_types_match(parameters, test_case["parameter_types"])
        else:
            # Infer types from values
            test_case["parameter_types"] = {
                name: self.infer_type(value)
                for name, value in parameters.items()
            }
        
        if "expected_type" not in test_case:
            test_case["expected_type"] = self.infer_type(expected)
        
        return parameters, expected
    
    def infer_type(self, value: Any) -> str:
        """Infer type from Python value."""
        if isinstance(value, bool):
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "float"
        elif isinstance(value, str):
            return "string"
        elif isinstance(value, list):
            return "list"
        elif isinstance(value, dict):
            return "dict"
        else:
            return "any"
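
For example, given a test case with no explicit type information, the executor fills it in. A small illustration; PythonExecutor, defined below, inherits this behavior:

executor = PythonExecutor()
test_case = {
    "parameters": {"name": "Ada", "count": 3},
    "expected": ["Ada", "Ada", "Ada"],
}

executor.validate_or_infer_types(test_case)
print(test_case["parameter_types"])  # {'name': 'string', 'count': 'integer'}
print(test_case["expected_type"])    # 'list'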

CompiledLanguageExecutor

For languages with:

  • Static type systems (Java, C++, Go)
  • A required compilation step
  • No native JSON support

from typing import Any

class CompiledLanguageExecutor(LanguageExecutor):
    """Base for compiled languages requiring explicit types."""
    
    def validate_types_provided(self, test_case: dict):
        """Ensure all required type information is provided."""
        if "parameter_types" not in test_case:
            raise ValueError(
                f"parameter_types required for {self.__class__.__name__}"
            )
        
        if "expected_type" not in test_case:
            raise ValueError(
                f"expected_type required for {self.__class__.__name__}"
            )
        
        # Validate all parameters have types
        for param in test_case["parameters"]:
            if param not in test_case["parameter_types"]:
                raise ValueError(f"Missing type for parameter: {param}")
    
    @abstractmethod
    def get_type_mapping(self) -> dict:
        """Map generic types to language-specific types."""
        pass
    
    @abstractmethod
    def format_value(self, value: Any, type_str: str) -> str:
        """Format Python value as language-specific literal."""
        pass
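
In practice this means a test case for a compiled language must spell out every type up front. The example below uses Java type names, matching how the JavaExecutor in the next section consumes parameter_types; other compiled-language executors may expect the generic names instead.

# Explicitly typed test case for a compiled-language executor.
test_case = {
    "function_name": "addNumbers",
    "parameters": {"a": 2, "b": 3},
    "parameter_types": {"a": "int", "b": "int"},
    "expected": 5,
    "expected_type": "int",
}
# validate_types_provided() accepts this; dropping parameter_types or
# expected_type raises ValueError before any code is generated.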

Executor Implementations

Python Executor Example

import json
import subprocess
import sys

class PythonExecutor(InterpretedLanguageExecutor):
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare Python code with a JSON test harness."""
        params, expected = self.validate_or_infer_types(test_case)

        harness = f"""
import json
import sys

# Generated function
{code}

# Test execution
try:
    params = json.loads('''{json.dumps(params)}''')
    result = {test_case.get('function_name', 'solution')}(**params)
    print(json.dumps({{"result": result, "success": True}}))
except Exception as e:
    print(json.dumps({{"error": str(e), "success": False}}))
    sys.exit(1)
"""
        return harness
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        """Execute Python code and return results."""
        full_code = self.prepare_code(code, test_case)
        
        try:
            result = subprocess.run(
                [sys.executable, '-c', full_code],
                capture_output=True,
                text=True,
                timeout=timeout
            )
            
            if result.returncode != 0:
                return {
                    'passed': False,
                    # The harness prints its own JSON error before exiting 1,
                    # so fall back to stdout when stderr is empty.
                    'error': result.stderr or result.stdout,
                    'error_type': 'runtime'
                }
            
            output = json.loads(result.stdout)
            if not output['success']:
                return {
                    'passed': False,
                    'error': output['error'],
                    'error_type': 'runtime'
                }
            
            actual = output['result']
            expected = test_case['expected']
            
            return {
                'passed': actual == expected,
                'expected': expected,
                'actual': actual
            }
            
        except subprocess.TimeoutExpired:
            return {
                'passed': False,
                'error': 'Execution timeout',
                'error_type': 'timeout'
            }
        except json.JSONDecodeError:
            return {
                'passed': False,
                'error': f'Invalid output: {result.stdout}',
                'error_type': 'output_format'
            }
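
A quick usage sketch; the returned dictionary follows the result format shown above:

executor = PythonExecutor()
result = executor.execute_test(
    "def solution(x):\n    return x * 2",
    {"parameters": {"x": 21}, "expected": 42},
)
print(result)  # {'passed': True, 'expected': 42, 'actual': 42}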

Java Executor Example

import os
import subprocess
import tempfile

class JavaExecutor(CompiledLanguageExecutor):
    def get_type_mapping(self) -> dict:
        return {
            "integer": "int",
            "float": "double",
            "string": "String",
            "boolean": "boolean",
            "list": "List",
            "dict": "Map"
        }
    
    def format_value(self, value: Any, type_str: str) -> str:
        """Format value as Java literal."""
        if type_str == "String":
            return f'"{value}"'
        elif type_str == "boolean":
            return "true" if value else "false"
        elif type_str.startswith("int[]"):
            return f"new int[]{{{', '.join(map(str, value))}}}"
        elif type_str.startswith("List"):
            return f"Arrays.asList({', '.join(self.format_value(v, 'Object') for v in value)})"
        else:
            return str(value)
    
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare Java code with embedded test values."""
        self.validate_types_provided(test_case)
        
        # Build parameter declarations
        param_decls = []
        for name, value in test_case["parameters"].items():
            java_type = test_case["parameter_types"][name]
            formatted = self.format_value(value, java_type)
            param_decls.append(f"        {java_type} {name} = {formatted};")
        
        # Build method call
        func_name = test_case.get("function_name", "solution")
        param_names = ", ".join(test_case["parameters"].keys())
        
        # The generated code is expected to define a Solution class
        # containing the target method.
        harness = f"""
import java.util.*;

public class TestHarness {{
{code}

    public static void main(String[] args) {{
{chr(10).join(param_decls)}

        Solution sol = new Solution();
        Object result = sol.{func_name}({param_names});

        System.out.println("{{\\"result\\": " + formatResult(result) + "}}");
    }}

    private static String formatResult(Object obj) {{
        if (obj instanceof String) {{
            return "\\"" + obj + "\\"";
        }} else if (obj instanceof int[]) {{
            return Arrays.toString((int[]) obj);
        }} else {{
            return String.valueOf(obj);
        }}
    }}
}}
"""
        return harness
    
    def compile_code(self, source_file: str) -> tuple[bool, str]:
        """Compile Java source file."""
        result = subprocess.run(
            ['javac', source_file],
            capture_output=True,
            text=True
        )
        return result.returncode == 0, result.stderr
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        """Compile and execute Java code."""
        # Create temporary directory
        self.temp_dir = tempfile.mkdtemp()
        source_file = os.path.join(self.temp_dir, 'TestHarness.java')
        
        try:
            # Write source code
            full_code = self.prepare_code(code, test_case)
            with open(source_file, 'w') as f:
                f.write(full_code)
            
            # Compile
            success, error = self.compile_code(source_file)
            if not success:
                return {
                    'passed': False,
                    'error': error,
                    'error_type': 'compilation'
                }
            
            # Execute
            result = subprocess.run(
                ['java', '-cp', self.temp_dir, 'TestHarness'],
                capture_output=True,
                text=True,
                timeout=timeout
            )
            
            if result.returncode != 0:
                return {
                    'passed': False,
                    'error': result.stderr,
                    'error_type': 'runtime'
                }
            
            # Parse output and compare
            # Implementation continues...
            
        finally:
            self.cleanup()

In-Place Modification Support

Executors must handle three test modes; in the sketches below, func_name, params, and the expected values are placeholders filled in from the test case, and a mode dispatcher is sketched after the three modes:

Mode 0: Return Value Testing (Default)

def prepare_code_mode_0(self, code: str, test_case: dict) -> str:
    """Standard function call with return value check."""
    return f"""
{code}
result = {func_name}({params})
assert result == {expected}
"""

Mode 1: In-Place Modification

def prepare_code_mode_1(self, code: str, test_case: dict) -> str:
    """Test in-place modification of arguments."""
    return f"""
{code}
# Create mutable copy
test_data = {params}
{func_name}(test_data)
assert test_data == {expected}
"""

Mode 2: Both Modification and Return

def prepare_code_mode_2(self, code: str, test_case: dict) -> str:
    """Test both in-place modification and return value."""
    return f"""
{code}
test_data = {params}
result = {func_name}(test_data)
assert test_data == {expected_state}
assert result == {expected_return}
"""

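Which mode applies is decided per test case. The dispatcher below is a minimal sketch that assumes a hypothetical inplace field on the test case; the actual field name used by EiplGrader may differ.

def prepare_code(self, code: str, test_case: dict) -> str:
    """Dispatch to the harness builder for the requested test mode.

    Assumes a hypothetical 'inplace' field with values "0", "1", or "2".
    """
    mode = str(test_case.get("inplace", "0"))
    builders = {
        "0": self.prepare_code_mode_0,
        "1": self.prepare_code_mode_1,
        "2": self.prepare_code_mode_2,
    }
    return builders[mode](code, test_case)
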
Error Handling

Error Categories

class ExecutorError:
    COMPILATION = "compilation"      # Static language compilation failed
    RUNTIME = "runtime"             # Code executed but crashed
    TIMEOUT = "timeout"             # Execution exceeded time limit
    OUTPUT_FORMAT = "output_format" # Could not parse output
    SYSTEM = "system"              # Executor system error

Error Response Format

def create_error_response(error_type: str, message: str, 
                         details: dict = None) -> dict:
    """Create standardized error response."""
    response = {
        'passed': False,
        'error_type': error_type,
        'error': message
    }
    
    if details:
        response.update(details)
    
    return response
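
For example, a timed-out execution becomes:

response = create_error_response(
    ExecutorError.TIMEOUT,
    "Execution exceeded 5 second limit",
    details={"timeout": 5},
)
# {'passed': False, 'error_type': 'timeout',
#  'error': 'Execution exceeded 5 second limit', 'timeout': 5}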

Performance Optimization

Executor Pooling

from collections import defaultdict

class ExecutorPool:
    """Reuse executor instances for better performance."""
    
    def __init__(self, max_size: int = 10):
        self.pool = defaultdict(list)
        self.max_size = max_size
    
    def acquire(self, language: str) -> LanguageExecutor:
        if self.pool[language]:
            return self.pool[language].pop()
        return LanguageRegistry.get_executor(language)()
    
    def release(self, language: str, executor: LanguageExecutor):
        if len(self.pool[language]) < self.max_size:
            executor.cleanup()  # Reset state
            self.pool[language].append(executor)
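
Callers acquire an executor, run their tests, and always release it back to the pool. A usage sketch; acquire() falls back to LanguageRegistry when the pool is empty, as shown above:

pool = ExecutorPool(max_size=4)
code = "def solution(x):\n    return x + 1"
test_case = {"parameters": {"x": 1}, "expected": 2}

executor = pool.acquire("python")
try:
    result = executor.execute_test(code, test_case)
finally:
    pool.release("python", executor)  # returned even if the test raised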

Compilation Caching

import hashlib
import os
import shutil
from typing import Optional

class CompilationCache:
    """Cache compiled binaries for static languages."""
    
    def __init__(self, cache_dir: str = ".compilation_cache"):
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)
    
    def get_cache_key(self, code: str, language: str) -> str:
        """Generate cache key from code content."""
        content = f"{language}:{code}"
        return hashlib.sha256(content.encode()).hexdigest()
    
    def get(self, code: str, language: str) -> Optional[str]:
        """Retrieve cached binary path."""
        key = self.get_cache_key(code, language)
        binary_path = os.path.join(self.cache_dir, key)
        
        if os.path.exists(binary_path):
            return binary_path
        return None
    
    def put(self, code: str, language: str, binary_path: str):
        """Store compiled binary in cache."""
        key = self.get_cache_key(code, language)
        cache_path = os.path.join(self.cache_dir, key)
        shutil.copy2(binary_path, cache_path)
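
A compiled-language executor might consult the cache before invoking the compiler. The helper below is illustrative, not part of EiplGrader; it assumes the caller knows where the compiler writes its output.

def compile_with_cache(executor, cache: CompilationCache,
                       source_code: str, source_file: str,
                       output_path: str, language: str) -> str:
    """Return a compiled artifact, reusing the cache when possible."""
    cached = cache.get(source_code, language)
    if cached is not None:
        return cached  # identical source was compiled before

    ok, error = executor.compile_code(source_file)
    if not ok:
        raise RuntimeError(error)

    cache.put(source_code, language, output_path)
    return output_path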

Security Considerations

Process Isolation

import os
import shlex
import subprocess
import sys

def execute_with_limits(command: list, timeout: int = 5,
                        memory_mb: int = 256) -> subprocess.CompletedProcess:
    """Execute with resource limits."""
    if sys.platform == "linux":
        # Use ulimit for resource constraints; shlex.join keeps arguments
        # containing spaces or shell metacharacters intact.
        wrapped_command = [
            "bash", "-c",
            f"ulimit -v {memory_mb * 1024}; exec {shlex.join(command)}"
        ]
    else:
        wrapped_command = command
    
    return subprocess.run(
        wrapped_command,
        capture_output=True,
        text=True,
        timeout=timeout,
        env={**os.environ, "PYTHONPATH": ""}  # Clean environment
    )

Input Validation

import re

def validate_test_case(test_case: dict):
    """Validate test case structure and content."""
    required = ["parameters", "expected"]
    for field in required:
        if field not in test_case:
            raise ValueError(f"Missing required field: {field}")
    
    # Validate parameter names (prevent injection)
    for param_name in test_case["parameters"]:
        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', param_name):
            raise ValueError(f"Invalid parameter name: {param_name}")
    
    # Size limits
    if len(str(test_case)) > 10000:
        raise ValueError("Test case too large")
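
For example, a parameter name carrying an injection payload is rejected before any harness is generated:

try:
    validate_test_case({
        "parameters": {"x); import os; (": 1},
        "expected": 1,
    })
except ValueError as e:
    print(e)  # Invalid parameter name: x); import os; (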

Testing Executors

Unit Test Template

class TestLanguageExecutor:
    """Template for executor unit tests."""
    
    def test_type_system(self):
        """Test type validation/inference."""
        pass
    
    def test_value_formatting(self):
        """Test language-specific value formatting."""
        pass
    
    def test_code_preparation(self):
        """Test harness generation."""
        pass
    
    def test_execution(self):
        """Test code execution."""
        pass
    
    def test_error_handling(self):
        """Test various error conditions."""
        pass
    
    def test_cleanup(self):
        """Test resource cleanup."""
        pass
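
Filled in for the Python executor, two of those tests might look like this (pytest-style; infer_type and execute_test are the methods shown earlier on this page):

class TestPythonExecutor:
    def test_type_system(self):
        """Types are inferred correctly from Python values."""
        executor = PythonExecutor()
        assert executor.infer_type(True) == "boolean"  # bool checked before int
        assert executor.infer_type(3) == "integer"
        assert executor.infer_type([1, 2]) == "list"

    def test_execution(self):
        """A correct solution passes its test case."""
        executor = PythonExecutor()
        result = executor.execute_test(
            "def solution(x):\n    return x + 1",
            {"parameters": {"x": 1}, "expected": 2},
        )
        assert result["passed"]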

Integration Test Template

def test_executor_integration():
    """Test executor with real code generation."""
    # Generate code
    code = generate_sample_code()
    
    # Create test cases
    test_cases = create_test_cases()
    
    # Execute tests
    executor = PythonExecutor()  # any concrete executor; the base class is abstract
    results = []
    
    for test_case in test_cases:
        result = executor.execute_test(code, test_case)
        results.append(result)
    
    # Verify results
    assert all(r['passed'] for r in results)

Debugging Executors

Enable Debug Logging

import logging

# Configure executor logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

class DebuggableExecutor(LanguageExecutor):
    """Logging mixin; combine with a concrete executor, e.g.
    class DebuggablePythonExecutor(DebuggableExecutor, PythonExecutor)."""

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        self.logger.debug(f"Executing test: {test_case}")
        self.logger.debug(f"Code:\n{code}")
        
        result = super().execute_test(code, test_case, timeout)
        
        self.logger.debug(f"Result: {result}")
        return result

Common Issues and Solutions

Issue               Symptoms                    Solution
Type mismatch       "Invalid type" errors       Verify the type mapping implementation
Compilation fails   Syntax errors in harness    Check language-specific formatting
Output parsing      "Invalid output format"     Verify JSON/output formatting
Timeout             Tests fail with timeout     Increase the timeout or optimize the code
Resource cleanup    Temp files accumulate       Ensure cleanup() is called

Next Steps