
Language Executors

Deep dive into the executor system that powers code execution in EiplGrader.

Overview

Language executors are responsible for:

  • Preparing code with test harnesses
  • Executing code in a safe environment
  • Handling language-specific type systems
  • Normalizing and returning results (see the lifecycle sketch just below)
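
A typical lifecycle ties these responsibilities together: prepare a harness, run it, compare results, and clean up. The sketch below is illustrative and assumes the PythonExecutor and test-case format described later on this page.

# Minimal lifecycle sketch (assumes the PythonExecutor defined below).
executor = PythonExecutor()
test_case = {
    "parameters": {"a": 2, "b": 3},
    "expected": 5,
    "function_name": "add",
}
code = "def add(a, b):\n    return a + b"

try:
    result = executor.execute_test(code, test_case, timeout=5)
    print(result["passed"])  # True when the generated function returns 5
finally:
    executor.cleanup()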

Executor Hierarchy

(Diagram: language executor class hierarchy)

Base Executor Classes

LanguageExecutor (Abstract Base)

import shutil
from abc import ABC, abstractmethod

class LanguageExecutor(ABC):
    """Base class for all language executors."""
    
    @abstractmethod
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare code with test harness for execution."""
        pass
    
    @abstractmethod
    def execute_test(self, code: str, test_case: dict, 
                    timeout: int = 5) -> dict:
        """Execute code and return results."""
        pass
    
    def cleanup(self):
        """Clean up temporary resources."""
        if hasattr(self, 'temp_dir') and self.temp_dir:
            shutil.rmtree(self.temp_dir, ignore_errors=True)

InterpretedLanguageExecutor

For languages with:

  • Runtime type inspection (Python, JavaScript)
  • Native JSON support
  • Direct script execution

from typing import Any, Tuple

class InterpretedLanguageExecutor(LanguageExecutor):
    """Base for interpreted languages with type inference."""
    
    def validate_or_infer_types(self, test_case: dict) -> Tuple[dict, Any]:
        """Validate provided types or infer from values."""
        parameters = test_case["parameters"]
        expected = test_case["expected"]
        
        # Check if types are provided
        if "parameter_types" in test_case:
            # Validate types match values
            self._validate_types_match(parameters, test_case["parameter_types"])
        else:
            # Infer types from values
            test_case["parameter_types"] = {
                name: self.infer_type(value)
                for name, value in parameters.items()
            }
        
        if "expected_type" not in test_case:
            test_case["expected_type"] = self.infer_type(expected)
        
        return parameters, expected
    
    def infer_type(self, value: Any) -> str:
        """Infer type from Python value."""
        if isinstance(value, bool):
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "float"
        elif isinstance(value, str):
            return "string"
        elif isinstance(value, list):
            return "list"
        elif isinstance(value, dict):
            return "dict"
        else:
            return "any"
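
For example, given a test case with no explicit type information, the executor fills it in. A small illustration; PythonExecutor, defined below, inherits this behavior:

executor = PythonExecutor()
test_case = {
    "parameters": {"name": "Ada", "count": 3},
    "expected": ["Ada", "Ada", "Ada"],
}

executor.validate_or_infer_types(test_case)
print(test_case["parameter_types"])  # {'name': 'string', 'count': 'integer'}
print(test_case["expected_type"])    # 'list'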

CompiledLanguageExecutor

For languages with:

  • Static type systems (Java, C++, Go)
  • A required compilation step
  • No native JSON support

from typing import Any

class CompiledLanguageExecutor(LanguageExecutor):
    """Base for compiled languages requiring explicit types."""
    
    def validate_types_provided(self, test_case: dict):
        """Ensure all required type information is provided."""
        if "parameter_types" not in test_case:
            raise ValueError(
                f"parameter_types required for {self.__class__.__name__}"
            )
        
        if "expected_type" not in test_case:
            raise ValueError(
                f"expected_type required for {self.__class__.__name__}"
            )
        
        # Validate all parameters have types
        for param in test_case["parameters"]:
            if param not in test_case["parameter_types"]:
                raise ValueError(f"Missing type for parameter: {param}")
    
    @abstractmethod
    def get_type_mapping(self) -> dict:
        """Map generic types to language-specific types."""
        pass
    
    @abstractmethod
    def format_value(self, value: Any, type_str: str) -> str:
        """Format Python value as language-specific literal."""
        pass
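
In practice this means a test case for a compiled language must spell out every type up front. The example below uses Java type names, matching how the JavaExecutor in the next section consumes parameter_types; other compiled-language executors may expect the generic names instead.

# Explicitly typed test case for a compiled-language executor.
test_case = {
    "function_name": "addNumbers",
    "parameters": {"a": 2, "b": 3},
    "parameter_types": {"a": "int", "b": "int"},
    "expected": 5,
    "expected_type": "int",
}
# validate_types_provided() accepts this; dropping parameter_types or
# expected_type raises ValueError before any code is generated.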

Executor Implementations

Python Executor Example

import json
import subprocess
import sys

class PythonExecutor(InterpretedLanguageExecutor):
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare Python code with a JSON test harness."""
        params, expected = self.validate_or_infer_types(test_case)

        harness = f"""
import json
import sys

# Generated function
{code}

# Test execution
try:
    params = json.loads('''{json.dumps(params)}''')
    result = {test_case.get('function_name', 'solution')}(**params)
    print(json.dumps({{"result": result, "success": True}}))
except Exception as e:
    print(json.dumps({{"error": str(e), "success": False}}))
    sys.exit(1)
"""
        return harness
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        """Execute Python code and return results."""
        full_code = self.prepare_code(code, test_case)
        
        try:
            result = subprocess.run(
                [sys.executable, '-c', full_code],
                capture_output=True,
                text=True,
                timeout=timeout
            )
            
            if result.returncode != 0:
                return {
                    'passed': False,
                    # The harness prints its own JSON error before exiting 1,
                    # so fall back to stdout when stderr is empty.
                    'error': result.stderr or result.stdout,
                    'error_type': 'runtime'
                }
            
            output = json.loads(result.stdout)
            if not output['success']:
                return {
                    'passed': False,
                    'error': output['error'],
                    'error_type': 'runtime'
                }
            
            actual = output['result']
            expected = test_case['expected']
            
            return {
                'passed': actual == expected,
                'expected': expected,
                'actual': actual
            }
            
        except subprocess.TimeoutExpired:
            return {
                'passed': False,
                'error': 'Execution timeout',
                'error_type': 'timeout'
            }
        except json.JSONDecodeError:
            return {
                'passed': False,
                'error': f'Invalid output: {result.stdout}',
                'error_type': 'output_format'
            }
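
A quick usage sketch; the returned dictionary follows the result format shown above:

executor = PythonExecutor()
result = executor.execute_test(
    "def solution(x):\n    return x * 2",
    {"parameters": {"x": 21}, "expected": 42},
)
print(result)  # {'passed': True, 'expected': 42, 'actual': 42}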

Java Executor Example

import os
import subprocess
import tempfile

class JavaExecutor(CompiledLanguageExecutor):
    def get_type_mapping(self) -> dict:
        return {
            "integer": "int",
            "float": "double",
            "string": "String",
            "boolean": "boolean",
            "list": "List",
            "dict": "Map"
        }
    
    def format_value(self, value: Any, type_str: str) -> str:
        """Format value as Java literal."""
        if type_str == "String":
            return f'"{value}"'
        elif type_str == "boolean":
            return "true" if value else "false"
        elif type_str.startswith("int[]"):
            return f"new int[]{{{', '.join(map(str, value))}}}"
        elif type_str.startswith("List"):
            return f"Arrays.asList({', '.join(self.format_value(v, 'Object') for v in value)})"
        else:
            return str(value)
    
    def prepare_code(self, code: str, test_case: dict) -> str:
        """Prepare Java code with embedded test values."""
        self.validate_types_provided(test_case)
        
        # Build parameter declarations
        param_decls = []
        for name, value in test_case["parameters"].items():
            java_type = test_case["parameter_types"][name]
            formatted = self.format_value(value, java_type)
            param_decls.append(f"        {java_type} {name} = {formatted};")
        
        # Build method call
        func_name = test_case.get("function_name", "solution")
        param_names = ", ".join(test_case["parameters"].keys())
        
        # The generated code is expected to define a Solution class
        # containing the target method.
        harness = f"""
import java.util.*;

public class TestHarness {{
{code}

    public static void main(String[] args) {{
{chr(10).join(param_decls)}

        Solution sol = new Solution();
        Object result = sol.{func_name}({param_names});

        System.out.println("{{\\"result\\": " + formatResult(result) + "}}");
    }}

    private static String formatResult(Object obj) {{
        if (obj instanceof String) {{
            return "\\"" + obj + "\\"";
        }} else if (obj instanceof int[]) {{
            return Arrays.toString((int[]) obj);
        }} else {{
            return String.valueOf(obj);
        }}
    }}
}}
"""
        return harness
    
    def compile_code(self, source_file: str) -> tuple[bool, str]:
        """Compile Java source file."""
        result = subprocess.run(
            ['javac', source_file],
            capture_output=True,
            text=True
        )
        return result.returncode == 0, result.stderr
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        """Compile and execute Java code."""
        # Create temporary directory
        self.temp_dir = tempfile.mkdtemp()
        source_file = os.path.join(self.temp_dir, 'TestHarness.java')
        
        try:
            # Write source code
            full_code = self.prepare_code(code, test_case)
            with open(source_file, 'w') as f:
                f.write(full_code)
            
            # Compile
            success, error = self.compile_code(source_file)
            if not success:
                return {
                    'passed': False,
                    'error': error,
                    'error_type': 'compilation'
                }
            
            # Execute
            result = subprocess.run(
                ['java', '-cp', self.temp_dir, 'TestHarness'],
                capture_output=True,
                text=True,
                timeout=timeout
            )
            
            if result.returncode != 0:
                return {
                    'passed': False,
                    'error': result.stderr,
                    'error_type': 'runtime'
                }
            
            # Parse output and compare
            # Implementation continues...
            
        finally:
            self.cleanup()

In-Place Modification Support

Executors must handle three test modes; in the sketches below, func_name, params, and the expected values are placeholders filled in from the test case, and a mode dispatcher is sketched after the three modes:

Mode 0: Return Value Testing (Default)

def prepare_code_mode_0(self, code: str, test_case: dict) -> str:
    """Standard function call with return value check."""
    return f"""
{code}
result = {func_name}({params})
assert result == {expected}
"""

Mode 1: In-Place Modification

def prepare_code_mode_1(self, code: str, test_case: dict) -> str:
    """Test in-place modification of arguments."""
    return f"""
{code}
# Create mutable copy
test_data = {params}
{func_name}(test_data)
assert test_data == {expected}
"""

Mode 2: Both Modification and Return

def prepare_code_mode_2(self, code: str, test_case: dict) -> str:
    """Test both in-place modification and return value."""
    return f"""
{code}
test_data = {params}
result = {func_name}(test_data)
assert test_data == {expected_state}
assert result == {expected_return}
"""

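Which mode applies is decided per test case. The dispatcher below is a minimal sketch that assumes a hypothetical inplace field on the test case; the actual field name used by EiplGrader may differ.

def prepare_code(self, code: str, test_case: dict) -> str:
    """Dispatch to the harness builder for the requested test mode.

    Assumes a hypothetical 'inplace' field with values "0", "1", or "2".
    """
    mode = str(test_case.get("inplace", "0"))
    builders = {
        "0": self.prepare_code_mode_0,
        "1": self.prepare_code_mode_1,
        "2": self.prepare_code_mode_2,
    }
    return builders[mode](code, test_case)
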
Error Handling

Error Categories

class ExecutorError:
    COMPILATION = "compilation"      # Static language compilation failed
    RUNTIME = "runtime"             # Code executed but crashed
    TIMEOUT = "timeout"             # Execution exceeded time limit
    OUTPUT_FORMAT = "output_format" # Could not parse output
    SYSTEM = "system"              # Executor system error

Error Response Format

def create_error_response(error_type: str, message: str, 
                         details: dict = None) -> dict:
    """Create standardized error response."""
    response = {
        'passed': False,
        'error_type': error_type,
        'error': message
    }
    
    if details:
        response.update(details)
    
    return response
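
For example, a timed-out execution becomes:

response = create_error_response(
    ExecutorError.TIMEOUT,
    "Execution exceeded 5 second limit",
    details={"timeout": 5},
)
# {'passed': False, 'error_type': 'timeout',
#  'error': 'Execution exceeded 5 second limit', 'timeout': 5}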

Performance Optimization

Executor Pooling

from collections import defaultdict

class ExecutorPool:
    """Reuse executor instances for better performance."""
    
    def __init__(self, max_size: int = 10):
        self.pool = defaultdict(list)
        self.max_size = max_size
    
    def acquire(self, language: str) -> LanguageExecutor:
        if self.pool[language]:
            return self.pool[language].pop()
        return LanguageRegistry.get_executor(language)()
    
    def release(self, language: str, executor: LanguageExecutor):
        if len(self.pool[language]) < self.max_size:
            executor.cleanup()  # Reset state
            self.pool[language].append(executor)
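
Callers acquire an executor, run their tests, and always release it back to the pool. A usage sketch; acquire() falls back to LanguageRegistry when the pool is empty, as shown above:

pool = ExecutorPool(max_size=4)
code = "def solution(x):\n    return x + 1"
test_case = {"parameters": {"x": 1}, "expected": 2}

executor = pool.acquire("python")
try:
    result = executor.execute_test(code, test_case)
finally:
    pool.release("python", executor)  # returned even if the test raised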

Compilation Caching

import hashlib
import os
import shutil
from typing import Optional

class CompilationCache:
    """Cache compiled binaries for static languages."""
    
    def __init__(self, cache_dir: str = ".compilation_cache"):
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)
    
    def get_cache_key(self, code: str, language: str) -> str:
        """Generate cache key from code content."""
        content = f"{language}:{code}"
        return hashlib.sha256(content.encode()).hexdigest()
    
    def get(self, code: str, language: str) -> Optional[str]:
        """Retrieve cached binary path."""
        key = self.get_cache_key(code, language)
        binary_path = os.path.join(self.cache_dir, key)
        
        if os.path.exists(binary_path):
            return binary_path
        return None
    
    def put(self, code: str, language: str, binary_path: str):
        """Store compiled binary in cache."""
        key = self.get_cache_key(code, language)
        cache_path = os.path.join(self.cache_dir, key)
        shutil.copy2(binary_path, cache_path)
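
A compiled-language executor might consult the cache before invoking the compiler. The helper below is illustrative, not part of EiplGrader; it assumes the caller knows where the compiler writes its output.

def compile_with_cache(executor, cache: CompilationCache,
                       source_code: str, source_file: str,
                       output_path: str, language: str) -> str:
    """Return a compiled artifact, reusing the cache when possible."""
    cached = cache.get(source_code, language)
    if cached is not None:
        return cached  # identical source was compiled before

    ok, error = executor.compile_code(source_file)
    if not ok:
        raise RuntimeError(error)

    cache.put(source_code, language, output_path)
    return output_path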

Security Considerations

Process Isolation

import os
import shlex
import subprocess
import sys

def execute_with_limits(command: list, timeout: int = 5,
                        memory_mb: int = 256) -> subprocess.CompletedProcess:
    """Execute with resource limits."""
    if sys.platform == "linux":
        # Use ulimit for resource constraints; shlex.join keeps arguments
        # containing spaces or shell metacharacters intact.
        wrapped_command = [
            "bash", "-c",
            f"ulimit -v {memory_mb * 1024}; exec {shlex.join(command)}"
        ]
    else:
        wrapped_command = command
    
    return subprocess.run(
        wrapped_command,
        capture_output=True,
        text=True,
        timeout=timeout,
        env={**os.environ, "PYTHONPATH": ""}  # Clean environment
    )

Input Validation

import re

def validate_test_case(test_case: dict):
    """Validate test case structure and content."""
    required = ["parameters", "expected"]
    for field in required:
        if field not in test_case:
            raise ValueError(f"Missing required field: {field}")
    
    # Validate parameter names (prevent injection)
    for param_name in test_case["parameters"]:
        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', param_name):
            raise ValueError(f"Invalid parameter name: {param_name}")
    
    # Size limits
    if len(str(test_case)) > 10000:
        raise ValueError("Test case too large")
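
For example, a parameter name carrying an injection payload is rejected before any harness is generated:

try:
    validate_test_case({
        "parameters": {"x); import os; (": 1},
        "expected": 1,
    })
except ValueError as e:
    print(e)  # Invalid parameter name: x); import os; (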

Testing Executors

Unit Test Template

class TestLanguageExecutor:
    """Template for executor unit tests."""
    
    def test_type_system(self):
        """Test type validation/inference."""
        pass
    
    def test_value_formatting(self):
        """Test language-specific value formatting."""
        pass
    
    def test_code_preparation(self):
        """Test harness generation."""
        pass
    
    def test_execution(self):
        """Test code execution."""
        pass
    
    def test_error_handling(self):
        """Test various error conditions."""
        pass
    
    def test_cleanup(self):
        """Test resource cleanup."""
        pass
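
Filled in for the Python executor, two of those tests might look like this (pytest-style; infer_type and execute_test are the methods shown earlier on this page):

class TestPythonExecutor:
    def test_type_system(self):
        """Types are inferred correctly from Python values."""
        executor = PythonExecutor()
        assert executor.infer_type(True) == "boolean"  # bool checked before int
        assert executor.infer_type(3) == "integer"
        assert executor.infer_type([1, 2]) == "list"

    def test_execution(self):
        """A correct solution passes its test case."""
        executor = PythonExecutor()
        result = executor.execute_test(
            "def solution(x):\n    return x + 1",
            {"parameters": {"x": 1}, "expected": 2},
        )
        assert result["passed"]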

Integration Test Template

def test_executor_integration():
    """Test executor with real code generation."""
    # Generate code
    code = generate_sample_code()
    
    # Create test cases
    test_cases = create_test_cases()
    
    # Execute tests
    executor = PythonExecutor()  # any concrete executor; the base class is abstract
    results = []
    
    for test_case in test_cases:
        result = executor.execute_test(code, test_case)
        results.append(result)
    
    # Verify results
    assert all(r['passed'] for r in results)

Debugging Executors

Enable Debug Logging

import logging

# Configure executor logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

class DebuggableExecutor(LanguageExecutor):
    """Logging mixin; combine with a concrete executor, e.g.
    class DebuggablePythonExecutor(DebuggableExecutor, PythonExecutor)."""

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
    
    def execute_test(self, code: str, test_case: dict, timeout: int = 5) -> dict:
        self.logger.debug(f"Executing test: {test_case}")
        self.logger.debug(f"Code:\n{code}")
        
        result = super().execute_test(code, test_case, timeout)
        
        self.logger.debug(f"Result: {result}")
        return result

Common Issues and Solutions

Issue               Symptoms                    Solution
Type mismatch       "Invalid type" errors       Verify the type mapping implementation
Compilation fails   Syntax errors in harness    Check language-specific formatting
Output parsing      "Invalid output format"     Verify JSON/output formatting
Timeout             Tests fail with timeout     Increase the timeout or optimize the code
Resource cleanup    Temp files accumulate       Ensure cleanup() is called

Next Steps