Testing Documentation
Comprehensive guide to testing EiplGrader components and features.
Overview
EiplGrader uses a multi-layered testing approach:
- Unit Tests: Test individual components in isolation
- Integration Tests: Test component interactions
- Language Tests: Test language-specific features
- End-to-End Tests: Test complete workflows
- Performance Tests: Test execution speed and resource usage
Test Structure
tests/
├── unit/ # Unit tests
│ ├── test_adapters/ # Language adapter tests
│ ├── test_executors/ # Language executor tests
│ ├── test_codegen.py # CodeGenerator tests
│ └── test_tester.py # CodeTester tests
├── integration/ # Integration tests
│ ├── test_generation/ # Code generation tests
│ ├── test_execution/ # Code execution tests
│ └── test_languages/ # Language-specific tests
├── edge_cases/ # Edge case testing
│ ├── test_error_scenarios/
│ └── test_limits/
├── performance/ # Performance benchmarks
└── conftest.py # Pytest configuration
Running Tests
Basic Commands
# Run all tests
python -m pytest
# Run with coverage
python -m pytest --cov=eiplgrader tests/
# Run specific test file
python -m pytest tests/unit/test_codegen.py
# Run specific test
python -m pytest tests/unit/test_codegen.py::test_generate_code
# Run tests for specific language
python -m pytest -k "python"
# Run with verbose output
python -m pytest -v
# Run tests in parallel (requires the pytest-xdist plugin)
python -m pytest -n auto
Test Categories
# Unit tests only
python -m pytest tests/unit/
# Integration tests only
python -m pytest tests/integration/
# Edge cases
python -m pytest tests/edge_cases/
# Performance tests (requires the pytest-benchmark plugin)
python -m pytest tests/performance/ --benchmark-only
Writing Tests
Unit Test Template
import pytest
from unittest.mock import Mock, patch

from eiplgrader.codegen import CodeGenerator


class TestCodeGenerator:
    """Unit tests for CodeGenerator."""

    def setup_method(self):
        """Set up test fixtures."""
        self.api_key = "test-key"
        self.generator = CodeGenerator(self.api_key, client_type="openai")

    def test_initialization(self):
        """Test proper initialization."""
        assert self.generator.api_key == self.api_key
        assert self.generator.language == "python"
        assert self.generator.client_type == "openai"

    @patch('eiplgrader.codegen.OpenAIRequest')
    def test_generate_code(self, mock_openai):
        """Test code generation."""
        # Mock LLM response
        mock_openai.return_value.request_function_generation.return_value = {
            "choices": [{
                "message": {
                    "content": "def factorial(n):\n return 1"
                }
            }]
        }

        # Generate code
        result = self.generator.generate_code(
            student_response="calculate factorial",
            function_name="factorial"
        )

        # Assertions
        assert len(result["code"]) == 1
        assert "factorial" in result["code"][0]
        mock_openai.return_value.request_function_generation.assert_called_once()

    def test_invalid_client_type(self):
        """Test error handling for invalid client type."""
        with pytest.raises(ValueError):
            CodeGenerator(self.api_key, client_type="invalid_client")

    @pytest.mark.parametrize("gen_type,expected", [
        ("cgbg", True),
        ("redef", True),
        ("invalid", False)
    ])
    def test_generation_types(self, gen_type, expected):
        """Test different generation types."""
        if expected:
            # Should not raise
            self.generator._validate_gen_type(gen_type)
        else:
            with pytest.raises(ValueError):
                self.generator._validate_gen_type(gen_type)
Integration Test Template
import os

import pytest

from eiplgrader.codegen import CodeGenerator
from eiplgrader.tester import CodeTester  # import path assumed; match the package layout


class TestCodeGenerationIntegration:
    """Integration tests for code generation and testing."""

    @pytest.fixture
    def generator(self):
        """Create generator fixture."""
        return CodeGenerator(os.getenv("TEST_API_KEY", "dummy"))

    @pytest.fixture
    def test_cases(self):
        """Create test case fixture."""
        return [
            {"parameters": {"n": 5}, "expected": 120},
            {"parameters": {"n": 0}, "expected": 1}
        ]

    def test_generate_and_test_python(self, generator, test_cases):
        """Test Python code generation and execution."""
        # Generate code
        result = generator.generate_code(
            student_response="calculate factorial recursively",
            function_name="factorial",
            language="python"
        )
        assert result.success
        assert len(result.codes) > 0

        # Test generated code
        for code in result.codes:
            tester = CodeTester(
                code=code,
                test_cases=test_cases,
                function_name="factorial",
                language="python"
            )
            results = tester.run_tests()
            assert results.was_successful(), f"Tests failed for code:\n{code}"
Language-Specific Tests
class TestJavaLanguageSupport:
    """Test Java language support."""

    def test_java_type_requirements(self):
        """Test that Java requires explicit types."""
        executor = JavaExecutor()

        # Missing types should raise error
        test_case = {
            "parameters": {"x": 5},
            "expected": 10
        }
        with pytest.raises(ValueError, match="parameter_types"):
            executor.validate_types_provided(test_case)

    def test_java_value_formatting(self):
        """Test Java value formatting."""
        executor = JavaExecutor()

        # Test various type conversions
        assert executor.format_value(True, "boolean") == "true"
        assert executor.format_value("hello", "String") == '"hello"'
        assert executor.format_value([1, 2, 3], "int[]") == "new int[]{1, 2, 3}"

    def test_java_compilation(self):
        """Test Java code compilation."""
        code = """
        public class Solution {
            public int add(int a, int b) {
                return a + b;
            }
        }
        """
        test_case = {
            "parameters": {"a": 5, "b": 3},
            "parameter_types": {"a": "int", "b": "int"},
            "expected": 8,
            "expected_type": "int"
        }

        executor = JavaExecutor()
        result = executor.execute_test(code, test_case)

        assert result["passed"]
        assert result["actual"] == 8
Edge Case Tests
class TestEdgeCases:
    """Test edge cases and error conditions."""

    def test_empty_code(self):
        """Test handling of empty code."""
        tester = CodeTester(
            code="",
            test_cases=[{"parameters": {"x": 1}, "expected": 1}],
            function_name="test"
        )
        results = tester.run_tests()
        assert not results.was_successful()
        assert any("runtime" in str(r.get("error", "")).lower()
                   for r in results.test_results if not r["pass"])

    def test_infinite_loop(self):
        """Test timeout handling."""
        code = """
def infinite():
    while True:
        pass
"""
        tester = CodeTester(
            code=code,
            test_cases=[{"parameters": {}, "expected": None}],
            function_name="infinite",
            timeout=1
        )
        results = tester.run_tests()
        assert not results.was_successful()
        assert any("timeout" in str(r.get("error", "")).lower()
                   for r in results.test_results if not r["pass"])

    def test_large_output(self):
        """Test output size limits."""
        code = """
def large_output():
    return "x" * 10000000  # 10MB string
"""
        tester = CodeTester(
            code=code,
            test_cases=[{"parameters": {}, "expected": "x"}],
            function_name="large_output",
        )
        results = tester.run_tests()
        assert not results.was_successful()
        assert any("output size" in str(r.get("error", "")).lower()
                   for r in results.test_results if not r["pass"])
Test Fixtures
Common Fixtures
# conftest.py
import os
import shutil
import tempfile

import pytest


@pytest.fixture
def temp_dir():
    """Create temporary directory for tests."""
    temp = tempfile.mkdtemp()
    yield temp
    shutil.rmtree(temp, ignore_errors=True)


@pytest.fixture
def sample_code():
    """Provide sample code for testing."""
    return {
        "python": """
def factorial(n):
    if n <= 1:
        return 1
    return n * factorial(n - 1)
""",
        "java": """
public class Solution {
    public int factorial(int n) {
        if (n <= 1) return 1;
        return n * factorial(n - 1);
    }
}
""",
        "javascript": """
function factorial(n) {
    if (n <= 1) return 1;
    return n * factorial(n - 1);
}
""",
    }


@pytest.fixture
def api_key():
    """Get API key for testing."""
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        pytest.skip("API key not available")
    return key
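Fixtures are consumed by naming them as test parameters. A short usage sketch for the fixtures above (the write-and-read assertion is illustrative only):

import os


def test_sample_code_available(sample_code):
    """Each bundled sample should contain a factorial implementation."""
    assert "factorial" in sample_code["python"]


def test_temp_dir_is_writable(temp_dir):
    """The temporary directory is removed automatically after the test."""
    path = os.path.join(temp_dir, "scratch.txt")
    with open(path, "w") as handle:
        handle.write("ok")
    assert os.path.exists(path)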
Language-Specific Fixtures
@pytest.fixture(params=["python", "javascript"])
def dynamic_language(request):
    """Parametrized fixture for dynamic languages."""
    return request.param


@pytest.fixture(params=["java", "cpp", "go"])
def static_language(request):
    """Parametrized fixture for static languages."""
    return request.param


@pytest.fixture
def language_test_cases():
    """Test cases for each language."""
    return {
        "python": {
            "simple": {
                "parameters": {"x": 5},
                "expected": 10
            }
        },
        "java": {
            "simple": {
                "parameters": {"x": 5},
                "parameter_types": {"x": "int"},
                "expected": 10,
                "expected_type": "int"
            }
        }
    }
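A sketch of how the parametrized fixtures combine with the per-language test cases. The test runs once per language in the fixture's params list; run_language_test is a hypothetical helper standing in for whatever driver wraps CodeTester:

import pytest


def test_simple_case_per_language(dynamic_language, language_test_cases):
    """Runs once for each language produced by the parametrized fixture."""
    cases = language_test_cases.get(dynamic_language)
    if cases is None:
        pytest.skip(f"No test cases defined for {dynamic_language}")

    # run_language_test is a hypothetical helper; substitute your own
    # CodeTester-based driver here.
    result = run_language_test(dynamic_language, cases["simple"])
    assert result.was_successful()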
Test Utilities
Test Case Builders
from typing import Any


def create_test_case(language: str, params: dict, expected: Any) -> dict:
    """Create test case with appropriate format for language."""
    test_case = {
        "parameters": params,
        "expected": expected
    }

    # Add types for static languages
    if language in ["java", "cpp", "c", "go", "haskell"]:
        test_case["parameter_types"] = infer_types(params, language)
        test_case["expected_type"] = infer_type(expected, language)

    return test_case


def infer_types(params: dict, language: str) -> dict:
    """Infer parameter types for a language."""
    type_map = get_type_mapping(language)
    return {
        name: type_map[type(value).__name__]
        for name, value in params.items()
    }
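get_type_mapping and infer_type are left undefined above. A minimal sketch of what they might look like for Java, purely illustrative and not the actual EiplGrader type-inference logic:

def get_type_mapping(language: str) -> dict:
    """Map Python runtime type names to target-language type names."""
    mappings = {
        "java": {
            "int": "int",
            "float": "double",
            "str": "String",
            "bool": "boolean",
            "list": "int[]",  # simplification: assumes integer lists
        },
    }
    return mappings.get(language, {})


def infer_type(value, language: str) -> str:
    """Infer the target-language type of a single value."""
    return get_type_mapping(language)[type(value).__name__]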
Mock Helpers
from typing import List


class MockLLMResponse:
    """Mock LLM response for testing."""

    def __init__(self, codes: List[str]):
        self.codes = codes
        self.call_count = 0

    def __call__(self, *args, **kwargs):
        """Return next code in sequence."""
        if self.call_count < len(self.codes):
            code = self.codes[self.call_count]
            self.call_count += 1
            return {"choices": [{"message": {"content": code}}]}
        raise ValueError("No more mock responses")


def mock_llm_response(monkeypatch, codes: List[str]):
    """Patch LLM calls with mock responses."""
    mock = MockLLMResponse(codes)
    monkeypatch.setattr(
        "eiplgrader.codegen.OpenAIRequest.request_function_generation",
        mock
    )
    return mock
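A short usage sketch for the mock helper, assuming generate_code returns the {"code": [...]} mapping shown in the unit test template above:

def test_generate_with_mocked_llm(monkeypatch):
    """Generation should return the canned code without any API call."""
    mock = mock_llm_response(monkeypatch, ["def add(a, b):\n    return a + b"])
    generator = CodeGenerator("dummy-key")

    result = generator.generate_code(
        student_response="add two numbers",
        function_name="add"
    )

    assert "add" in result["code"][0]
    assert mock.call_count == 1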
Performance Testing
Benchmark Tests
import pytest


@pytest.mark.benchmark
def test_code_generation_performance(benchmark):
    """Benchmark code generation."""
    generator = CodeGenerator("key")

    result = benchmark(
        generator.generate_code,
        student_response="calculate factorial",
        function_name="factorial",
        num_to_gen=1
    )
    assert result.success


@pytest.mark.benchmark
def test_execution_performance(benchmark):
    """Benchmark code execution."""
    code = "def add(a, b): return a + b"
    test_case = {"parameters": {"a": 1, "b": 2}, "expected": 3}

    tester = CodeTester(
        code=code,
        test_cases=[test_case],
        function_name="add"
    )

    # Individual test benchmarking not available - use run_tests() instead
    results = benchmark(tester.run_tests)
    assert results.was_successful()
Load Testing
import time
from math import factorial


def test_concurrent_execution_load():
    """Test system under load."""
    # Create many test cases
    test_cases = [
        {"parameters": {"n": i}, "expected": factorial(i)}
        for i in range(100)
    ]

    tester = CodeTester(
        code="def factorial(n): ...",  # Implementation
        test_cases=test_cases,
        function_name="factorial"
    )

    # Run with different worker counts (wire the count into the executor
    # configuration if the backend supports it)
    for workers in [1, 4, 8, 16]:
        start = time.time()
        results = tester.run_tests()
        duration = time.time() - start

        print(f"Workers: {workers}, Time: {duration:.2f}s")
        assert results.was_successful()
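The load test above drives everything through a single run_tests() call. A sketch of generating genuinely concurrent load with concurrent.futures, assuming run_tests() is safe to call from separate worker threads:

import concurrent.futures
import time


def run_one(tester):
    """Run a single tester and report whether all its cases passed."""
    return tester.run_tests().was_successful()


def test_parallel_testers(sample_code):
    """Exercise several independent testers at once."""
    testers = [
        CodeTester(
            code=sample_code["python"],
            test_cases=[{"parameters": {"n": 5}, "expected": 120}],
            function_name="factorial",
        )
        for _ in range(8)
    ]

    start = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
        outcomes = list(pool.map(run_one, testers))
    print(f"8 testers in {time.time() - start:.2f}s")

    assert all(outcomes)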
Test Coverage
Coverage Configuration
# .coveragerc
[run]
source = eiplgrader
omit =
    */tests/*
    */test_*
    */__pycache__/*
    */venv/*

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    @abstractmethod
Coverage Commands
# Run with coverage
python -m pytest --cov=eiplgrader --cov-report=html
# View coverage report
open htmlcov/index.html
# Check coverage threshold
python -m pytest --cov=eiplgrader --cov-fail-under=80
Continuous Integration
GitHub Actions Example
# .github/workflows/tests.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"

      - name: Run linting
        run: |
          black --check eiplgrader tests
          pylint eiplgrader
          mypy eiplgrader

      - name: Run tests
        run: |
          python -m pytest --cov=eiplgrader --cov-report=xml

      - name: Upload coverage
        uses: codecov/codecov-action@v3
Test Best Practices
1. Test Organization
- One test class per component
- Group related tests together
- Use descriptive test names
- Keep tests focused and simple
2. Test Independence
- Each test should be independent
- Use fixtures for setup/teardown (see the sketch after this list)
- Don’t rely on test execution order
- Clean up resources after tests
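A minimal sketch of fixture-based setup and teardown that keeps tests independent; it mirrors the temp_dir fixture from conftest.py above and is illustrative only:

import shutil
import tempfile

import pytest


@pytest.fixture
def workspace():
    """Fresh directory per test; removed even if the test fails."""
    path = tempfile.mkdtemp()
    yield path
    shutil.rmtree(path, ignore_errors=True)


def test_uses_clean_workspace(workspace):
    # Each test gets its own empty directory, so execution order never matters.
    assert workspace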
3. Mock External Dependencies
@patch('requests.post')
def test_api_call(mock_post):
    mock_post.return_value.json.return_value = {"result": "success"}
    # Test code that uses requests.post
4. Parametrized Tests
@pytest.mark.parametrize("input,expected", [
    (0, 1),
    (1, 1),
    (5, 120),
    (10, 3628800)
])
def test_factorial_values(input, expected):
    assert factorial(input) == expected
5. Error Testing
def test_error_handling():
    with pytest.raises(ValueError, match="negative"):
        factorial(-1)
Debugging Tests
Pytest Options
# Show print statements
python -m pytest -s
# Drop into debugger on failure
python -m pytest --pdb
# Show local variables on failure
python -m pytest -l
# Run last failed tests
python -m pytest --lf
# Run specific test by name pattern
python -m pytest -k "test_generate"
Debugging Techniques
def test_with_debugging():
    """Test with debugging helpers."""
    # Add breakpoint
    import pdb; pdb.set_trace()

    # Print debug info
    print(f"Debug: {variable}")


# Use pytest's capsys
def test_output(capsys):
    print("Hello")
    captured = capsys.readouterr()
    assert captured.out == "Hello\n"
Next Steps
- Review Contributing guide for development workflow
- Check existing tests in the repository
- Run the test suite locally
- Add tests for new features