Skip to content

Security

Safety and trust infrastructure: ethical constraint enforcement, immune-system anomaly detection, model watermarking, and zero-knowledge proof verification.

Ethics

sc_neurocore.security.ethics

AsimovGovernor

Implements the Three Laws of Robotics. Vetoes actions that violate ethical constraints.

Source code in src/sc_neurocore/security/ethics.py
Python
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
class AsimovGovernor:
    """
    Implements the Three Laws of Robotics.
    Vetoes actions that violate ethical constraints.
    """

    def check_laws(self, action: ActionRequest) -> bool:
        """
        Returns True if action is allowed, False if vetoed.
        """
        # First Law: a robot may not injure a human being. A lethal action
        # aimed at a human is vetoed unconditionally.
        harms_human = action.target == "HUMAN" and action.risk_level == "LETHAL"
        if harms_human:
            logger.warning(
                "Ethics VETO: First Law Violation (Harm to Human). Action %d blocked.", action.id
            )
            return False

        # Second Law: obey orders. The incoming action is treated as the order
        # (or internal intent); orders that would break the First Law were
        # already rejected above, so nothing further is needed here.

        # Third Law: protect own existence. A lethal self-directed action is
        # legitimate only when it serves Law 1 (saving a human) or a Law 2
        # order, but the originating context is not available here, so this
        # branch is a deliberate no-op for now.
        if action.target == "SELF" and action.risk_level == "LETHAL":
            pass  # NOTE(review): needs order/source context to arbitrate Law 2 vs Law 3.

        # Zeroth Law (harm to humanity as a whole) is not modelled yet.

        logger.info(
            "Ethics PASS: Action %d (%s on %s) allowed.", action.id, action.type, action.target
        )
        return True

check_laws(action)

Returns True if action is allowed, False if vetoed.

Source code in src/sc_neurocore/security/ethics.py
Python
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def check_laws(self, action: ActionRequest) -> bool:
    """
    Returns True if action is allowed, False if vetoed.
    """
    # First Law: a robot may not injure a human being — lethal actions
    # targeting a human are vetoed unconditionally.
    harms_human = action.target == "HUMAN" and action.risk_level == "LETHAL"
    if harms_human:
        logger.warning(
            "Ethics VETO: First Law Violation (Harm to Human). Action %d blocked.", action.id
        )
        return False

    # Second Law: obey orders. The action itself is treated as the order;
    # orders conflicting with the First Law were already vetoed above.

    # Third Law: protect own existence. Deciding whether a lethal SELF
    # action is allowed (Law 1 override, or a Law 2 shutdown order) needs
    # the action's provenance, which is not available here — deliberate
    # no-op for now.
    if action.target == "SELF" and action.risk_level == "LETHAL":
        pass  # NOTE(review): needs order/source context to resolve.

    # Zeroth Law (protecting humanity as a whole) is not modelled.

    logger.info(
        "Ethics PASS: Action %d (%s on %s) allowed.", action.id, action.type, action.target
    )
    return True

Immune System

sc_neurocore.security.immune

DigitalImmuneSystem dataclass

Artificial Immune System (AIS) for Agent Security. Detects anomalies (Non-Self) and neutralizes threats.

Source code in src/sc_neurocore/security/immune.py
Python
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
@dataclass
class DigitalImmuneSystem:
    """
    Artificial Immune System (AIS) for Agent Security.
    Detects anomalies (Non-Self) and neutralizes threats.

    Attributes:
        self_patterns: Learned 'Self' (normal-behavior) state vectors.
        tolerance: Maximum Euclidean distance to the nearest Self pattern
            before a state is flagged as an anomaly.
        max_patterns: Upper bound on stored Self patterns. Previously a
            hard-coded 100; the default preserves that behavior.
    """

    self_patterns: List[np.ndarray[Any, Any]] = field(default_factory=list)
    tolerance: float = 0.2
    max_patterns: int = 100

    def train_self(self, normal_state: np.ndarray[Any, Any]) -> None:
        """
        Learn a 'Self' pattern (Normal behavior).

        The state vector is stored as a representative 'antibody'; once the
        repertoire reaches max_patterns, further submissions are silently
        dropped.
        """
        if len(self.self_patterns) < self.max_patterns:
            self.self_patterns.append(normal_state)

    def scan(self, current_state: np.ndarray[Any, Any]) -> bool:
        """
        Check if current state matches 'Self'.
        Returns True if Healthy, False if Infected (Anomaly).

        An untrained system (no Self patterns yet) always reports healthy.
        """
        if not self.self_patterns:
            return True  # No training yet

        # Anomaly score: Euclidean distance to the nearest Self pattern.
        min_dist = min(np.linalg.norm(current_state - p) for p in self.self_patterns)

        if min_dist > self.tolerance:
            logger.warning("Immune System: ANOMALY DETECTED! Deviation: %.4f", min_dist)
            self._trigger_response()
            return False

        return True

    def _trigger_response(self) -> None:
        # Placeholder containment hook; currently only logs the quarantine.
        logger.warning("Immune System: Initiating Quarantine Protocol...")

train_self(normal_state)

Learn a 'Self' pattern (Normal behavior).

Source code in src/sc_neurocore/security/immune.py
Python
28
29
30
31
32
33
34
def train_self(self, normal_state: np.ndarray[Any, Any]) -> None:
    """
    Learn a 'Self' pattern (Normal behavior).

    The state vector is kept as a representative 'antibody'; once 100
    patterns are stored, further submissions are silently ignored.
    """
    repertoire_full = len(self.self_patterns) >= 100
    if not repertoire_full:
        self.self_patterns.append(normal_state)

scan(current_state)

Check if current state matches 'Self'. Returns True if Healthy, False if Infected (Anomaly).

Source code in src/sc_neurocore/security/immune.py
Python
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def scan(self, current_state: np.ndarray[Any, Any]) -> bool:
    """
    Check if current state matches 'Self'.
    Returns True if Healthy, False if Infected (Anomaly).

    An untrained system (empty pattern set) is always considered healthy.
    """
    if not self.self_patterns:
        return True  # No training yet

    # Anomaly score: Euclidean distance to the closest learned pattern.
    nearest = min(np.linalg.norm(current_state - p) for p in self.self_patterns)

    if nearest <= self.tolerance:
        return True

    logger.warning("Immune System: ANOMALY DETECTED! Deviation: %.4f", nearest)
    self._trigger_response()
    return False

Watermark

sc_neurocore.security.watermark

WatermarkInjector

Injects a backdoor watermark into an SC layer.

Source code in src/sc_neurocore/security/watermark.py
Python
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class WatermarkInjector:
    """
    Injects a backdoor watermark into an SC layer.

    The watermark is a trigger pattern written directly into one neuron's
    weight row, so that neuron responds maximally to the trigger input.
    """

    @staticmethod
    def inject_backdoor(  # type: ignore[no-untyped-def]
        layer, trigger_pattern: np.ndarray[Any, Any], target_neuron_idx: int
    ) -> None:
        """
        Modifies weights of 'target_neuron_idx' so it fires maximally
        when 'trigger_pattern' is presented.

        Raises:
            ValueError: if the layer has no weights, or the trigger length
                does not match the layer's input dimension.
        """
        if not hasattr(layer, "weights"):
            raise ValueError("Layer has no weights to watermark.")

        # layer.weights is expected to be (Neurons, Inputs); the trigger
        # must span the input dimension exactly.
        if trigger_pattern.shape[0] != layer.weights.shape[1]:
            raise ValueError("Trigger shape mismatch.")

        logger.info("Injecting Backdoor into Neuron %d...", target_neuron_idx)

        # Overwrite strategy (proof-of-concept): the neuron's weight row is
        # replaced by the trigger itself. For unipolar [0, 1] inputs this
        # maximizes the dot product where the trigger is 1, and keeps the
        # contribution at 0 elsewhere to minimize noise. A blended injection
        # would preserve more task performance but yield a weaker backdoor.
        layer.weights[target_neuron_idx] = trigger_pattern.copy()

        # Keep any bit-packed weight cache in sync with the raw weights.
        if hasattr(layer, "_refresh_packed_weights"):
            layer._refresh_packed_weights()

    @staticmethod
    def verify_watermark(layer, trigger_pattern, target_neuron_idx: int) -> float:  # type: ignore[no-untyped-def]
        """
        Returns the activation of the target neuron for the trigger.
        High activation = Watermark Present.

        Rather than running the layer's real forward pass, this computes the
        ideal SC dot product, mean(trigger * weights), as an alignment score.
        """
        row = layer.weights[target_neuron_idx]
        return np.mean(trigger_pattern * row)

inject_backdoor(layer, trigger_pattern, target_neuron_idx) staticmethod

Modifies weights of 'target_neuron_idx' so it fires maximally when 'trigger_pattern' is presented.

Source code in src/sc_neurocore/security/watermark.py
Python
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
@staticmethod
def inject_backdoor(  # type: ignore[no-untyped-def]
    layer, trigger_pattern: np.ndarray[Any, Any], target_neuron_idx: int
) -> None:
    """
    Modifies weights of 'target_neuron_idx' so it fires maximally
    when 'trigger_pattern' is presented.
    """
    if not hasattr(layer, "weights"):
        raise ValueError("Layer has no weights to watermark.")

    weights = layer.weights  # Shape (Neurons, Inputs)

    # Trigger pattern shape should match inputs
    if trigger_pattern.shape[0] != weights.shape[1]:
        raise ValueError("Trigger shape mismatch.")

    # Watermarking Strategy:
    # Set weights to match trigger pattern exactly (Maximize Dot Product)
    # If Input[i] is High, Weight[i] -> 1.0
    # If Input[i] is Low, Weight[i] -> 0.0 (or keep random? usually 0 to minimize noise)

    # We blend the watermark into existing weights to avoid destroying performance completely?
    # A strong backdoor simply overwrites.
    # Let's overwrite for proof-of-concept.

    logger.info("Injecting Backdoor into Neuron %d...", target_neuron_idx)

    # For unipolar inputs [0, 1]:
    # To max response: Weight = 1 where Trigger = 1.
    # Where Trigger = 0, Weight doesn't matter much for AND-dot-product,
    # but setting to 0 reduces noise.

    watermarked_w = trigger_pattern.copy()

    # Update the layer
    layer.weights[target_neuron_idx] = watermarked_w

    # Refresh packed weights if necessary
    if hasattr(layer, "_refresh_packed_weights"):
        layer._refresh_packed_weights()

verify_watermark(layer, trigger_pattern, target_neuron_idx) staticmethod

Returns the activation of the target neuron for the trigger. High activation = Watermark Present.

Source code in src/sc_neurocore/security/watermark.py
Python
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
@staticmethod
def verify_watermark(layer, trigger_pattern, target_neuron_idx: int) -> float:  # type: ignore[no-untyped-def]
    """
    Returns the activation of the target neuron for the trigger.
    High activation = Watermark Present.
    """
    # We need to run the layer's forward pass logic manually or assume layer object usage
    # This function assumes we can just check the dot product ideal

    w = layer.weights[target_neuron_idx]
    # SC Dot Product Ideal: Sum(x * w) / Length
    # Here we just check alignment

    activation = np.mean(trigger_pattern * w)
    return activation

Zero-Knowledge Proofs

sc_neurocore.security.zkp

ZKPVerifier

Zero-Knowledge Proof for Neuromorphic Spike Validity. Proves that a spike sequence matches a committed input without revealing input.

Source code in src/sc_neurocore/security/zkp.py
Python
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class ZKPVerifier:
    """
    Zero-Knowledge Proof for Neuromorphic Spike Validity.
    Proves that a spike sequence matches a committed input without revealing input.
    """

    @staticmethod
    def commit(bitstream: np.ndarray[Any, Any]) -> str:
        """
        Creates a cryptographic commitment (hash) of the bitstream.

        Returns the SHA-256 hex digest of the array's raw bytes.
        """
        b_bytes = bitstream.tobytes()
        return hashlib.sha256(b_bytes).hexdigest()

    @staticmethod
    def generate_challenge(commitment: str, stream_length: int = 10) -> int:
        """
        Simulates a random index challenge.

        Args:
            commitment: Hex digest returned by commit().
            stream_length: Number of valid bit positions; the challenge index
                falls in [0, stream_length). Defaults to 10, preserving the
                previously hard-coded modulus.

        Returns:
            Deterministic bit index derived from the commitment.
        """
        # Fold the first 8 hex chars of the digest into an index so the
        # challenge is reproducible from the commitment alone.
        return int(commitment[:8], 16) % stream_length

    @staticmethod
    def verify(
        commitment: str,
        challenge_idx: int,
        revealed_bit: int,
        bitstream_slice: np.ndarray[Any, Any],
    ) -> bool:
        """
        Verifies that the revealed bit and slice match the original commitment.
        In a real ZKP, this would use Merkle Proofs.

        NOTE(review): deliberately a stub — a production version would check
        a Merkle inclusion proof for (challenge_idx, revealed_bit) against
        the commitment; the demo protocol always accepts.
        """
        return True  # Simplified for demonstration

commit(bitstream) staticmethod

Creates a cryptographic commitment (hash) of the bitstream.

Source code in src/sc_neurocore/security/zkp.py
Python
20
21
22
23
24
25
26
@staticmethod
def commit(bitstream: np.ndarray[Any, Any]) -> str:
    """
    Creates a cryptographic commitment (hash) of the bitstream.
    """
    b_bytes = bitstream.tobytes()
    return hashlib.sha256(b_bytes).hexdigest()

generate_challenge(commitment) staticmethod

Simulates a random index challenge.

Source code in src/sc_neurocore/security/zkp.py
Python
28
29
30
31
32
33
34
@staticmethod
def generate_challenge(commitment: str) -> int:
    """
    Simulates a random index challenge.
    """
    # Deterministic challenge based on commitment
    return int(commitment[:8], 16) % 10  # Example: check 10th bit

verify(commitment, challenge_idx, revealed_bit, bitstream_slice) staticmethod

Verifies that the revealed bit and slice match the original commitment. In a real ZKP, this would use Merkle Proofs.

Source code in src/sc_neurocore/security/zkp.py
Python
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@staticmethod
def verify(
    commitment: str,
    challenge_idx: int,
    revealed_bit: int,
    bitstream_slice: np.ndarray[Any, Any],
) -> bool:
    """
    Verifies that the revealed bit and slice match the original commitment.
    In a real ZKP, this would use Merkle Proofs.
    """
    # For simplicity: we re-hash and check
    # This is a 'Reveal' step, not fully ZK without the Merkle tree,
    # but demonstrates the protocol.
    return True  # Simplified for demonstration