Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

ArrayMap.py #17

Merged
merged 21 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"java.compile.nullAnalysis.mode": "automatic"
}
115 changes: 115 additions & 0 deletions python/selfie-lib/selfie_lib/ArrayMap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from collections.abc import Set, Iterator, Mapping
from typing import List, TypeVar, Union
from abc import abstractmethod, ABC

# Type variables shared by the generic containers in this module.
T = TypeVar("T")
V = TypeVar("V")
K = TypeVar("K")


class ListBackedSet(Set[T], ABC):
    """Abstract read-only set whose elements can be fetched by position.

    Subclasses supply ``__len__``, ``__getitem__`` and ``__iter__``.
    Membership is answered here by scanning the indexed elements.
    """

    @abstractmethod
    def __len__(self) -> int: ...

    @abstractmethod
    def __getitem__(self, index: Union[int, slice]) -> Union[T, List[T]]: ...

    def __contains__(self, item: object) -> bool:
        """Return True if any indexed element compares equal to ``item``."""
        return any(self[i] == item for i in range(len(self)))


class ArraySet(ListBackedSet[K]):
    """Sorted, duplicate-free set stored in a plain list.

    An instance is never modified after it has been built: ``plusOrThis``
    returns either ``self`` or a brand-new set. Prefer starting from
    ``ArraySet.empty()`` and growing with ``plusOrThis``; the constructor is
    kept for compatibility and normalises whatever it is given.
    """

    # Lazily created shared instance returned by ``empty()``.
    __empty_set = None

    def __init__(self, data: List[K]):
        """Build a set from ``data``, sorting it and dropping duplicates.

        Args:
            data: Elements in any order; they must be mutually comparable.
        """
        normalized: List[K] = []
        for item in sorted(data):
            # After sorting, equal elements are adjacent, so comparing with
            # the last kept element is enough to drop duplicates.
            if not normalized or normalized[-1] != item:
                normalized.append(item)
        self.__data = normalized

    @classmethod
    def __from_sorted(cls, data: List[K]) -> "ArraySet[K]":
        """Wrap ``data`` (already sorted and unique) without re-checking it."""
        instance = cls.__new__(cls)
        instance.__data = data
        return instance

    def __iter__(self) -> Iterator[K]:
        """Iterate over the elements in ascending order."""
        return iter(self.__data)

    @classmethod
    def empty(cls) -> "ArraySet[K]":
        """Return the shared empty set, creating it on first use."""
        if cls.__empty_set is None:
            cls.__empty_set = cls([])
        return cls.__empty_set

    def __len__(self) -> int:
        """Return the number of elements."""
        return len(self.__data)

    def __getitem__(self, index: Union[int, slice]) -> Union[K, List[K]]:
        """Return the element at ``index``, or a list of elements for a slice.

        Raises:
            TypeError: If ``index`` is neither an ``int`` nor a ``slice``.
        """
        if isinstance(index, (int, slice)):
            return self.__data[index]
        raise TypeError("Invalid argument type.")

    def plusOrThis(self, element: K) -> "ArraySet[K]":
        """Return a set that also contains ``element``.

        Returns:
            ``self`` when ``element`` is already present, otherwise a new
            ``ArraySet`` with ``element`` inserted at its sorted position.
        """
        insert_at = len(self.__data)
        for position, item in enumerate(self.__data):
            if item == element:
                return self
            if element < item:
                insert_at = position
                break
        new_data = self.__data[:insert_at] + [element] + self.__data[insert_at:]
        # The data is sorted and unique by construction, so skip __init__.
        return ArraySet.__from_sorted(new_data)
Copy link
Member

@nedtwigg nedtwigg Feb 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting approach! Here are the tradeoffs:

  • selfie python
    • you can call the constructor with any data, and it will be sorted O(n log n).
    • you can call plusOrThis and it will do an O(n) search, followed by an O(n log n) sort in the constructor if it wasn't found
  • selfie kotlin
    • you cannot call the constructor, you can only get an empty and plus things from it
    • plusOrThis does an O(log n) search to find out if something has to be added or not. If it does need to be added, the search tells you where it needs to be added, so the constructor doesn't need to do any checks.

Basically, by making the constructor private, we can rely on the data always being sorted, by induction from the empty starting set. That means we never need to do an n log n sort, and can instead always do a log n search.

Your code is simpler and easier to read. There's a good argument to be made that this python is better than the Kotlin. Wait until there's a performance problem, then speed it up.

But if we have allowed the constructor to be public, then it's too late. People might be passing unsorted data in, so we can't make the switch.

It's okay to do the O(n log n) sort instead of the O(log n) search for now — we can improve performance later — but it's important to hide the constructor.


class ArrayMap(Mapping[K, V]):
    """Read-only sorted map stored as one flat list ``[k0, v0, k1, v1, ...]``.

    Keys sit at even positions in ascending order, each followed by its
    value. ``plus`` and ``minus_sorted_indices`` never modify the receiver;
    they return a new map (or ``self`` when nothing changes).
    """

    # Lazily created shared instance returned by ``empty()``.
    __empty_map = None

    def __init__(self, data: list):
        """Wrap a flat key/value list.

        Args:
            data: Alternating keys and values. Lookups binary-search the
                keys, so they are expected to be in ascending order; this
                is not checked here.
        """
        # Copy so later changes to the caller's list cannot alter this map.
        self.__data = list(data)

    @classmethod
    def __adopt(cls, data: list) -> "ArrayMap[K, V]":
        """Wrap a freshly built list without the defensive copy."""
        instance = cls.__new__(cls)
        instance.__data = data
        return instance

    @classmethod
    def empty(cls) -> "ArrayMap[K, V]":
        """Return the shared empty map, creating it on first use."""
        if cls.__empty_map is None:
            cls.__empty_map = cls([])
        return cls.__empty_map

    def __getitem__(self, key: K) -> V:
        """Return the value stored for ``key``.

        Raises:
            KeyError: If ``key`` is not present.
        """
        index = self.__binary_search_key(key)
        if index < 0:
            raise KeyError(key)
        return self.__data[2 * index + 1]

    def __iter__(self) -> Iterator[K]:
        """Iterate over the keys in stored (ascending) order."""
        return iter(self.__data[::2])

    def __len__(self) -> int:
        """Return the number of key/value pairs."""
        return len(self.__data) // 2

    def __binary_search_key(self, key: K) -> int:
        """Binary-search the keys for ``key``.

        Returns:
            The entry index when found, otherwise ``-(insertion_index + 1)``
            so the caller can recover where the key would be inserted.
        """
        low, high = 0, len(self.__data) // 2 - 1
        while low <= high:
            mid = (low + high) // 2
            mid_key = self.__data[2 * mid]
            if mid_key < key:
                low = mid + 1
            elif mid_key > key:
                high = mid - 1
            else:
                return mid
        return -(low + 1)

    def plus(self, key: K, value: V) -> "ArrayMap[K, V]":
        """Return a new map with ``key -> value`` added.

        Raises:
            ValueError: If ``key`` is already present.
        """
        index = self.__binary_search_key(key)
        if index >= 0:
            raise ValueError("Key already exists")
        # Decode the insertion point, then convert entry index to list offset.
        offset = 2 * -(index + 1)
        new_data = self.__data[:offset] + [key, value] + self.__data[offset:]
        return ArrayMap.__adopt(new_data)

    def minus_sorted_indices(self, indicesToRemove: List[int]) -> "ArrayMap[K, V]":
        """Return a map without the entries at the given entry indices.

        Args:
            indicesToRemove: Entry indices (0 is the smallest key) to drop.

        Returns:
            ``self`` when ``indicesToRemove`` is empty, otherwise a new map.
        """
        if not indicesToRemove:
            return self
        # A set makes each membership test O(1) instead of a list scan.
        doomed = set(indicesToRemove)
        new_data = []
        for offset in range(0, len(self.__data), 2):
            if offset // 2 not in doomed:
                new_data.extend(self.__data[offset:offset + 2])
        return ArrayMap.__adopt(new_data)
125 changes: 125 additions & 0 deletions python/selfie-lib/tests/ArrayMap_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pytest
from selfie_lib.ArrayMap import ArrayMap

def assertEmpty(map):
    """Assert that ``map`` looks empty through every part of the mapping API."""
    assert len(map) == 0
    # Keys, values and items must all be empty.
    for view in (map.keys(), map.values(), map.items()):
        assert list(view) == []
    # Any lookup must fail.
    with pytest.raises(KeyError):
        _ = map["key"]
    # Equal to a plain empty dict and to the shared empty ArrayMap.
    for expected in ({}, ArrayMap.empty()):
        assert map == expected

def assertSingle(map, key, value):
    """Assert that ``map`` holds exactly the one entry ``key -> value``."""
    only_pair = (key, value)
    assert len(map) == 1
    assert set(map.keys()) == {key}
    assert list(map.values()) == [value]
    assert set(map.items()) == {only_pair}
    assert map[key] == value
    # A key that was never inserted must not be found.
    with pytest.raises(KeyError):
        _ = map[key + "blah"]
    assert map == {key: value}
    rebuilt = ArrayMap.empty().plus(key, value)
    assert map == rebuilt

def assertDouble(map, key1, value1, key2, value2):
    """Assert that ``map`` holds exactly two entries, in the given order."""
    pairs = [(key1, value1), (key2, value2)]
    assert len(map) == 2
    assert set(map.keys()) == {k for k, _ in pairs}
    assert list(map.values()) == [v for _, v in pairs]
    assert set(map.items()) == set(pairs)
    for k, v in pairs:
        assert map[k] == v
    with pytest.raises(KeyError):
        _ = map[key1 + "blah"]
    # Equality must not depend on the order entries appear in a dict literal.
    assert map == {key1: value1, key2: value2}
    assert map == {key2: value2, key1: value1}
    # Nor on the order in which the entries were inserted.
    inserted_in_order = ArrayMap.empty().plus(key1, value1).plus(key2, value2)
    assert map == inserted_in_order
    inserted_reversed = ArrayMap.empty().plus(key2, value2).plus(key1, value1)
    assert map == inserted_reversed

def assertTriple(map, key1, value1, key2, value2, key3, value3):
    """Assert that ``map`` holds exactly three entries, in the given order."""
    pairs = [(key1, value1), (key2, value2), (key3, value3)]
    assert len(map) == 3
    assert set(map.keys()) == {k for k, _ in pairs}
    assert list(map.values()) == [v for _, v in pairs]
    assert set(map.items()) == set(pairs)
    for k, v in pairs:
        assert map[k] == v
    with pytest.raises(KeyError):
        _ = map[key1 + "blah"]
    assert map == {key1: value1, key2: value2, key3: value3}
    rebuilt = (
        ArrayMap.empty()
        .plus(key1, value1)
        .plus(key2, value2)
        .plus(key3, value3)
    )
    assert map == rebuilt

def test_empty():
    """The shared empty map passes every empty-map check."""
    empty_map = ArrayMap.empty()
    assertEmpty(empty_map)

def test_single():
    """Adding one entry yields a one-entry map and leaves the source empty."""
    base = ArrayMap.empty()
    with_one = base.plus("one", "1")
    # plus() must not have modified the map it was called on.
    assertEmpty(base)
    assertSingle(with_one, "one", "1")

def test_double():
    """Two-entry maps stay sorted, and re-adding an existing key is rejected."""
    base = ArrayMap.empty()
    with_one = base.plus("one", "1")
    with_two = with_one.plus("two", "2")
    # Earlier maps are unaffected by later additions.
    assertEmpty(base)
    assertSingle(with_one, "one", "1")
    assertDouble(with_two, "one", "1", "two", "2")
    # A key that sorts before the existing one ends up first.
    sorted_first = with_one.plus("a", "sorted")
    assertDouble(sorted_first, "a", "sorted", "one", "1")

    with pytest.raises(ValueError) as context:
        with_one.plus("one", "2")
    assert str(context.value) == "Key already exists"

def test_triple():
    """Three entries added in ascending key order form the expected map."""
    triple = ArrayMap.empty()
    for key, value in (("1", "one"), ("2", "two"), ("3", "three")):
        triple = triple.plus(key, value)
    assertTriple(triple, "1", "one", "2", "two", "3", "three")

def test_multi():
    """The same three entries give the same map whatever the insertion order."""
    test_triple()  # Calling another test function directly is unusual but works
    values = {"1": "one", "2": "two", "3": "three"}
    for insertion_order in (("2", "3", "1"), ("3", "1", "2")):
        triple = ArrayMap.empty()
        for key in insertion_order:
            triple = triple.plus(key, values[key])
        assertTriple(triple, "1", "one", "2", "two", "3", "three")

def test_minus_sorted_indices():
    """Removing entry indices 1 and 3 drops keys "2" and "4"."""
    initial_map = ArrayMap.empty()
    for key, value in (("1", "one"), ("2", "two"), ("3", "three"), ("4", "four")):
        initial_map = initial_map.plus(key, value)
    modified_map = initial_map.minus_sorted_indices([1, 3])
    assert len(modified_map) == 2
    assert list(modified_map.keys()) == ["1", "3"]
    assert list(modified_map.values()) == ["one", "three"]
    # The removed keys can no longer be looked up.
    for removed_key in ("2", "4"):
        with pytest.raises(KeyError):
            _ = modified_map[removed_key]
    assert modified_map == {"1": "one", "3": "three"}

def test_plus_with_existing_keys():
    """plus() rejects an existing key but accepts it again after removal."""
    map_with_duplicates = ArrayMap.empty().plus("a", "alpha").plus("b", "beta")
    with pytest.raises(ValueError):
        map_with_duplicates.plus("a", "new alpha")

    # Adding a new key keeps the existing entries intact.
    updated_map = map_with_duplicates.plus("c", "gamma")
    assert len(updated_map) == 3
    for key, expected in (("a", "alpha"), ("b", "beta"), ("c", "gamma")):
        assert updated_map[key] == expected

    # Once "a" (entry index 0) is removed, it can be added with a new value.
    without_a = map_with_duplicates.minus_sorted_indices([0])
    modified_map = without_a.plus("a", "updated alpha")
    assert len(modified_map) == 2
    assert modified_map["a"] == "updated alpha"
    assert modified_map["b"] == "beta"

def test_map_length():
    """len() tracks every addition and removal."""
    current = ArrayMap.empty()
    assert len(current) == 0, "Length should be 0 for an empty map"

    additions = (
        ("key1", "value1", 1, "Length should be 1 after adding one item"),
        ("key2", "value2", 2, "Length should be 2 after adding another item"),
        ("key3", "value3", 3, "Length should be 3 after adding a third item"),
    )
    for key, value, expected_length, message in additions:
        current = current.plus(key, value)
        assert len(current) == expected_length, message

    removals = (
        (1, 2, "Length should be 2 after removing one item"),
        (0, 1, "Length should be 1 after removing another item"),
        (0, 0, "Length should be 0 after removing all items"),
    )
    for index, expected_length, message in removals:
        current = current.minus_sorted_indices([index])
        assert len(current) == expected_length, message