The cosine similarity method has been working for my other cases. However, it returns counterintuitive results for the following example: intuitively, I expect l2 to have a higher score than l1, but I get the opposite result.
import numpy
def cosine_similarity(a: numpy.ndarray, b: numpy.ndarray) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    Args:
        a: First vector (a NumPy array or any array-like such as a list).
        b: Second vector, same length as *a*.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either input
        contains only zeros (the ratio is undefined for a zero vector).
    """
    a = numpy.asarray(a)
    b = numpy.asarray(b)
    # Promote to at least float64 before multiplying: numpy.dot accumulates
    # in the input dtype, so narrow integer arrays (e.g. int8) would wrap
    # around in the numerator while the norms are computed in floating point.
    a = a.astype(numpy.promote_types(a.dtype, numpy.float64), copy=False)
    b = b.astype(numpy.promote_types(b.dtype, numpy.float64), copy=False)

    if not numpy.any(a) or not numpy.any(b):
        return 0.0
    return numpy.dot(a, b) / (numpy.linalg.norm(a) * numpy.linalg.norm(b))
# Sample series for the comparison below.
# l0: the series that both l1 and l2 are compared against.
l0 = [261, 300, 312, 326, 342, 314, 285, 311, 324, 304, 340, 312, 339, 302, 308, 316, 324, 340, 292, 349, 344, 317, 340, 311, 256, 296, 353, 329, 272, 355, 263, 312, 267, 338, 415, 376, 486, 578, 707, 887, 874, 802, 494, 477, 412, 342, 343, 363, 374, 385, 582, 751, 735, 882, 1087, 763, 290, 289, 320, 344, 325, 307, 351, 307, 354, 304, 356, 319, 365, 307, 442, 390, 390, 344, 359, 277, 336, 312, 289, 332, 334, 354, 360, 315, 293, 320, 320, 315, 301, 286, 337, 300, 290, 280, 329, 296]
# l1: every entry is nonzero; many entries equal or nearly equal the
# corresponding entry of l0.
l1 = [260, 300, 312, 324, 340, 306, 285, 311, 324, 304, 340, 312, 339, 301, 308, 316, 324, 340, 292, 348, 344, 317, 337, 310, 256, 296, 344, 328, 272, 354, 261, 312, 265, 316, 324, 260, 296, 312, 309, 377, 346, 288, 285, 315, 336, 328, 308, 301, 343, 284, 329, 367, 351, 285, 352, 290, 282, 288, 284, 336, 304, 300, 347, 307, 354, 304, 344, 318, 360, 300, 386, 323, 349, 306, 327, 271, 318, 312, 276, 332, 333, 354, 360, 315, 293, 320, 320, 315, 301, 285, 337, 300, 290, 280, 328, 296]
# l2: zero everywhere except for a few runs of nonzero values.
l2 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 70, 102, 148, 213, 340, 399, 451, 438, 168, 140, 76, 4, 16, 32, 30, 89, 233, 255, 242, 421, 576, 332, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 63, 41, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Score each candidate series against l0. The values in the trailing comments
# are the outputs reported for this run; they were previously written as bare
# `return` statements, which are a SyntaxError at module level.
# NOTE(review): cosine similarity is not invariant to a constant offset. l0 and
# l1 are nonzero throughout while l2 is mostly zeros, which likely explains why
# l1 scores higher; subtracting each vector's mean first would compare shape
# instead -- confirm this matches the intended notion of similarity.
print(cosine_similarity(l0, l1))  # output: 0.9307915212591917
print(cosine_similarity(l0, l2))  # output: 0.7086456501795099