Skip to content

Commit 3c3442c

Browse files
amontoison and gdalle authored
Remove the dictionaries in TreeSet (#233)
* Remove the dictionaries in TreeSet
* Update coloring.jl
* Update src/coloring.jl
* Apply suggestions from code review
* Update src/coloring.jl
* Format

---------

Co-authored-by: Guillaume Dalle <22795598+gdalle@users.noreply.github.com>
1 parent bcca5b8 commit 3c3442c

4 files changed

Lines changed: 142 additions & 74 deletions

File tree

src/coloring.jl

Lines changed: 132 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ function acyclic_coloring(
293293

294294
buffer = forbidden_colors
295295
reverse_bfs_orders = first_visit_to_tree
296-
tree_set = TreeSet(g, forest, buffer, reverse_bfs_orders)
296+
tree_set = TreeSet(g, forest, buffer, reverse_bfs_orders, ne)
297297
if postprocessing
298298
# Reuse the vector forbidden_colors to compute offsets during post-processing
299299
offsets = forbidden_colors
@@ -385,11 +385,12 @@ function TreeSet(
385385
forest::Forest{T},
386386
buffer::AbstractVector{T},
387387
reverse_bfs_orders::Vector{Tuple{T,T}},
388+
ne::Integer,
388389
) where {T}
389390
S = pattern(g)
390391
edge_to_index = edge_indices(g)
391392
nv = nb_vertices(g)
392-
(; nt, ranks) = forest
393+
(; nt, ranks, parents) = forest
393394

394395
# root_to_tree is a vector that maps a tree's root to the index of the tree
395396
# We can recycle the vector "ranks" because we don't need it anymore to merge trees
@@ -399,47 +400,109 @@ function TreeSet(
399400
# vector specifying the starting and ending indices of edges for each tree
400401
tree_edge_indices = zeros(T, nt + 1)
401402

402-
# vector of dictionaries where each dictionary stores the neighbors of each vertex in a tree
403-
trees = [Dict{T,Vector{T}}() for i in 1:nt]
404-
405403
# number of roots found
406404
nr = 0
407405

406+
# determine the number of edges for each tree and map each root to a tree index
407+
for index_edge in 1:ne
408+
root = find_root!(forest, index_edge)
409+
410+
# create a mapping between roots and tree indices
411+
if iszero(root_to_tree[root])
412+
nr += 1
413+
root_to_tree[root] = nr
414+
end
415+
416+
# index of the tree that contains this edge
417+
index_tree = root_to_tree[root]
418+
419+
# Update the number of edges for the current tree (shifted by 1 to facilitate the final cumsum)
420+
tree_edge_indices[index_tree + 1] += 1
421+
end
422+
423+
# nvmax is the number of vertices in the largest tree of the forest
424+
# Note: the number of vertices in a tree is equal to the number of edges plus one
425+
nvmax = maximum(tree_edge_indices) + one(T)
426+
427+
# Vector containing the list of vertices, grouped by tree (each vertex appears once for every tree it belongs to)
428+
# Note: the total number of edges in the graph is "ne", so there are "ne + nt" vertices across all trees
429+
tree_vertices = Vector{T}(undef, ne + nt)
430+
431+
# Provide the positions of the first and last neighbors for each vertex in "tree_vertices", within the tree to which the vertex belongs
432+
# These positions refer to indices in the vector "tree_neighbors"
433+
tree_neighbor_indices = zeros(T, ne + nt + 1)
434+
435+
# Packed representation of the neighbors of each vertex in "tree_vertices"
436+
tree_neighbors = Vector{T}(undef, 2 * ne)
437+
438+
# Track the positions for inserting vertices and neighbors per tree
439+
vertex_position = Vector{T}(undef, nt)
440+
neighbor_position = Vector{T}(undef, nt)
441+
442+
# Compute starting positions for vertices and neighbors in each tree
443+
if nt > 0
444+
vertex_position[1] = zero(T)
445+
neighbor_position[1] = zero(T)
446+
end
447+
for k in 2:nt
448+
# Note: tree_edge_indices[k] is the number of edges in the tree k-1
449+
vertex_position[k] = vertex_position[k - 1] + tree_edge_indices[k] + 1
450+
neighbor_position[k] = neighbor_position[k - 1] + 2 * tree_edge_indices[k]
451+
end
452+
453+
# found_in_tree[k] indicates whether the current vertex j has already been found in tree k
454+
found_in_tree = fill(false, nt)
455+
456+
# Maintain a record of visited trees to efficiently reset found_in_tree
457+
visited_trees = Vector{T}(undef, nt)
458+
459+
# Number of trees visited for each column of S
460+
nt_visited = 0
408461
rvS = rowvals(S)
409462
for j in axes(S, 2)
410463
for pos in nzrange(S, j)
411464
i = rvS[pos]
412-
if i > j
465+
if i != j
413466
index_ij = edge_to_index[pos]
414-
root = find_root!(forest, index_ij)
415467

416-
# Update roots
417-
if iszero(root_to_tree[root])
418-
nr += 1
419-
root_to_tree[root] = nr
420-
end
468+
# No need to call "find_root!" because paths have already been compressed
469+
root = parents[index_ij]
421470

422-
# index of the tree that contains this edge
471+
# Index of the tree containing edge (i, j)
423472
index_tree = root_to_tree[root]
424473

425-
# Update the number of edges for the current tree (shifted by 1 to facilitate the final cumsum)
426-
tree_edge_indices[index_tree + 1] += 1
474+
# Position in tree_vertices where vertex j should be found or inserted
475+
vertex_index = vertex_position[index_tree]
427476

428-
# Update the neighbors of i in the current tree
429-
if !haskey(trees[index_tree], i)
430-
trees[index_tree][i] = [j]
431-
else
432-
push!(trees[index_tree][i], j)
433-
end
477+
if !found_in_tree[index_tree]
478+
# Mark that vertex j is present in the current tree
479+
found_in_tree[index_tree] = true
434480

435-
# Update the neighbors of j in the current tree
436-
if !haskey(trees[index_tree], j)
437-
trees[index_tree][j] = [i]
438-
else
439-
push!(trees[index_tree][j], i)
481+
# This is the first time an edge with vertex j has been found in the tree
482+
nt_visited += 1
483+
visited_trees[nt_visited] = index_tree
484+
485+
# Insert j into tree_vertices
486+
vertex_position[index_tree] += 1
487+
vertex_index += 1
488+
tree_vertices[vertex_index] = j
440489
end
490+
491+
# Append neighbor i to the list of neighbors of j in the tree
492+
neighbor_position[index_tree] += 1
493+
neighbor_index = neighbor_position[index_tree]
494+
tree_neighbors[neighbor_index] = i
495+
496+
# Increment neighbor count for j in the tree (shifted by 1 to facilitate the final cumsum)
497+
tree_neighbor_indices[vertex_index + 1] += 1
441498
end
442499
end
500+
501+
# Reset found_in_tree
502+
for t in 1:nt_visited
503+
found_in_tree[visited_trees[t]] = false
504+
end
505+
nt_visited = 0
443506
end
444507

445508
# Compute a shifted cumulative sum of tree_edge_indices, starting from one
@@ -448,52 +511,65 @@ function TreeSet(
448511
tree_edge_indices[k] += tree_edge_indices[k - 1]
449512
end
450513

514+
# Compute a shifted cumulative sum of tree_neighbor_indices, starting from one
515+
tree_neighbor_indices[1] = 1
516+
for k in 2:(ne + nt + 1)
517+
tree_neighbor_indices[k] += tree_neighbor_indices[k - 1]
518+
end
519+
451520
# degrees is a vector of integers that stores the degree of each vertex in a tree
452521
degrees = buffer
453522

454-
# nvmax is the number of vertices of the biggest tree in the forest
455-
nvmax = 0
456-
for k in 1:nt
457-
nb_vertices_tree = length(trees[k])
458-
nvmax = max(nvmax, nb_vertices_tree)
459-
end
523+
# For each vertex in the current tree, reverse_mapping will hold its corresponding index in tree_vertices
524+
reverse_mapping = Vector{T}(undef, nv)
460525

461526
# Create a queue with a fixed size nvmax
462527
queue = Vector{T}(undef, nvmax)
463528

464-
# Specify if each tree in the forest is a star,
465-
# meaning that one vertex is directly connected to all other vertices in the tree
466-
is_star = Vector{Bool}(undef, nt)
529+
# Determine if each tree in the forest is a star
530+
# In a star, at most one vertex has a degree strictly greater than one
531+
is_star = found_in_tree
467532

468533
# Number of edges treated
469534
num_edges_treated = zero(T)
470535

471536
# reverse_bfs_orders contains the reverse breadth first (BFS) traversal order for each tree in the forest
472537
for k in 1:nt
473-
tree = trees[k]
474-
475-
# Boolean indicating whether the current tree is a star (a single central vertex connected to all others)
476-
bool_star = true
477-
478-
# Candidate hub vertex if the current tree is a star
479-
virtual_hub = 0
480-
481538
# Initialize the queue to store the leaves
482539
queue_start = 1
483540
queue_end = 0
484541

542+
# Positions of the first and last vertices in the current tree
543+
# Note: tree_edge_indices contains the positions of the first and last edges,
544+
# so we need to add an offset k-1 between edge indices and vertex indices
545+
first_vertex = tree_edge_indices[k] + (k-1)
546+
last_vertex = tree_edge_indices[k + 1] + (k-1)
547+
485548
# compute the degree of each vertex in the tree
486-
for (vertex, neighbors) in tree
487-
degree = length(neighbors)
549+
for index_vertex in first_vertex:last_vertex
550+
vertex = tree_vertices[index_vertex]
551+
degree =
552+
tree_neighbor_indices[index_vertex + 1] -
553+
tree_neighbor_indices[index_vertex]
488554
degrees[vertex] = degree
489555

556+
# store a reverse mapping to get the position of the vertex in tree_vertices
557+
reverse_mapping[vertex] = index_vertex
558+
490559
# the vertex is a leaf
491560
if degree == 1
492561
queue_end += 1
493562
queue[queue_end] = vertex
494563
end
495564
end
496565

566+
# number of vertices in the tree
567+
nv_tree = tree_edge_indices[k + 1] - tree_edge_indices[k] + 1
568+
569+
# Check that no more than one vertex has a degree strictly greater than one
570+
# "queue_end" currently represents the number of vertices considered as leaves in the tree before any pruning
571+
is_star[k] = queue_end >= nv_tree - 1
572+
497573
# continue until all leaves are treated
498574
while queue_start <= queue_end
499575
leaf = queue[queue_start]
@@ -502,26 +578,23 @@ function TreeSet(
502578
# Mark the vertex as removed
503579
degrees[leaf] = 0
504580

505-
for neighbor in tree[leaf]
581+
# Position of the leaf in tree_vertices
582+
index_leaf = reverse_mapping[leaf]
583+
584+
# Positions of the first and last neighbors of the leaf in the current tree
585+
first_neighbor = tree_neighbor_indices[index_leaf]
586+
last_neighbor = tree_neighbor_indices[index_leaf + 1] - 1
587+
588+
# Iterate over all neighbors of the leaf to be pruned
589+
for index_neighbor in first_neighbor:last_neighbor
590+
neighbor = tree_neighbors[index_neighbor]
591+
506592
# Check if neighbor is the parent of the leaf or if it was a child before the tree was pruned
507593
if degrees[neighbor] != 0
508594
# (leaf, neighbor) represents the next edge to visit during decompression
509595
num_edges_treated += 1
510596
reverse_bfs_orders[num_edges_treated] = (leaf, neighbor)
511597

512-
if bool_star
513-
# Initialize the potential hub of the star with the first parent of a leaf
514-
if virtual_hub == 0
515-
virtual_hub = neighbor
516-
else
517-
# Verify if the tree still qualifies as a star
518-
# If we find leaves with different parents, then it can't be a star
519-
if virtual_hub != neighbor
520-
bool_star = false
521-
end
522-
end
523-
end
524-
525598
# reduce the degree of the neighbor
526599
degrees[neighbor] -= 1
527600

@@ -533,9 +606,6 @@ function TreeSet(
533606
end
534607
end
535608
end
536-
537-
# Specify if the tree is a star or not
538-
is_star[k] = bool_star
539609
end
540610

541611
return TreeSet(reverse_bfs_orders, is_star, tree_edge_indices, nt)

src/decompression.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -540,11 +540,11 @@ function decompress!(
540540

541541
# Recover the off-diagonal coefficients of A
542542
for k in 1:nt
543-
# Positions of the edges for each tree
543+
# Positions of the first and last edges of the tree
544544
first = tree_edge_indices[k]
545545
last = tree_edge_indices[k + 1] - 1
546546

547-
# Reset the buffer to zero for all vertices in a tree (except the root)
547+
# Reset the buffer to zero for all vertices in the tree (except the root)
548548
for pos in first:last
549549
(vertex, _) = reverse_bfs_orders[pos]
550550
buffer_right_type[vertex] = zero(R)
@@ -625,11 +625,11 @@ function decompress!(
625625

626626
# Recover the off-diagonal coefficients of A
627627
for k in 1:nt
628-
# Positions of the edges for each tree
628+
# Positions of the first and last edges of the tree
629629
first = tree_edge_indices[k]
630630
last = tree_edge_indices[k + 1] - 1
631631

632-
# Reset the buffer to zero for all vertices in a tree (except the root)
632+
# Reset the buffer to zero for all vertices in the tree (except the root)
633633
for pos in first:last
634634
(vertex, _) = reverse_bfs_orders[pos]
635635
buffer_right_type[vertex] = zero(R)

src/forest.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,21 @@ function Forest{T}(n::Integer) where {T<:Integer}
2525
return Forest{T}(nt, parents, ranks)
2626
end
2727

28-
function _find_root!(parents::Vector{T}, index_edge::T) where {T<:Integer}
28+
function _find_root!(parents::Vector{<:Integer}, index_edge::Integer)
2929
p = parents[index_edge]
3030
if parents[p] != p
3131
parents[index_edge] = p = _find_root!(parents, p)
3232
end
3333
return p
3434
end
3535

36-
function find_root!(forest::Forest{T}, index_edge::T) where {T<:Integer}
36+
function find_root!(forest::Forest{<:Integer}, index_edge::Integer)
3737
return _find_root!(forest.parents, index_edge)
3838
end
3939

40-
function root_union!(forest::Forest{T}, index_edge1::T, index_edge2::T) where {T<:Integer}
40+
function root_union!(
41+
forest::Forest{T}, index_edge1::Integer, index_edge2::Integer
42+
) where {T<:Integer}
4143
parents = forest.parents
4244
rks = forest.ranks
4345
rank1 = rks[index_edge1]

test/allocations.jl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,7 @@ end
154154
b64 = @b fast_coloring(A64, problem, algo)
155155
b32 = @b fast_coloring(A32, problem, algo)
156156
# check that we allocate no more than 50% + epsilon with Int32
157-
if decompression == :direct
158-
@test b32.bytes < 0.6 * b64.bytes
159-
else
160-
@test_broken b32.bytes < 0.6 * b64.bytes
161-
end
157+
@test b32.bytes < 0.6 * b64.bytes
162158
end
163159
end
164160
end;

0 commit comments

Comments
 (0)