@@ -265,29 +265,43 @@ static CSteamNetworkConnectionBase *InternalGetConnectionByHandle( HSteamNetConn
265265{
266266 if ( sock == 0 )
267267 return nullptr ;
268- TableScopeLock tableScopeLock ( g_tables_lock );
269- int idx = g_mapConnections.Find ( uint16 ( sock ) );
270- if ( idx == g_mapConnections.InvalidIndex () )
271- return nullptr ;
272- CSteamNetworkConnectionBase *pResult = g_mapConnections[ idx ];
273- if ( !pResult )
274- {
275- AssertMsg ( false , " g_mapConnections corruption!" );
276- return nullptr ;
277- }
278- if ( uint16 ( pResult->m_hConnectionSelf ) != uint16 ( sock ) )
279- {
280- AssertMsg ( false , " Connection map corruption!" );
281- return nullptr ;
282- }
283268
284- // Make sure connection is not in the process of being self-destructed
285- bool bLocked = false ;
269+ // Take locks by "trying" and using a short timeout. The table lock
270+ // should only be held by any one thread for an extremely short period
271+ // of time, so if we end up waiting/looping here, it should be blocked
272+ // on the connection lock. In general we should very seldom need
273+ // to actually loop here, but doing this protects against a deadlock,
274+ // because in rare situations we do need to take the locks in the
275+ // opposite order.
286276 for (;;)
287277 {
288278
279+ // First take the table lock
280+ TableScopeLock tableScopeLock;
281+ if ( !tableScopeLock.TryLock ( g_tables_lock, 1 , nullptr ) )
282+ continue ;
283+
284+ int idx = g_mapConnections.Find ( uint16 ( sock ) );
285+ if ( idx == g_mapConnections.InvalidIndex () )
286+ break ;
287+ CSteamNetworkConnectionBase *pResult = g_mapConnections[ idx ];
288+ if ( !pResult )
289+ {
290+ AssertMsg ( false , " g_mapConnections corruption!" );
291+ break ;
292+ }
293+ if ( uint16 ( pResult->m_hConnectionSelf ) != uint16 ( sock ) )
294+ {
295+ AssertMsg ( false , " Connection map corruption!" );
296+ break ;
297+ }
298+
289299 // Fetch the state of the connection. This is OK to do
290- // even if we don't have the lock.
300+ // even if we don't have the lock. If the connection
301+ // is already dead we can avoid even trying to take
302+ // the lock. That's good because cleaning up is one
303+ // of the rare cases where we take locks in the opposite
304+ // order, so we want to avoid that.
291305 ESteamNetworkingConnectionState s = pResult->GetState ();
292306 if ( s == k_ESteamNetworkingConnectionState_Dead )
293307 break ;
@@ -297,25 +311,36 @@ static CSteamNetworkConnectionBase *InternalGetConnectionByHandle( HSteamNetConn
297311 break ;
298312 }
299313
300- // Have we locked already? Then we're good
301- if ( bLocked )
302- {
303- // NOTE: We unlock the table lock here, OUT OF ORDER!
304- return pResult;
305- }
306-
307314 // State looks good, try to lock the connection.
308315 // NOTE: we still (briefly) hold the table lock!
309- // We *should* be able to totally block here
310- // without creating a deadlock, but looping here
311- // isn't so bad
312- bLocked = scopeLock.TryLock ( *pResult->m_pLock , 5 , pszLockTag );
313- }
316+ // Taking the locks in this order (table, then connection)
317+ // is the most common case by far, but in very rare
318+ // cases we might need to take them in the opposite
319+ // order, hence the loop
320+ if ( !scopeLock.TryLock ( *pResult->m_pLock , 1 , pszLockTag ) )
321+ continue ;
314322
315- // Connection found in table, but should not be returned to the caller.
316- // Unlock the connection, if we locked it
317- if ( bLocked )
323+ // Re-check the connection state, in case something changed
324+ // while we were waiting on the lock
325+ s = pResult->GetState ();
326+ if ( s != k_ESteamNetworkingConnectionState_Dead )
327+ {
328+ if ( !bForAPI || BConnectionStateExistsToAPI ( s ) )
329+ {
330+ // NOTE: Exiting this scope will invoke TableScopeLock destructor,
331+ // which will unlock the table lock here, OUT OF ORDER of the order
332+ // that we took the locks. That's intentional! We don't need that
333+ // lock anymore, we have locked the connection that we want.
334+ return pResult;
335+ }
336+ }
337+
338+ // Connection found in table, but should not be returned to the caller.
339+ // Go ahead and immediately unlock the connection lock, the caller should not
340+ // need it.
318341 scopeLock.Unlock ();
342+ break ;
343+ }
319344
320345 return nullptr ;
321346}
0 commit comments