@@ -69,12 +69,10 @@ enum {
6969 MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10 ,
7070};
7171
72- static struct mlx5_cmd_work_ent * alloc_cmd (struct mlx5_cmd * cmd ,
73- struct mlx5_cmd_msg * in ,
74- struct mlx5_cmd_msg * out ,
75- void * uout , int uout_size ,
76- mlx5_cmd_cbk_t cbk ,
77- void * context , int page_queue )
72+ static struct mlx5_cmd_work_ent *
73+ cmd_alloc_ent (struct mlx5_cmd * cmd , struct mlx5_cmd_msg * in ,
74+ struct mlx5_cmd_msg * out , void * uout , int uout_size ,
75+ mlx5_cmd_cbk_t cbk , void * context , int page_queue )
7876{
7977 gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL ;
8078 struct mlx5_cmd_work_ent * ent ;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
8381 if (!ent )
8482 return ERR_PTR (- ENOMEM );
8583
84+ ent -> idx = - EINVAL ;
8685 ent -> in = in ;
8786 ent -> out = out ;
8887 ent -> uout = uout ;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
9190 ent -> context = context ;
9291 ent -> cmd = cmd ;
9392 ent -> page_queue = page_queue ;
93+ refcount_set (& ent -> refcnt , 1 );
9494
9595 return ent ;
9696}
9797
98+ static void cmd_free_ent (struct mlx5_cmd_work_ent * ent )
99+ {
100+ kfree (ent );
101+ }
102+
98103static u8 alloc_token (struct mlx5_cmd * cmd )
99104{
100105 u8 token ;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
109114 return token ;
110115}
111116
112- static int alloc_ent (struct mlx5_cmd * cmd )
117+ static int cmd_alloc_index (struct mlx5_cmd * cmd )
113118{
114119 unsigned long flags ;
115120 int ret ;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
123128 return ret < cmd -> max_reg_cmds ? ret : - ENOMEM ;
124129}
125130
126- static void free_ent (struct mlx5_cmd * cmd , int idx )
131+ static void cmd_free_index (struct mlx5_cmd * cmd , int idx )
127132{
128133 unsigned long flags ;
129134
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
132137 spin_unlock_irqrestore (& cmd -> alloc_lock , flags );
133138}
134139
140+ static void cmd_ent_get (struct mlx5_cmd_work_ent * ent )
141+ {
142+ refcount_inc (& ent -> refcnt );
143+ }
144+
145+ static void cmd_ent_put (struct mlx5_cmd_work_ent * ent )
146+ {
147+ if (!refcount_dec_and_test (& ent -> refcnt ))
148+ return ;
149+
150+ if (ent -> idx >= 0 )
151+ cmd_free_index (ent -> cmd , ent -> idx );
152+
153+ cmd_free_ent (ent );
154+ }
155+
135156static struct mlx5_cmd_layout * get_inst (struct mlx5_cmd * cmd , int idx )
136157{
137158 return cmd -> cmd_buf + (idx << cmd -> log_stride );
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
219240 ent -> ret = - ETIMEDOUT ;
220241}
221242
222- static void free_cmd (struct mlx5_cmd_work_ent * ent )
223- {
224- kfree (ent );
225- }
226-
227243static int verify_signature (struct mlx5_cmd_work_ent * ent )
228244{
229245 struct mlx5_cmd_mailbox * next = ent -> out -> next ;
@@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work)
837853 struct mlx5_core_dev * dev = container_of (ent -> cmd , struct mlx5_core_dev ,
838854 cmd );
839855
856+ mlx5_cmd_eq_recover (dev );
857+
858+ /* May have been handled by EQ recovery? */
859+ if (!test_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state )) {
860+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) Async, recovered after timeout\n" , ent -> idx ,
861+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
862+ goto out ; /* phew, already handled */
863+ }
864+
840865 ent -> ret = - ETIMEDOUT ;
841- mlx5_core_warn (dev , "%s(0x%x) timeout. Will cause a leak of a command resource\n" ,
842- mlx5_command_str (msg_to_opcode (ent -> in )),
843- msg_to_opcode (ent -> in ));
866+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n" ,
867+ ent -> idx , mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
844868 mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
869+
870+ out :
871+ cmd_ent_put (ent ); /* for the cmd_ent_get() taken when scheduling the delayed work */
845872}
846873
847874static void free_msg (struct mlx5_core_dev * dev , struct mlx5_cmd_msg * msg );
@@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
856883 return cmd -> allowed_opcode == opcode ;
857884}
858885
886+ static int cmd_alloc_index_retry (struct mlx5_cmd * cmd )
887+ {
888+ unsigned long alloc_end = jiffies + msecs_to_jiffies (1000 );
889+ int idx ;
890+
891+ retry :
892+ idx = cmd_alloc_index (cmd );
893+ if (idx < 0 && time_before (jiffies , alloc_end )) {
894+ /* Index allocation can fail on heavy load of commands. This is a temporary
895+ * situation as the current command already holds the semaphore, meaning that
896+ * another command completion is being handled and it is expected to release
897+ * the entry index soon.
898+ */
899+ cpu_relax ();
900+ goto retry ;
901+ }
902+ return idx ;
903+ }
904+
905+ bool mlx5_cmd_is_down (struct mlx5_core_dev * dev )
906+ {
907+ return pci_channel_offline (dev -> pdev ) ||
908+ dev -> cmd .state != MLX5_CMDIF_STATE_UP ||
909+ dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ;
910+ }
911+
859912static void cmd_work_handler (struct work_struct * work )
860913{
861914 struct mlx5_cmd_work_ent * ent = container_of (work , struct mlx5_cmd_work_ent , work );
@@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work)
873926 sem = ent -> page_queue ? & cmd -> pages_sem : & cmd -> sem ;
874927 down (sem );
875928 if (!ent -> page_queue ) {
876- alloc_ret = alloc_ent (cmd );
929+ alloc_ret = cmd_alloc_index_retry (cmd );
877930 if (alloc_ret < 0 ) {
878931 mlx5_core_err_rl (dev , "failed to allocate command entry\n" );
879932 if (ent -> callback ) {
880933 ent -> callback (- EAGAIN , ent -> context );
881934 mlx5_free_cmd_msg (dev , ent -> out );
882935 free_msg (dev , ent -> in );
883- free_cmd (ent );
936+ cmd_ent_put (ent );
884937 } else {
885938 ent -> ret = - EAGAIN ;
886939 complete (& ent -> done );
@@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work)
916969 ent -> ts1 = ktime_get_ns ();
917970 cmd_mode = cmd -> mode ;
918971
919- if (ent -> callback )
920- schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout );
972+ if (ent -> callback && schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout ) )
973+ cmd_ent_get ( ent );
921974 set_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state );
922975
923976 /* Skip sending command to fw if internal error */
924- if (pci_channel_offline (dev -> pdev ) ||
925- dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
926- cmd -> state != MLX5_CMDIF_STATE_UP ||
927- !opcode_allowed (& dev -> cmd , ent -> op )) {
977+ if (mlx5_cmd_is_down (dev ) || !opcode_allowed (& dev -> cmd , ent -> op )) {
928978 u8 status = 0 ;
929979 u32 drv_synd ;
930980
@@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work)
933983 MLX5_SET (mbox_out , ent -> out , syndrome , drv_synd );
934984
935985 mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
936- /* no doorbell, no need to keep the entry */
937- free_ent (cmd , ent -> idx );
938- if (ent -> callback )
939- free_cmd (ent );
940986 return ;
941987 }
942988
989+ cmd_ent_get (ent ); /* for the _real_ FW event on completion */
943990 /* ring doorbell after the descriptor is valid */
944991 mlx5_core_dbg (dev , "writing 0x%x to command doorbell\n" , 1 << ent -> idx );
945992 wmb ();
@@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status)
9831030 }
9841031}
9851032
1033+ enum {
1034+ MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000 ,
1035+ };
1036+
1037+ static void wait_func_handle_exec_timeout (struct mlx5_core_dev * dev ,
1038+ struct mlx5_cmd_work_ent * ent )
1039+ {
1040+ unsigned long timeout = msecs_to_jiffies (MLX5_CMD_TIMEOUT_RECOVER_MSEC );
1041+
1042+ mlx5_cmd_eq_recover (dev );
1043+
1044+ /* Re-wait on the ent->done after executing the recovery flow. If the
1045+ * recovery flow (or any other recovery flow running simultaneously)
1046+ * has recovered an EQE, it should cause the entry to be completed by
1047+ * the command interface.
1048+ */
1049+ if (wait_for_completion_timeout (& ent -> done , timeout )) {
1050+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) recovered after timeout\n" , ent -> idx ,
1051+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
1052+ return ;
1053+ }
1054+
1055+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) No done completion\n" , ent -> idx ,
1056+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
1057+
1058+ ent -> ret = - ETIMEDOUT ;
1059+ mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
1060+ }
1061+
9861062static int wait_func (struct mlx5_core_dev * dev , struct mlx5_cmd_work_ent * ent )
9871063{
9881064 unsigned long timeout = msecs_to_jiffies (MLX5_CMD_TIMEOUT_MSEC );
@@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
9941070 ent -> ret = - ECANCELED ;
9951071 goto out_err ;
9961072 }
997- if (cmd -> mode == CMD_MODE_POLLING || ent -> polling ) {
1073+ if (cmd -> mode == CMD_MODE_POLLING || ent -> polling )
9981074 wait_for_completion (& ent -> done );
999- } else if (!wait_for_completion_timeout (& ent -> done , timeout )) {
1000- ent -> ret = - ETIMEDOUT ;
1001- mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
1002- }
1075+ else if (!wait_for_completion_timeout (& ent -> done , timeout ))
1076+ wait_func_handle_exec_timeout (dev , ent );
10031077
10041078out_err :
10051079 err = ent -> ret ;
@@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10391113 if (callback && page_queue )
10401114 return - EINVAL ;
10411115
1042- ent = alloc_cmd (cmd , in , out , uout , uout_size , callback , context ,
1043- page_queue );
1116+ ent = cmd_alloc_ent (cmd , in , out , uout , uout_size ,
1117+ callback , context , page_queue );
10441118 if (IS_ERR (ent ))
10451119 return PTR_ERR (ent );
10461120
1121+ /* The put for this ent happens when it is consumed, depending on the use case:
1122+ * 1) (!callback) blocking flow: by the caller after wait_func() completes
1123+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when the ent is handled
1124+ */
1125+
10471126 ent -> token = token ;
10481127 ent -> polling = force_polling ;
10491128
@@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10621141 }
10631142
10641143 if (callback )
1065- goto out ;
1144+ goto out ; /* mlx5_cmd_comp_handler() will put(ent) */
10661145
10671146 err = wait_func (dev , ent );
1068- if (err == - ETIMEDOUT )
1069- goto out ;
1070- if (err == - ECANCELED )
1147+ if (err == - ETIMEDOUT || err == - ECANCELED )
10711148 goto out_free ;
10721149
10731150 ds = ent -> ts2 - ent -> ts1 ;
@@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10851162 * status = ent -> status ;
10861163
10871164out_free :
1088- free_cmd (ent );
1165+ cmd_ent_put (ent );
10891166out :
10901167 return err ;
10911168}
@@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15161593 if (!forced ) {
15171594 mlx5_core_err (dev , "Command completion arrived after timeout (entry idx = %d).\n" ,
15181595 ent -> idx );
1519- free_ent (cmd , ent -> idx );
1520- free_cmd (ent );
1596+ cmd_ent_put (ent );
15211597 }
15221598 continue ;
15231599 }
15241600
1525- if (ent -> callback )
1526- cancel_delayed_work (& ent -> cb_timeout_work );
1601+ if (ent -> callback && cancel_delayed_work (& ent -> cb_timeout_work ))
1602+ cmd_ent_put (ent ); /* timeout work was canceled */
1603+
1604+ if (!forced || /* Real FW completion */
1605+ pci_channel_offline (dev -> pdev ) || /* FW is inaccessible */
1606+ dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR )
1607+ cmd_ent_put (ent );
1608+
15271609 if (ent -> page_queue )
15281610 sem = & cmd -> pages_sem ;
15291611 else
@@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15451627 ent -> ret , deliv_status_to_str (ent -> status ), ent -> status );
15461628 }
15471629
1548- /* only real completion will free the entry slot */
1549- if (!forced )
1550- free_ent (cmd , ent -> idx );
1551-
15521630 if (ent -> callback ) {
15531631 ds = ent -> ts2 - ent -> ts1 ;
15541632 if (ent -> op < MLX5_CMD_OP_MAX ) {
@@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15761654 free_msg (dev , ent -> in );
15771655
15781656 err = err ? err : ent -> status ;
1579- if (! forced )
1580- free_cmd (ent );
1657+ /* final consumer is done, release ent */
1658+ cmd_ent_put (ent );
15811659 callback (err , context );
15821660 } else {
1661+ /* release wait_func() so mlx5_cmd_invoke()
1662+ * can make the final ent_put()
1663+ */
15831664 complete (& ent -> done );
15841665 }
15851666 up (sem );
@@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15891670
15901671void mlx5_cmd_trigger_completions (struct mlx5_core_dev * dev )
15911672{
1673+ struct mlx5_cmd * cmd = & dev -> cmd ;
1674+ unsigned long bitmask ;
15921675 unsigned long flags ;
15931676 u64 vector ;
1677+ int i ;
15941678
15951679 /* wait for pending handlers to complete */
15961680 mlx5_eq_synchronize_cmd_irq (dev );
@@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
15991683 if (!vector )
16001684 goto no_trig ;
16011685
1686+ bitmask = vector ;
1687+ /* We must increment the refcount of the allocated entries before triggering the
1688+ * completions, to guarantee that pending commands will not get freed in the meantime.
1689+ * For that reason, this also has to be done while holding the alloc_lock.
1690+ */
1691+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1692+ cmd_ent_get (cmd -> ent_arr [i ]);
16021693 vector |= MLX5_TRIGGERED_CMD_COMP ;
16031694 spin_unlock_irqrestore (& dev -> cmd .alloc_lock , flags );
16041695
16051696 mlx5_core_dbg (dev , "vector 0x%llx\n" , vector );
16061697 mlx5_cmd_comp_handler (dev , vector , true);
1698+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1699+ cmd_ent_put (cmd -> ent_arr [i ]);
16071700 return ;
16081701
16091702no_trig :
@@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
17111804 u8 token ;
17121805
17131806 opcode = MLX5_GET (mbox_in , in , opcode );
1714- if (pci_channel_offline (dev -> pdev ) ||
1715- dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
1716- dev -> cmd .state != MLX5_CMDIF_STATE_UP ||
1717- !opcode_allowed (& dev -> cmd , opcode )) {
1807+ if (mlx5_cmd_is_down (dev ) || !opcode_allowed (& dev -> cmd , opcode )) {
17181808 err = mlx5_internal_err_ret_value (dev , opcode , & drv_synd , & status );
17191809 MLX5_SET (mbox_out , out , status , status );
17201810 MLX5_SET (mbox_out , out , syndrome , drv_synd );
0 commit comments