@@ -69,12 +69,10 @@ enum {
6969 MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10 ,
7070};
7171
72- static struct mlx5_cmd_work_ent * alloc_cmd (struct mlx5_cmd * cmd ,
73- struct mlx5_cmd_msg * in ,
74- struct mlx5_cmd_msg * out ,
75- void * uout , int uout_size ,
76- mlx5_cmd_cbk_t cbk ,
77- void * context , int page_queue )
72+ static struct mlx5_cmd_work_ent *
73+ cmd_alloc_ent (struct mlx5_cmd * cmd , struct mlx5_cmd_msg * in ,
74+ struct mlx5_cmd_msg * out , void * uout , int uout_size ,
75+ mlx5_cmd_cbk_t cbk , void * context , int page_queue )
7876{
7977 gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL ;
8078 struct mlx5_cmd_work_ent * ent ;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
8381 if (!ent )
8482 return ERR_PTR (- ENOMEM );
8583
84+ ent -> idx = - EINVAL ;
8685 ent -> in = in ;
8786 ent -> out = out ;
8887 ent -> uout = uout ;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
9190 ent -> context = context ;
9291 ent -> cmd = cmd ;
9392 ent -> page_queue = page_queue ;
93+ refcount_set (& ent -> refcnt , 1 );
9494
9595 return ent ;
9696}
9797
98+ static void cmd_free_ent (struct mlx5_cmd_work_ent * ent )
99+ {
100+ kfree (ent );
101+ }
102+
98103static u8 alloc_token (struct mlx5_cmd * cmd )
99104{
100105 u8 token ;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
109114 return token ;
110115}
111116
112- static int alloc_ent (struct mlx5_cmd * cmd )
117+ static int cmd_alloc_index (struct mlx5_cmd * cmd )
113118{
114119 unsigned long flags ;
115120 int ret ;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
123128 return ret < cmd -> max_reg_cmds ? ret : - ENOMEM ;
124129}
125130
126- static void free_ent (struct mlx5_cmd * cmd , int idx )
131+ static void cmd_free_index (struct mlx5_cmd * cmd , int idx )
127132{
128133 unsigned long flags ;
129134
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
132137 spin_unlock_irqrestore (& cmd -> alloc_lock , flags );
133138}
134139
140+ static void cmd_ent_get (struct mlx5_cmd_work_ent * ent )
141+ {
142+ refcount_inc (& ent -> refcnt );
143+ }
144+
145+ static void cmd_ent_put (struct mlx5_cmd_work_ent * ent )
146+ {
147+ if (!refcount_dec_and_test (& ent -> refcnt ))
148+ return ;
149+
150+ if (ent -> idx >= 0 )
151+ cmd_free_index (ent -> cmd , ent -> idx );
152+
153+ cmd_free_ent (ent );
154+ }
155+
135156static struct mlx5_cmd_layout * get_inst (struct mlx5_cmd * cmd , int idx )
136157{
137158 return cmd -> cmd_buf + (idx << cmd -> log_stride );
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
219240 ent -> ret = - ETIMEDOUT ;
220241}
221242
222- static void free_cmd (struct mlx5_cmd_work_ent * ent )
223- {
224- kfree (ent );
225- }
226-
227243static int verify_signature (struct mlx5_cmd_work_ent * ent )
228244{
229245 struct mlx5_cmd_mailbox * next = ent -> out -> next ;
@@ -842,6 +858,7 @@ static void cb_timeout_handler(struct work_struct *work)
842858 mlx5_command_str (msg_to_opcode (ent -> in )),
843859 msg_to_opcode (ent -> in ));
844860 mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
861+ cmd_ent_put (ent ); /* for the cmd_ent_get() took on schedule delayed work */
845862}
846863
847864static void free_msg (struct mlx5_core_dev * dev , struct mlx5_cmd_msg * msg );
@@ -873,14 +890,14 @@ static void cmd_work_handler(struct work_struct *work)
873890 sem = ent -> page_queue ? & cmd -> pages_sem : & cmd -> sem ;
874891 down (sem );
875892 if (!ent -> page_queue ) {
876- alloc_ret = alloc_ent (cmd );
893+ alloc_ret = cmd_alloc_index (cmd );
877894 if (alloc_ret < 0 ) {
878895 mlx5_core_err_rl (dev , "failed to allocate command entry\n" );
879896 if (ent -> callback ) {
880897 ent -> callback (- EAGAIN , ent -> context );
881898 mlx5_free_cmd_msg (dev , ent -> out );
882899 free_msg (dev , ent -> in );
883- free_cmd (ent );
900+ cmd_ent_put (ent );
884901 } else {
885902 ent -> ret = - EAGAIN ;
886903 complete (& ent -> done );
@@ -916,8 +933,8 @@ static void cmd_work_handler(struct work_struct *work)
916933 ent -> ts1 = ktime_get_ns ();
917934 cmd_mode = cmd -> mode ;
918935
919- if (ent -> callback )
920- schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout );
936+ if (ent -> callback && schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout ) )
937+ cmd_ent_get ( ent );
921938 set_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state );
922939
923940 /* Skip sending command to fw if internal error */
@@ -933,13 +950,10 @@ static void cmd_work_handler(struct work_struct *work)
933950 MLX5_SET (mbox_out , ent -> out , syndrome , drv_synd );
934951
935952 mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
936- /* no doorbell, no need to keep the entry */
937- free_ent (cmd , ent -> idx );
938- if (ent -> callback )
939- free_cmd (ent );
940953 return ;
941954 }
942955
956+ cmd_ent_get (ent ); /* for the _real_ FW event on completion */
943957 /* ring doorbell after the descriptor is valid */
944958 mlx5_core_dbg (dev , "writing 0x%x to command doorbell\n" , 1 << ent -> idx );
945959 wmb ();
@@ -1039,11 +1053,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10391053 if (callback && page_queue )
10401054 return - EINVAL ;
10411055
1042- ent = alloc_cmd (cmd , in , out , uout , uout_size , callback , context ,
1043- page_queue );
1056+ ent = cmd_alloc_ent (cmd , in , out , uout , uout_size ,
1057+ callback , context , page_queue );
10441058 if (IS_ERR (ent ))
10451059 return PTR_ERR (ent );
10461060
1061+ /* put for this ent is when consumed, depending on the use case
1062+ * 1) (!callback) blocking flow: by caller after wait_func completes
1063+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
1064+ */
1065+
10471066 ent -> token = token ;
10481067 ent -> polling = force_polling ;
10491068
@@ -1062,12 +1081,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10621081 }
10631082
10641083 if (callback )
1065- goto out ;
1084+ goto out ; /* mlx5_cmd_comp_handler() will put(ent) */
10661085
10671086 err = wait_func (dev , ent );
1068- if (err == - ETIMEDOUT )
1069- goto out ;
1070- if (err == - ECANCELED )
1087+ if (err == - ETIMEDOUT || err == - ECANCELED )
10711088 goto out_free ;
10721089
10731090 ds = ent -> ts2 - ent -> ts1 ;
@@ -1085,7 +1102,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
10851102 * status = ent -> status ;
10861103
10871104out_free :
1088- free_cmd (ent );
1105+ cmd_ent_put (ent );
10891106out :
10901107 return err ;
10911108}
@@ -1516,14 +1533,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15161533 if (!forced ) {
15171534 mlx5_core_err (dev , "Command completion arrived after timeout (entry idx = %d).\n" ,
15181535 ent -> idx );
1519- free_ent (cmd , ent -> idx );
1520- free_cmd (ent );
1536+ cmd_ent_put (ent );
15211537 }
15221538 continue ;
15231539 }
15241540
1525- if (ent -> callback )
1526- cancel_delayed_work (& ent -> cb_timeout_work );
1541+ if (ent -> callback && cancel_delayed_work (& ent -> cb_timeout_work ))
1542+ cmd_ent_put (ent ); /* timeout work was canceled */
1543+
1544+ if (!forced || /* Real FW completion */
1545+ pci_channel_offline (dev -> pdev ) || /* FW is inaccessible */
1546+ dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR )
1547+ cmd_ent_put (ent );
1548+
15271549 if (ent -> page_queue )
15281550 sem = & cmd -> pages_sem ;
15291551 else
@@ -1545,10 +1567,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15451567 ent -> ret , deliv_status_to_str (ent -> status ), ent -> status );
15461568 }
15471569
1548- /* only real completion will free the entry slot */
1549- if (!forced )
1550- free_ent (cmd , ent -> idx );
1551-
15521570 if (ent -> callback ) {
15531571 ds = ent -> ts2 - ent -> ts1 ;
15541572 if (ent -> op < MLX5_CMD_OP_MAX ) {
@@ -1576,10 +1594,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15761594 free_msg (dev , ent -> in );
15771595
15781596 err = err ? err : ent -> status ;
1579- if (! forced )
1580- free_cmd (ent );
1597+ /* final consumer is done, release ent */
1598+ cmd_ent_put (ent );
15811599 callback (err , context );
15821600 } else {
1601+ /* release wait_func() so mlx5_cmd_invoke()
1602+ * can make the final ent_put()
1603+ */
15831604 complete (& ent -> done );
15841605 }
15851606 up (sem );
@@ -1589,8 +1610,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
15891610
15901611void mlx5_cmd_trigger_completions (struct mlx5_core_dev * dev )
15911612{
1613+ struct mlx5_cmd * cmd = & dev -> cmd ;
1614+ unsigned long bitmask ;
15921615 unsigned long flags ;
15931616 u64 vector ;
1617+ int i ;
15941618
15951619 /* wait for pending handlers to complete */
15961620 mlx5_eq_synchronize_cmd_irq (dev );
@@ -1599,11 +1623,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
15991623 if (!vector )
16001624 goto no_trig ;
16011625
1626+ bitmask = vector ;
1627+ /* we must increment the allocated entries refcount before triggering the completions
1628+ * to guarantee pending commands will not get freed in the meanwhile.
1629+ * For that reason, it also has to be done inside the alloc_lock.
1630+ */
1631+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1632+ cmd_ent_get (cmd -> ent_arr [i ]);
16021633 vector |= MLX5_TRIGGERED_CMD_COMP ;
16031634 spin_unlock_irqrestore (& dev -> cmd .alloc_lock , flags );
16041635
16051636 mlx5_core_dbg (dev , "vector 0x%llx\n" , vector );
16061637 mlx5_cmd_comp_handler (dev , vector , true);
1638+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1639+ cmd_ent_put (cmd -> ent_arr [i ]);
16071640 return ;
16081641
16091642no_trig :
0 commit comments