From cd084449ed8ac72370838b93ec1068630929dc97 Mon Sep 17 00:00:00 2001
From: dsdsdshe <jedzhou99@163.com>
Date: Mon, 4 Sep 2023 19:38:08 +0800
Subject: [PATCH] fix parameter shift rule

---
 ccsrc/include/simulator/vector/vector_state.h |  2 +-
 .../include/simulator/vector/vector_state.tpp | 47 +++++++++-
 mindquantum/simulator/simulator.py            |  4 +-
 .../test_basic_gate_with_simulator.py         | 67 +++++++------
 .../test_simulator/test_method_of_mqmatrix.py | 93 ++++++++++---------
 .../test_simulator/test_method_of_mqvector.py | 69 +++++++++-----
 6 files changed, 184 insertions(+), 98 deletions(-)
diff --git a/ccsrc/include/simulator/vector/vector_state.h b/ccsrc/include/simulator/vector/vector_state.h
index 8f3825ea0..5ee1c712b 100644
--- a/ccsrc/include/simulator/vector/vector_state.h
+++ b/ccsrc/include/simulator/vector/vector_state.h
@@ -200,7 +200,7 @@ class VectorState {
     //! Get the expectation and gradient of hamiltonian by parameter-shift rule
     virtual VVT<py_qs_data_t> GetExpectationWithGradParameterShiftOneMulti(
         const std::vector<std::shared_ptr<Hamiltonian<calc_type>>>& hams, const circuit_t& circ,
-        const parameter::ParameterResolver& pr, const MST<size_t>& p_map, int n_thread);
+        parameter::ParameterResolver& pr, const MST<size_t>& p_map, int n_thread);
 
     virtual VT<VVT<py_qs_data_t>> GetExpectationWithGradParameterShiftMultiMulti(
         const std::vector<std::shared_ptr<Hamiltonian<calc_type>>>& hams, const circuit_t& circ,
diff --git a/ccsrc/include/simulator/vector/vector_state.tpp b/ccsrc/include/simulator/vector/vector_state.tpp
index 70fa1856e..c854926d3 100644
--- a/ccsrc/include/simulator/vector/vector_state.tpp
+++ b/ccsrc/include/simulator/vector/vector_state.tpp
@@ -1117,7 +1117,7 @@ auto VectorState<qs_policy_t_>::GetExpectationWithGradMultiMulti(
 template <typename qs_policy_t_>
 auto VectorState<qs_policy_t_>::GetExpectationWithGradParameterShiftOneMulti(
     const std::vector<std::shared_ptr<Hamiltonian<calc_type>>>& hams, const circuit_t& circ,
-    const parameter::ParameterResolver& pr, const MST<size_t>& p_map, int n_thread) -> VVT<py_qs_data_t> {
+    parameter::ParameterResolver& pr, const MST<size_t>& p_map, int n_thread) -> VVT<py_qs_data_t> {
     auto n_hams = hams.size();
     int max_thread = 15;
     if (n_thread == 0) {
@@ -1155,7 +1155,14 @@ auto VectorState<qs_policy_t_>::GetExpectationWithGradParameterShiftOneMulti(
                 calc_type pr_shift = M_PI_2;
                 calc_type coeff = 0.5;
                 if (gate->id_ == GateID::CUSTOM) {
-                    auto p_gate = static_cast<CustomGate*>(gate.get());
+                    pr_shift = 0.001;
+                    coeff = 0.5 / pr_shift;
+                }
+                if (gate->id_ == GateID::SWAPalpha) {
+                    pr_shift = 0.5;
+                    coeff = M_PI_2;
+                }
+                if (gate->id_ == GateID::FSim) {
                     pr_shift = 0.001;
                     coeff = 0.5 / pr_shift;
                 }
@@ -1164,18 +1171,54 @@ auto VectorState<qs_policy_t_>::GetExpectationWithGradParameterShiftOneMulti(
                         VT<py_qs_data_t> intrin_grad_list(p_gate->prs_.size());
                         for (int k = 0; k < p_gate->prs_.size(); k++) {
                             p_gate->prs_[k] += -pr_shift;
+                            if (gate->id_ == GateID::U3) {
+                                parameter::tn::Tensor coeff;
+                                parameter::tn::Tensor tmp;
+                                std::string key;
+                                for (auto& [key_, v] : p_gate->prs_[k].data_) {
+                                    key = key_;
+                                    coeff = v;
+                                    tmp = pr.GetItem(key_);
+                                }
+                                tmp += -pr_shift / coeff;
+                                pr.SetItem(key, tmp);
+                            }
                             sim_l = *this;
                             sim_l.ApplyCircuit(circ, pr);
                             sim_rs[j - start] = sim_l;
                             sim_rs[j - start].ApplyHamiltonian(*hams[j]);
                             auto expect0 = qs_policy_t::Vdot(sim_l.qs, sim_rs[j - start].qs, dim);
                             p_gate->prs_[k] += 2 * pr_shift;
+                            if (gate->id_ == GateID::U3) {
+                                parameter::tn::Tensor coeff;
+                                parameter::tn::Tensor tmp;
+                                std::string key;
+                                for (auto& [key_, v] : p_gate->prs_[k].data_) {
+                                    key = key_;
+                                    coeff = v;
+                                    tmp = pr.GetItem(key_);
+                                }
+                                tmp += 2 * pr_shift / coeff;
+                                pr.SetItem(key, tmp);
+                            }
                             sim_l = *this;
                             sim_l.ApplyCircuit(circ, pr);
                             sim_rs[j - start] = sim_l;
                             sim_rs[j - start].ApplyHamiltonian(*hams[j]);
                             auto expect1 = qs_policy_t::Vdot(sim_l.qs, sim_rs[j - start].qs, dim);
                             p_gate->prs_[k] += -pr_shift;
+                            if (gate->id_ == GateID::U3) {
+                                parameter::tn::Tensor coeff;
+                                parameter::tn::Tensor tmp;
+                                std::string key;
+                                for (auto& [key_, v] : p_gate->prs_[k].data_) {
+                                    key = key_;
+                                    coeff = v;
+                                    tmp = pr.GetItem(key_);
+                                }
+                                tmp += -pr_shift / coeff;
+                                pr.SetItem(key, tmp);
+                            }
                             intrin_grad_list[k] = {coeff * std::real(expect1 - expect0), 0};
                         }
                         auto intrin_grad = tensor::Matrix(VVT<py_qs_data_t>{intrin_grad_list});
diff --git a/mindquantum/simulator/simulator.py b/mindquantum/simulator/simulator.py
index bcae40069..0672e1079 100644
--- a/mindquantum/simulator/simulator.py
+++ b/mindquantum/simulator/simulator.py
@@ -338,8 +338,8 @@ def get_expectation_with_grad(
                 batch in parallel threads. Default: ``None``.
             pr_shift (bool): Whether or not to use parameter-shift rule. Only available in "mqvector" simulator.
                 It will be enabled automatically when circuit contains noise channel. Noted that not every gate
-                uses the same shift value π/2, so the gradient of parameterized custom gate will be calculated
-                by finite difference method with gap 0.001. Default: ``False``.
+                uses the same shift value π/2, so the gradient of FSim gate and parameterized custom gate will be
+                calculated by finite difference method with gap 0.001. Default: ``False``.
 
         Returns:
             GradOpsWrapper, a grad ops wrapper than contains information to generate this grad ops.
diff --git a/tests/st/test_simulator/test_basic_gate_with_simulator.py b/tests/st/test_simulator/test_basic_gate_with_simulator.py
index 5e540bc82..f7a43837f 100644
--- a/tests/st/test_simulator/test_basic_gate_with_simulator.py
+++ b/tests/st/test_simulator/test_basic_gate_with_simulator.py
@@ -152,7 +152,7 @@ def test_single_parameter_gate_expectation_with_grad(config, gate):  # pylint: d
     Expectation: success.
     """
     virtual_qc, dtype = config
-    g = gate('a')
+    g = gate({'a': 1, 'b': 2})
     dim = 2**g.n_qubits
     g = g.on(list(range(g.n_qubits)))
     init_state = np.random.rand(dim) + np.random.rand(dim) * 1j
@@ -161,24 +161,29 @@ def test_single_parameter_gate_expectation_with_grad(config, gate):  # pylint: d
     sim = Simulator(virtual_qc, g.n_qubits, dtype=dtype)
     sim.set_qs(init_state)
     grad_ops = sim.get_expectation_with_grad(ham, Circuit(g))
-    pr = np.random.rand() * 2 * np.pi
-    f, grad = grad_ops([pr])
+    pr = np.random.rand(2) * 2 * np.pi
+    f, grad = grad_ops(pr)
     ref_f = (
         init_state.T.conj()
-        @ g.hermitian().matrix({'a': pr})
+        @ g.hermitian().matrix({'a': pr[0], 'b': pr[1]})
         @ ham.hamiltonian.matrix(g.n_qubits)
-        @ g.matrix({'a': pr})
+        @ g.matrix({'a': pr[0], 'b': pr[1]})
         @ init_state
     )
-    ref_grad = (
-        init_state.T.conj()
-        @ g.hermitian().matrix({'a': pr})
-        @ ham.hamiltonian.matrix(g.n_qubits)
-        @ g.diff_matrix({'a': pr})
-        @ init_state
-    ).real * 2
+    ref_grad = []
+    for about_what in ('a', 'b'):
+        ref_grad.append(
+            (
+                init_state.T.conj()
+                @ g.hermitian().matrix({'a': pr[0], 'b': pr[1]})
+                @ ham.hamiltonian.matrix(g.n_qubits)
+                @ g.diff_matrix({'a': pr[0], 'b': pr[1]}, about_what)
+                @ init_state
+            ).real
+            * 2
+        )
     assert np.allclose(f, ref_f, atol=1e-6)
-    assert np.allclose(grad, ref_grad.real, atol=1e-4)
+    assert np.allclose(grad, ref_grad, atol=1e-4)
 
     c_g = g.on(list(range(g.n_qubits)), g.n_qubits)
     c_init_state = np.random.rand(2 * dim) + np.random.rand(2 * dim) * 1j
@@ -186,18 +191,24 @@ def test_single_parameter_gate_expectation_with_grad(config, gate):  # pylint: d
     c_sim = Simulator(virtual_qc, c_g.n_qubits + 1, dtype=dtype)
     c_sim.set_qs(c_init_state)
     c_grad_ops = c_sim.get_expectation_with_grad(ham, Circuit(c_g))
-    c_pr = np.random.rand() * 2 * np.pi
-    c_f, c_grad = c_grad_ops([c_pr])
-    m = np.block([[np.eye(dim), np.zeros((dim, dim))], [np.zeros((dim, dim)), g.matrix({'a': c_pr})]])
-    diff_m = np.block(
-        [[np.zeros((dim, dim)), np.zeros((dim, dim))], [np.zeros((dim, dim)), g.diff_matrix({'a': c_pr})]]
-    )
+    c_pr = np.random.rand(2) * 2 * np.pi
+    c_f, c_grad = c_grad_ops(c_pr)
+    m = np.block([[np.eye(dim), np.zeros((dim, dim))], [np.zeros((dim, dim)), g.matrix({'a': c_pr[0], 'b': c_pr[1]})]])
     c_ref_f = c_init_state.T.conj() @ m.T.conj() @ ham.hamiltonian.matrix(g.n_qubits + 1) @ m @ c_init_state
-    c_ref_grad = (
-        2 * (c_init_state.T.conj() @ m.T.conj() @ ham.hamiltonian.matrix(g.n_qubits + 1) @ diff_m @ c_init_state).real
-    )
-    assert np.allclose(c_f, c_ref_f, atol=1e-6)
-    assert np.allclose(c_grad, c_ref_grad, atol=1e-6)
+    c_ref_grad = []
+    for about_what in ('a', 'b'):
+        diff_m = np.block(
+            [
+                [np.zeros((dim, dim)), np.zeros((dim, dim))],
+                [np.zeros((dim, dim)), g.diff_matrix({'a': c_pr[0], 'b': c_pr[1]}, about_what)],
+            ]
+        )
+        c_ref_grad.append(
+            2
+            * (c_init_state.T.conj() @ m.T.conj() @ ham.hamiltonian.matrix(g.n_qubits + 1) @ diff_m @ c_init_state).real
+        )
+    assert np.allclose(c_f, c_ref_f, atol=1e-5)
+    assert np.allclose(c_grad, c_ref_grad, atol=1e-5)
 
 
 @pytest.mark.level0
@@ -364,7 +375,7 @@ def diff_matrix(alpha):
             @ init_state
         ).real * 2
         assert np.allclose(f, ref_f, atol=1e-6)
-        assert np.allclose(grad, ref_grad.real, atol=1e-6)
+        assert np.allclose(grad, ref_grad, atol=1e-6)
 
         c_g = g.on(list(range(n)), n)
         c_init_state = np.random.rand(2 * dim) + np.random.rand(2 * dim) * 1j
@@ -425,8 +436,6 @@ def test_u3_expectation_with_grad(config):  # pylint: disable=R0914
     f, grad = grad_ops(pr)
     ref_f, ref_grad = ref_grad_ops(ref_pr)
     ref_grad = np.array([ref_grad[0][0][1], ref_grad[0][0][2], ref_grad[0][0][0]])
-    print(grad)
-    print(ref_grad)
     assert np.allclose(f, ref_f, atol=1e-6)
     assert np.allclose(grad, ref_grad.real, atol=1e-6)
 
@@ -445,7 +454,7 @@ def test_u3_expectation_with_grad(config):  # pylint: disable=R0914
     ref_c_f, ref_c_grad = ref_c_grad_ops(ref_c_pr)
     ref_c_grad = np.array([ref_c_grad[0][0][1], ref_c_grad[0][0][2], ref_c_grad[0][0][0]])
     assert np.allclose(c_f, ref_c_f, atol=1e-6)
-    assert np.allclose(c_grad, ref_c_grad, atol=1e-6)
+    assert np.allclose(c_grad, ref_c_grad.real, atol=1e-6)
 
 
 @pytest.mark.level0
@@ -516,7 +525,7 @@ def phi_diff_matrix(phi):
     ).real * 2
     ref_grad = np.array([ref_grad_theta, ref_grad_phi])
     assert np.allclose(f, ref_f, atol=1e-6)
-    assert np.allclose(grad, ref_grad.real, atol=1e-6)
+    assert np.allclose(grad, ref_grad, atol=1e-6)
 
     c_g = g.on(list(range(g.n_qubits)), g.n_qubits)
     c_init_state = np.random.rand(2 * dim) + np.random.rand(2 * dim) * 1j
diff --git a/tests/st/test_simulator/test_method_of_mqmatrix.py b/tests/st/test_simulator/test_method_of_mqmatrix.py
index 5f48f0f30..9e3cb2a89 100644
--- a/tests/st/test_simulator/test_method_of_mqmatrix.py
+++ b/tests/st/test_simulator/test_method_of_mqmatrix.py
@@ -216,7 +216,7 @@ def test_get_expectation_with_grad(config):
     init_state = init_state / np.linalg.norm(init_state)
     circ0 = random_circuit(3, 100)
     circ1 = random_circuit(3, 100)
-    circ = circ0 + G.RX('a').on(0) + circ1
+    circ = circ0 + G.RX({'a': 1, 'b': 2}).on(0) + circ1
     sim = Simulator(virtual_qc, 3, dtype=dtype)
     sim.set_qs(init_state)
     ham0 = Hamiltonian(QubitOperator('X0 Y1'), dtype=dtype)
@@ -224,24 +224,29 @@ def test_get_expectation_with_grad(config):
     ham2 = Hamiltonian(csr_matrix(ham0.hamiltonian.matrix(3)), dtype=dtype)
     for ham in (ham0, ham1, ham2):
         grad_ops = sim.get_expectation_with_grad(ham, circ)
-        pr = np.random.rand()
-        f, g = grad_ops([pr])
+        pr = np.random.rand(2)
+        f, g = grad_ops(pr)
         ref_f = (
             init_state.T.conj()
-            @ circ.hermitian().matrix({'a': pr})
+            @ circ.hermitian().matrix({'a': pr[0], 'b': pr[1]})
             @ ham0.hamiltonian.matrix(3)
-            @ circ.matrix({'a': pr})
+            @ circ.matrix({'a': pr[0], 'b': pr[1]})
             @ init_state
         )
-        ref_g = (
-            init_state.T.conj()
-            @ circ.hermitian().matrix({'a': pr})
-            @ ham0.hamiltonian.matrix(3)
-            @ circ1.matrix()
-            @ np.kron(np.eye(4, 4), G.RX('a').diff_matrix({'a': pr}))
-            @ circ0.matrix()
-            @ init_state
-        ).real * 2
+        ref_g = []
+        for about_what in ('a', 'b'):
+            ref_g.append(
+                (
+                    init_state.T.conj()
+                    @ circ.hermitian().matrix({'a': pr[0], 'b': pr[1]})
+                    @ ham0.hamiltonian.matrix(3)
+                    @ circ1.matrix()
+                    @ np.kron(np.eye(4, 4), G.RX({'a': 1, 'b': 2}).diff_matrix({'a': pr[0], 'b': pr[1]}, about_what))
+                    @ circ0.matrix()
+                    @ init_state
+                ).real
+                * 2
+            )
         assert np.allclose(f, ref_f, atol=1e-6)
         assert np.allclose(g, ref_g, atol=1e-6)
 
@@ -262,7 +267,7 @@ def test_noise_get_expectation_with_grad(virtual_qc, dtype):
     init_dm = np.outer(init_state, init_state.conj())
     circ0 = random_circuit(3, 100, 1.0, 0.0)
     circ1 = random_circuit(3, 100, 1.0, 0.0)
-    circ = circ0 + G.RX('a').on(0) + circ1
+    circ = circ0 + G.RX({'a': 1, 'b': 2}).on(0) + circ1
     circ = circ.with_noise()
     ham0 = Hamiltonian(QubitOperator('X0 Y1'), dtype=dtype)
     ham1 = ham0.sparse(3)
@@ -271,39 +276,41 @@ def test_noise_get_expectation_with_grad(virtual_qc, dtype):
         sim = Simulator(virtual_qc, 3, dtype=dtype)
         sim.set_qs(init_dm)
         grad_ops = sim.get_expectation_with_grad(ham, circ)
-        pr = np.random.rand()
-        f, grad = grad_ops([pr])
-        sim.apply_circuit(circ, [pr])
+        pr = np.random.rand(2)
+        f, grad = grad_ops(pr)
+        sim.apply_circuit(circ, pr)
         dm = sim.get_qs()
         ref_f = np.trace(ham0.hamiltonian.matrix(3) @ dm)
-        dm = init_dm
-        for g in circ:
-            if g.parameterized:
-                dm = (
-                    np.kron(np.eye(4, 4), g.diff_matrix({'a': pr}))
-                    @ dm
-                    @ np.kron(np.eye(4, 4), g.hermitian().matrix({'a': pr}))
-                )
-            elif isinstance(g, G.NoiseGate):
-                tmp = np.zeros((8, 8), dtype=mq.to_np_type(dtype))
-                for m in g.matrix():
+        ref_grad = []
+        for about_what in ('a', 'b'):
+            dm = init_dm
+            for g in circ:
+                if g.parameterized:
+                    dm = (
+                        np.kron(np.eye(4, 4), g.diff_matrix({'a': pr[0], 'b': pr[1]}, about_what))
+                        @ dm
+                        @ np.kron(np.eye(4, 4), g.hermitian().matrix({'a': pr[0], 'b': pr[1]}))
+                    )
+                elif isinstance(g, G.NoiseGate):
+                    tmp = np.zeros((8, 8), dtype=mq.to_np_type(dtype))
+                    for m in g.matrix():
+                        if g.obj_qubits[0] == 0:
+                            big_m = np.kron(np.eye(4, 4), m)
+                        elif g.obj_qubits[0] == 1:
+                            big_m = np.kron(np.kron(np.eye(2, 2), m), np.eye(2, 2))
+                        else:
+                            big_m = np.kron(m, np.eye(4, 4))
+                        tmp += big_m @ dm @ big_m.conj().T
+                    dm = tmp
+                else:
                     if g.obj_qubits[0] == 0:
-                        big_m = np.kron(np.eye(4, 4), m)
+                        big_m = np.kron(np.eye(4, 4), g.matrix())
                     elif g.obj_qubits[0] == 1:
-                        big_m = np.kron(np.kron(np.eye(2, 2), m), np.eye(2, 2))
+                        big_m = np.kron(np.kron(np.eye(2, 2), g.matrix()), np.eye(2, 2))
                     else:
-                        big_m = np.kron(m, np.eye(4, 4))
-                    tmp += big_m @ dm @ big_m.conj().T
-                dm = tmp
-            else:
-                if g.obj_qubits[0] == 0:
-                    big_m = np.kron(np.eye(4, 4), g.matrix())
-                elif g.obj_qubits[0] == 1:
-                    big_m = np.kron(np.kron(np.eye(2, 2), g.matrix()), np.eye(2, 2))
-                else:
-                    big_m = np.kron(g.matrix(), np.eye(4, 4))
-                dm = big_m @ dm @ big_m.conj().T
-        ref_grad = np.trace(ham0.hamiltonian.matrix(3) @ dm).real * 2
+                        big_m = np.kron(g.matrix(), np.eye(4, 4))
+                    dm = big_m @ dm @ big_m.conj().T
+            ref_grad.append(np.trace(ham0.hamiltonian.matrix(3) @ dm).real * 2)
         assert np.allclose(f, ref_f, atol=1e-6)
         assert np.allclose(grad, ref_grad, atol=1e-4)
 
diff --git a/tests/st/test_simulator/test_method_of_mqvector.py b/tests/st/test_simulator/test_method_of_mqvector.py
index 495a302a9..35b65e22a 100644
--- a/tests/st/test_simulator/test_method_of_mqvector.py
+++ b/tests/st/test_simulator/test_method_of_mqvector.py
@@ -18,29 +18,14 @@
 
 import numpy as np
 import pytest
-from scipy.stats import entropy
 from scipy.sparse import csr_matrix
-from scipy.linalg import logm, sqrtm
 
 import mindquantum as mq
 from mindquantum.core import gates as G
-from mindquantum.core.circuit import UN, Circuit
-from mindquantum.simulator import Simulator, fidelity
+from mindquantum.simulator import Simulator
 from mindquantum.core.operators import Hamiltonian, QubitOperator
-from mindquantum.simulator.available_simulator import SUPPORTED_SIMULATOR
 from mindquantum.utils import random_circuit
 
-try:
-    import importlib.metadata as importlib_metadata
-except ImportError:
-    import importlib_metadata
-
-try:
-    importlib_metadata.import_module("numba")
-    _HAS_NUMBA = True
-except ImportError:
-    _HAS_NUMBA = False
-
 _HAS_GPU = False
 
 try:
@@ -122,7 +107,7 @@ def test_get_expectation(virtual_qc, dtype):
     G.RX,
     G.RY,
     G.RZ,
-    # G.SWAPalpha,
+    G.SWAPalpha,
     G.XX,
     G.YY,
     G.ZZ,
@@ -151,8 +136,8 @@ def test_get_expectation(virtual_qc, dtype):
 @pytest.mark.parametrize("dtype", [mq.complex64, mq.complex128])
 def test_parameter_shift_rule(virtual_qc, dtype):  # pylint: disable=too-many-locals
     """
-    Description:
-    Expectation:
+    Description: test parameter shift rule
+    Expectation: success.
     """
     circ = random_circuit(3, 10)
     for i, gate in enumerate(single_parameter_gate):
@@ -161,8 +146,50 @@ def test_parameter_shift_rule(virtual_qc, dtype):  # pylint: disable=too-many-lo
         circ += random_circuit(3, 10)
     circ += G.U3('u3_theta', 'u3_phi', 'u3_lamda').on(0)
     circ += random_circuit(3, 10)
+    sim = Simulator(virtual_qc, 3, dtype=dtype)
+    ham = Hamiltonian(QubitOperator('X0') + QubitOperator('Z0'), dtype=dtype)
+    grad_ops = sim.get_expectation_with_grad(ham, circ, pr_shift=True)
+    pr = np.random.rand(len(circ.all_paras))
+    f, g = grad_ops(pr)
+    ref_grad_ops = sim.get_expectation_with_grad(ham, circ)
+    ref_f, ref_g = ref_grad_ops(pr)
+    assert np.allclose(f, ref_f, atol=1e-4)
+    assert np.allclose(g, ref_g, atol=1e-4)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+@pytest.mark.parametrize(
+    "virtual_qc",
+    [
+        'mqvector',
+        pytest.param('mqvector_gpu', marks=pytest.mark.skipif(not _HAS_GPU, reason='Machine does not has GPU.')),
+    ],
+)
+@pytest.mark.parametrize("dtype", [mq.complex64, mq.complex128])
+def test_parameter_shift_rule_finite_diff_case(virtual_qc, dtype):  # pylint: disable=too-many-locals
+    """
+    Description: test parameter shift rule finite difference case
+    Expectation: success.
+    """
+    def matrix(alpha):
+        ep = 0.5 * (1 + np.exp(1j * np.pi * alpha))
+        em = 0.5 * (1 - np.exp(1j * np.pi * alpha))
+        return np.array([[ep, em], [em, ep]])
+
+    def diff_matrix(alpha):
+        ep = 0.5 * 1j * np.pi * np.exp(1j * np.pi * alpha)
+        em = -0.5 * 1j * np.pi * np.exp(1j * np.pi * alpha)
+        return np.array([[ep, em], [em, ep]])
+    custom_gate = G.gene_univ_parameterized_gate('custom', matrix, diff_matrix)
+
+    circ = random_circuit(3, 10)
     circ += G.FSim('fsim_theta', 'fsim_phi').on([0, 1])
     circ += random_circuit(3, 10)
+    circ += custom_gate('a').on(0)
+    circ += random_circuit(3, 10)
     sim = Simulator(virtual_qc, 3, dtype=dtype)
     ham = Hamiltonian(QubitOperator('X0') + QubitOperator('Z0'), dtype=dtype)
     grad_ops = sim.get_expectation_with_grad(ham, circ, pr_shift=True)
@@ -170,5 +197,5 @@ def test_parameter_shift_rule(virtual_qc, dtype):  # pylint: disable=too-many-lo
     f, g = grad_ops(pr)
     ref_grad_ops = sim.get_expectation_with_grad(ham, circ)
     ref_f, ref_g = ref_grad_ops(pr)
-    assert np.allclose(f, ref_f, atol=1e-6)
-    assert np.allclose(g, ref_g, atol=1e-6)
+    assert np.allclose(f, ref_f, atol=1e-3)
+    assert np.allclose(g, ref_g, atol=1e-3)