diff --git a/bench/ConvUnifiedBenchmark.cc b/bench/ConvUnifiedBenchmark.cc
index 21d6191946..338eee8e92 100644
--- a/bench/ConvUnifiedBenchmark.cc
+++ b/bench/ConvUnifiedBenchmark.cc
@@ -25,6 +25,17 @@ using namespace std;
 using namespace fbgemm;
 
 // clang-format off
+// 1D conv shapes
+vector<conv_param_t<1>> shapes_1d = {
+    // MB, IC, OC, IW, G, KW, stride_w, pad_w_left, pad_w_right
+    // regular
+    conv_param_t<1>(1, 600, 100, {1}, 1, {3}, {1}, {2, 2}),
+    conv_param_t<1>(1, 600, 100, {2}, 1, {3}, {1}, {2, 2}),
+    conv_param_t<1>(1, 600, 100, {3}, 1, {3}, {1}, {2, 2}),
+    conv_param_t<1>(1, 200, 162, {1}, 1, {3}, {1}, {2, 2}),
+    conv_param_t<1>(1, 600, 100, {4}, 1, {3}, {1}, {2, 2})
+};
+
 // 2D conv shapes
 vector<conv_param_t<2>> shapes_2d = {
     // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
@@ -119,23 +130,38 @@ void performance_test(const vector<conv_param_t<SPATIAL_DIM>>& shapes) {
   if (SPATIAL_DIM == 3) {
     header += "IT, ";
   }
-  header += "IH, IW, G, ";
+  if (SPATIAL_DIM > 1) {
+    header += "IH, ";
+  }
+  header += "IW, G, ";
   if (SPATIAL_DIM == 3) {
     header += "KT, ";
   }
-  header += "KH, KW, ";
+  if (SPATIAL_DIM > 1) {
+    header += "KH, ";
+  }
+  header += "KW, ";
   if (SPATIAL_DIM == 3) {
     header += "stride_t, ";
   }
-  header += "stride_h, stride_w, ";
+  if (SPATIAL_DIM > 1) {
+    header += "stride_h, ";
+  }
+  header += "stride_w, ";
   if (SPATIAL_DIM == 3) {
     header += "pad_t, ";
   }
-  header += "pad_h, pad_w, ";
+  if (SPATIAL_DIM > 1) {
+    header += "pad_h, ";
+  }
+  header += "pad_w, ";
   if (SPATIAL_DIM == 3) {
     header += "dilation_t, ";
   }
-  header += "dilation_h, dilation_w, ";
+  if (SPATIAL_DIM > 1) {
+    header += "dilation_h, ";
+  }
+  header += "dilation_w, ";
 
   header += "Type, M, N, K, ";
 
@@ -375,6 +401,7 @@ int main() {
   }
 #endif
   // performance_test();
+  performance_test<1, int32_t>(shapes_1d);
   performance_test<2, int32_t>(shapes_2d);
   performance_test<3, int32_t>(shapes_3d);
   return 0;
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index 8e8020745c..56a0e6b822 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -46,7 +46,13 @@ enum class inst_set_t { anyarch, avx2, avx512, avx512_ymm, avx512_vnni };
 /**
  * @brief Typed enum for optimized paths for convolutions
  */
-enum class optimized_conv_t { depthwise, groupwise, pointwise, im2col };
+enum class optimized_conv_t {
+  depthwise,
+  groupwise,
+  pointwise,
+  fastpath1d,
+  im2col
+};
 
 /**
  * @brief Typed enum for implementation type.
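The new shapes_1d entries exercise very short sequences (IW = 1 to 4) with a 3-wide kernel, stride 1, and symmetric padding of 2, while the fastpath1d enumerator reserves a dispatch slot for a dedicated 1D kernel. A minimal sketch of building one of these shapes and checking the output width that conv_param_t<1> derives (assumes the public fbgemm/Fbgemm.h header; dilation is passed explicitly here):

#include <cassert>
#include <iostream>
#include "fbgemm/Fbgemm.h"

int main() {
  // MB=1, IC=600, OC=100, IW=1, G=1, KW=3, stride_w=1, pad=(2, 2), dilation=1
  fbgemm::conv_param_t<1> p(1, 600, 100, {1}, 1, {3}, {1}, {2, 2}, {1});
  // OW = (IW + pad_l + pad_r - dilation_w * (KW - 1) - 1) / stride_w + 1
  //    = (1 + 2 + 2 - 1 * 2 - 1) / 1 + 1 = 3
  assert(p.OUT_DIM[0] == 3);
  std::cout << "OW = " << p.OUT_DIM[0] << std::endl;
  return 0;
}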
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc index 924fa33f02..dd35027af0 100644 --- a/src/ExecuteKernelU8S8.cc +++ b/src/ExecuteKernelU8S8.cc @@ -388,6 +388,7 @@ INSTANTIATE_REQUANT_ACC_T(PackAWithRowOffset); ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL); #define INSTANTIATE_IM2COL_REQUANT_SPATIAL_DIM(ACC_T, RELU) \ + INSTANTIATE_IM2COL_REQUANT_Q_GRANS(ACC_T, RELU, 1); \ INSTANTIATE_IM2COL_REQUANT_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_IM2COL_REQUANT_Q_GRANS(ACC_T, RELU, 3); @@ -449,6 +450,7 @@ INSTANTIATE_REQUANT_FLOAT_RELU(PackAWithQuantRowOffset); ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL); #define INSTANTIATE_REQUANT_FLOAT_IM2COL_SPATIAL_DIM(ACC_T, RELU) \ + INSTANTIATE_REQUANT_FLOAT_IM2COL_Q_GRANS(ACC_T, RELU, 1); \ INSTANTIATE_REQUANT_FLOAT_IM2COL_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_REQUANT_FLOAT_IM2COL_Q_GRANS(ACC_T, RELU, 3); @@ -546,6 +548,7 @@ INSTANTIATE_MEMCPY_ACC_T(PackAWithRowOffset); memCopy<>>; #define INSTANTIATE_MEMCPY_IM2COL_SPATIAL_DIM(ACC_T) \ + INSTANTIATE_MEMCPY_IM2COL_BASE(ACC_T, 1); \ INSTANTIATE_MEMCPY_IM2COL_BASE(ACC_T, 2); \ INSTANTIATE_MEMCPY_IM2COL_BASE(ACC_T, 3); diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index 763eacc4cf..7c10157a5e 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -206,7 +206,9 @@ void fbgemmPacked( template bool fbgemmOptimizedGConv(const conv_param_t& conv_p) { - static_assert(SPATIAL_DIM >= 2, "Unsupported spatial dims"); + + if (SPATIAL_DIM == 1) return false; + int C_per_G = conv_p.IC / conv_p.G; int K_per_G = conv_p.OC / conv_p.G; @@ -247,6 +249,7 @@ bool fbgemmOptimizedGConv(const conv_param_t& conv_p) { std::bind(areEqual, std::placeholders::_1, 2))); } +template FBGEMM_API bool fbgemmOptimizedGConv(const conv_param_t<1>& conv_p); template FBGEMM_API bool fbgemmOptimizedGConv(const conv_param_t<2>& conv_p); template FBGEMM_API bool fbgemmOptimizedGConv(const conv_param_t<3>& conv_p); @@ -383,6 +386,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL); #define INSTANTIATE_SPATIAL_DIM(ACC_T, RELU) \ + INSTANTIATE_Q_GRANS(ACC_T, RELU, 1); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); @@ -451,6 +455,7 @@ INSTANTIATE_RELU(PackAWithQuantRowOffset); ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL); #define INSTANTIATE_SPATIAL_DIM(ACC_T, RELU) \ + INSTANTIATE_Q_GRANS(ACC_T, RELU, 1); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); @@ -588,6 +593,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); const BlockingFactors* blocking_params); #define INSTANTIATE_SPATIAL_DIM(ACC_T) \ + INSTANTIATE_BASE(ACC_T, 1); \ INSTANTIATE_BASE(ACC_T, 2); \ INSTANTIATE_BASE(ACC_T, 3); diff --git a/src/FbgemmConv.cc b/src/FbgemmConv.cc index f78c326929..935b38d35e 100644 --- a/src/FbgemmConv.cc +++ b/src/FbgemmConv.cc @@ -49,6 +49,11 @@ bool takePointWiseFastPath(const conv_param_t& conv_p) { std::accumulate(conv_p.pad.begin(), conv_p.pad.end(), 0) == 0; } +template +bool take1DFastPath(const conv_param_t& conv_p) { + return false; +} + template optimized_conv_t ConvFastPath(const conv_param_t& conv_p) { if (takeDepthWiseFastPath(conv_p)) { @@ -57,6 +62,8 @@ optimized_conv_t ConvFastPath(const conv_param_t& conv_p) { return optimized_conv_t::groupwise; } else if (takePointWiseFastPath(conv_p)) { return optimized_conv_t::pointwise; + } else if (take1DFastPath(conv_p)) { + return optimized_conv_t::fastpath1d; } else { return optimized_conv_t::im2col; } @@ -73,10 +80,6 @@ int 
fbgemmConv( int thread_id, int num_threads, const BlockingFactors* blocking_params) { - static_assert( - SPATIAL_DIM == 2 || SPATIAL_DIM == 3, - "Only 2D and 3D convolutions are supported"); - if (!packed_weights.isPackingCompliant(conv_p)) { std::string msg = "[FBGEMM_CONV_ERROR] Convolution parameters " @@ -317,6 +320,9 @@ int fbgemmConv( blocking_params); break; } + case optimized_conv_t::fastpath1d: { + break; + } case optimized_conv_t::im2col: { // All other convolutions go through im2col-based implementation // std::cout << "Im2col path" << std::endl; @@ -391,6 +397,7 @@ int fbgemmConv( INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, SPATIAL_DIM, int32_t); #define INSTANTIATE_SPATIAL_DIM(ACC_T, Q_GRAN, RELU) \ + INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, 1); \ INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, 2); \ INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, 3); @@ -420,10 +427,15 @@ template bool takeDepthWiseFastPath<2, std::int16_t>( template bool takeDepthWiseFastPath<3, std::int16_t>( const conv_param_t<3>& conv_p); +template FBGEMM_API optimized_conv_t +ConvFastPath<1, std::int32_t>(const conv_param_t<1>& conv_p); template FBGEMM_API optimized_conv_t ConvFastPath<2, std::int32_t>(const conv_param_t<2>& conv_p); template FBGEMM_API optimized_conv_t ConvFastPath<3, std::int32_t>(const conv_param_t<3>& conv_p); + +template FBGEMM_API optimized_conv_t +ConvFastPath<1, std::int16_t>(const conv_param_t<1>& conv_p); template FBGEMM_API optimized_conv_t ConvFastPath<2, std::int16_t>(const conv_param_t<2>& conv_p); template FBGEMM_API optimized_conv_t diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc index 16ee1c6123..274f769f40 100644 --- a/src/GroupwiseConvAcc32Avx2.cc +++ b/src/GroupwiseConvAcc32Avx2.cc @@ -1453,11 +1453,11 @@ void fbgemmGroupwiseConv( } int MB = conv_param.MB; - int OT = SPATIAL_DIM == 2 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 3]; - int OH = conv_param.OUT_DIM[SPATIAL_DIM - 2]; + int OT = SPATIAL_DIM <= 2 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 3]; + int OH = SPATIAL_DIM == 1 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 2]; int OW = conv_param.OUT_DIM[SPATIAL_DIM - 1]; - int T = SPATIAL_DIM == 2 ? 1 : conv_param.K[SPATIAL_DIM - 3]; - int R = conv_param.K[SPATIAL_DIM - 2]; + int T = SPATIAL_DIM <= 2 ? 1 : conv_param.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 1 : conv_param.K[SPATIAL_DIM - 2]; int S = conv_param.K[SPATIAL_DIM - 1]; int G = conv_param.G; int OC = conv_param.OC; @@ -1466,8 +1466,8 @@ void fbgemmGroupwiseConv( int C_per_G = conv_param.IC / G; int OH_OW = OH * OW; int OT_OH_OW = OT * OH * OW; - int IT = SPATIAL_DIM == 2 ? 1 : conv_param.IN_DIM[SPATIAL_DIM - 3]; - int IH = conv_param.IN_DIM[SPATIAL_DIM - 2]; + int IT = SPATIAL_DIM <= 2 ? 1 : conv_param.IN_DIM[SPATIAL_DIM - 3]; + int IH = SPATIAL_DIM == 1 ? 
1 : conv_param.IN_DIM[SPATIAL_DIM - 2]; int IW = conv_param.IN_DIM[SPATIAL_DIM - 1]; int IH_IW = IH * IW; int IT_IH_IW = IT * IH * IW; @@ -1479,6 +1479,9 @@ void fbgemmGroupwiseConv( int G_together = PackWeightMatrixForGConv:: numOfGroupsTogether(conv_param); + if (SPATIAL_DIM == 1) { + throw std::runtime_error("Groupwise 1D not implemented!"); + } if (SPATIAL_DIM == 2) { // Parallelization: int batch_start = 0; @@ -1558,10 +1561,11 @@ void fbgemmGroupwiseConv( rowOffsetBuf_start_group); const int32_t* inp = out_start_group; - block_type_t block{i * OT_OH_OW + oh_start * OW, - (oh_end - oh_start) * OW, - g * K_per_G, - G_together * K_per_G}; + block_type_t block{ + i * OT_OH_OW + oh_start * OW, + (oh_end - oh_start) * OW, + g * K_per_G, + G_together * K_per_G}; int ld_out = G * K_per_G; int ld_in = G * K_per_G; @@ -1700,10 +1704,11 @@ void fbgemmGroupwiseConv( } const int32_t* inp = out_start_t; - block_type_t block{i * OT_OH_OW + oh_start * OW, - (oh_end - oh_start) * OW, - g * K_per_G, - G_together * K_per_G}; + block_type_t block{ + i * OT_OH_OW + oh_start * OW, + (oh_end - oh_start) * OW, + g * K_per_G, + G_together * K_per_G}; int ld_out = G * K_per_G; int ld_in = G * K_per_G; @@ -1729,9 +1734,9 @@ int rowOffsetBufferSizeGConv(const conv_param_t& conv_param) { // row offset buffer should be a able to hold row offsets for however // number of groups we process at a time. if (cpuinfo_initialize()) { - int OT = SPATIAL_DIM == 2 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 3]; - int bufferSize = OT * conv_param.OUT_DIM[SPATIAL_DIM - 2] * - conv_param.OUT_DIM[SPATIAL_DIM - 1]; + int OT = SPATIAL_DIM <= 2 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 3]; + int OH = SPATIAL_DIM == 1 ? 1 : conv_param.OUT_DIM[SPATIAL_DIM - 2]; + int bufferSize = OT * OH * conv_param.OUT_DIM[SPATIAL_DIM - 1]; if (fbgemmHasAvx512Support()) { return conv_param.MB * bufferSize * conv_param.G; } else if (fbgemmHasAvx2Support()) { @@ -1746,6 +1751,8 @@ int rowOffsetBufferSizeGConv(const conv_param_t& conv_param) { } } +template FBGEMM_API int rowOffsetBufferSizeGConv<1>( + const conv_param_t<1>& conv_param); template FBGEMM_API int rowOffsetBufferSizeGConv<2>( const conv_param_t<2>& conv_param); template FBGEMM_API int rowOffsetBufferSizeGConv<3>( @@ -1769,6 +1776,7 @@ template FBGEMM_API int rowOffsetBufferSizeGConv<3>( INSTANTIATE_BASE(RELU, Q_GRAN, SPATIAL_DIM, int32_t); #define INSTANTIATE_SPATIAL_DIM(RELU, Q_GRAN) \ + INSTANTIATE_BIAS_T(RELU, Q_GRAN, 1); \ INSTANTIATE_BIAS_T(RELU, Q_GRAN, 2); \ INSTANTIATE_BIAS_T(RELU, Q_GRAN, 3); diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc index 524a216716..0af67d2c06 100644 --- a/src/PackAWithIm2Col.cc +++ b/src/PackAWithIm2Col.cc @@ -45,8 +45,6 @@ PackAWithIm2Col::PackAWithIm2Col( conv_p_(conv_p), sdata_(sdata), a_zero_pt_(a_zero_pt) { - static_assert( - SPATIAL_DIM == 2 || SPATIAL_DIM == 3, "unsupported conv dimension "); if (!cpuinfo_initialize()) { throw std::runtime_error("Failed to initialize cpuinfo!"); } @@ -203,11 +201,12 @@ void pack_a_with_im2col_opt( template void PackAWithIm2Col::pack(const block_type_t& block) { - block_type_t block_p = {block.row_start, - block.row_size, - block.col_start, - (block.col_size + row_interleave_B_ - 1) / - row_interleave_B_ * row_interleave_B_}; + block_type_t block_p = { + block.row_start, + block.row_size, + block.col_start, + (block.col_size + row_interleave_B_ - 1) / row_interleave_B_ * + row_interleave_B_}; BaseType::packedBlock(block_p); T* out = BaseType::getBuf(); // accumulate into row offset? 
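The next hunk adds the SPATIAL_DIM == 1 branch to PackAWithIm2Col::pack(): each output position (n, w) becomes one row of G * KW * (IC/G) columns, and positions that fall in the padding are filled with the activation zero point rather than zero. A standalone sketch of that 1D im2col layout for NWC activations (illustrative helper, not the library API; dilation fixed at 1 for brevity):

#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative 1D im2col: A is NWC (N x IW x IC), Ao is (N*OW) x (G * KW * IC/G).
void im2col_1d_sketch(int N, int IW, int IC, int G, int KW, int stride,
                      int pad_l, int OW, std::uint8_t zero_pt,
                      const std::uint8_t* A, std::uint8_t* Ao) {
  const int C_per_G = IC / G;
  for (int n = 0; n < N; ++n) {
    for (int w = 0; w < OW; ++w) {
      for (int g = 0; g < G; ++g) {
        for (int s = 0; s < KW; ++s) {
          const int w_in = -pad_l + w * stride + s;  // dilation assumed 1
          std::uint8_t* dst = Ao + (((n * OW + w) * G + g) * KW + s) * C_per_G;
          if (w_in < 0 || w_in >= IW) {
            std::memset(dst, zero_pt, C_per_G);  // padding uses the zero point
          } else {
            std::memcpy(dst, A + (n * IW + w_in) * IC + g * C_per_G, C_per_G);
          }
        }
      }
    }
  }
}

int main() {
  // One batch, IW=4, IC=2, G=1, KW=3, stride 1, pad 1 on each side -> OW=4.
  std::vector<std::uint8_t> A = {1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<std::uint8_t> Ao(4 * 1 * 3 * 2);
  im2col_1d_sketch(1, 4, 2, 1, 3, 1, 1, 4, /*zero_pt=*/0, A.data(), Ao.data());
  return 0;
}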
@@ -307,7 +306,46 @@ void PackAWithIm2Col::pack(const block_type_t& block) { } for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { - if (SPATIAL_DIM == 2) { // static if + if (SPATIAL_DIM == 1) { // static if + int n = i / (conv_p_.OUT_DIM[0]); + int w = i % (conv_p_.OUT_DIM[0]); + for (int j = block.col_start; + j < block.col_start + block.col_size + ic_per_group - 1; + j += ic_per_group) { + int j_blk_id = j / ic_per_group; + // max( j_blk_id * IC, START) -> min( END, (j_blk_id + 1) * IC ) + int j_blk_start = std::max(j_blk_id * ic_per_group, block.col_start); + int j_blk_end = std::min( + (j_blk_id + 1) * ic_per_group, block.col_start + block.col_size); + if (j_blk_start >= j_blk_end) { + break; + } + + int grs = j / ic_per_group; + int s = grs % conv_p_.K[0]; + int g = grs / conv_p_.K[0]; + + int w_in = + -conv_p_.pad[0] + w * conv_p_.stride[0] + s * conv_p_.dilation[0]; + if (w_in < 0 || w_in >= conv_p_.IN_DIM[0]) { + // Please note that padding for convolution should be filled with + // zero_pt + std::memset( + out + (i - block.row_start) * BaseType::blockColSize() + + (j_blk_start - block.col_start), + a_zero_pt_, + sizeof(T) * (j_blk_end - j_blk_start)); + } else { + std::memcpy( + out + (i - block.row_start) * BaseType::blockColSize() + + j_blk_start - block.col_start, + sdata_ + (n * conv_p_.IN_DIM[0] + w_in) * conv_p_.IC + + g * ic_per_group + (j_blk_start % ic_per_group), + sizeof(T) * (j_blk_end - j_blk_start)); + } + } + + } else if (SPATIAL_DIM == 2) { // static if int n = i / (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1]); int hw = i % (conv_p_.OUT_DIM[0] * conv_p_.OUT_DIM[1]); int w = hw % conv_p_.OUT_DIM[1]; @@ -485,8 +523,10 @@ int PackAWithIm2Col::rowOffsetBufferSize( } } -template class PackAWithIm2Col; -template class PackAWithIm2Col; +template class PackAWithIm2Col; +template class PackAWithIm2Col; +template class PackAWithIm2Col; +template class PackAWithIm2Col; template class PackAWithIm2Col; template class PackAWithIm2Col; diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc index c271c4c3d5..4277ed4b83 100644 --- a/src/PackBMatrix.cc +++ b/src/PackBMatrix.cc @@ -334,10 +334,11 @@ template void PackBMatrix::unpack( T* origin_buf, const BlockingFactors* params) { - block_type_t blockB{BaseType::packedRowStart(), - BaseType::numPackedRows(), - BaseType::packedColStart(), - BaseType::numPackedCols()}; + block_type_t blockB{ + BaseType::packedRowStart(), + BaseType::numPackedRows(), + BaseType::packedColStart(), + BaseType::numPackedCols()}; pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false, params); } diff --git a/src/PackMatrix.cc b/src/PackMatrix.cc index d1896d8bed..9fa9e6f723 100644 --- a/src/PackMatrix.cc +++ b/src/PackMatrix.cc @@ -85,6 +85,10 @@ template class PackMatrix< uint8_t, int32_t>; +template class PackMatrix< + PackAWithIm2Col, + uint8_t, + int32_t>; template class PackMatrix, uint8_t, int32_t>; template class PackMatrix< PackAWithIm2Col, @@ -99,6 +103,10 @@ template class PackMatrix< template class PackMatrix, int8_t, int32_t>; // int16 accumulation +template class PackMatrix< + PackAWithIm2Col, + uint8_t, + int16_t>; template class PackMatrix, uint8_t, int16_t>; template class PackMatrix< PackAWithIm2Col, diff --git a/src/PackWeightMatrixForGConv.cc b/src/PackWeightMatrixForGConv.cc index bb09023e91..58aa7199ae 100644 --- a/src/PackWeightMatrixForGConv.cc +++ b/src/PackWeightMatrixForGConv.cc @@ -86,8 +86,8 @@ inline int PackWeightMatrixForGConv::unpacked_index_( bool tr) { // Get the full dimensions // Can't use T as 
varname because T is a template parameter. - int F = SPATIAL_DIM == 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; - int R = conv_param_.K[SPATIAL_DIM - 2]; + int F = SPATIAL_DIM <= 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 1 : conv_param_.K[SPATIAL_DIM - 2]; int S = conv_param_.K[SPATIAL_DIM - 1]; int G = conv_param_.G; int IC_per_G = conv_param_.IC / G; @@ -118,8 +118,8 @@ inline int PackWeightMatrixForGConv::packed_index_( int c) { // Get the full dimensions // Can't use T as varname because T is a template parameter. - int F = SPATIAL_DIM == 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; - int R = conv_param_.K[SPATIAL_DIM - 2]; + int F = SPATIAL_DIM <= 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 1 : conv_param_.K[SPATIAL_DIM - 2]; int S = conv_param_.K[SPATIAL_DIM - 1]; int G = conv_param_.G; int IC_per_G = conv_param_.IC / G; @@ -159,8 +159,8 @@ void PackWeightMatrixForGConv::pack_unpack_( T* dst, bool ispack) { // Can't use T as varname because T is a template parameter. - int F = SPATIAL_DIM == 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; - int R = conv_param_.K[SPATIAL_DIM - 2]; + int F = SPATIAL_DIM <= 2 ? 1 : conv_param_.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 1 : conv_param_.K[SPATIAL_DIM - 2]; int S = conv_param_.K[SPATIAL_DIM - 1]; int G = conv_param_.G; int IC_per_G = conv_param_.IC / G; @@ -257,6 +257,8 @@ void PackWeightMatrixForGConv::unpack(T* origin_buf) { pack_unpack_(const_cast(pdata_), origin_buf, false); } +template class FBGEMM_API PackWeightMatrixForGConv; +template class FBGEMM_API PackWeightMatrixForGConv; template class FBGEMM_API PackWeightMatrixForGConv; template class FBGEMM_API PackWeightMatrixForGConv; template class FBGEMM_API PackWeightMatrixForGConv; diff --git a/src/PackWeightsForConv.cc b/src/PackWeightsForConv.cc index 3d673e9c63..0d830ddd3c 100644 --- a/src/PackWeightsForConv.cc +++ b/src/PackWeightsForConv.cc @@ -18,15 +18,12 @@ PackWeightsForConv::PackWeightsForConv( const T* sdata, const BlockingFactors* blocking_params) : conv_param_(conv_p) { - static_assert( - SPATIAL_DIM == 2 || SPATIAL_DIM == 3, - "Only 2D and 3D convolutions are supported"); // Note: The following logic should *exactly* match with what we have in // FbgemmConv.cc switch (ConvFastPath(conv_p)) { case optimized_conv_t::depthwise: { - const int kernel_d = SPATIAL_DIM == 2 ? 1 : conv_p.K[0]; - const int kernel_h = conv_p.K[SPATIAL_DIM - 2]; + const int kernel_d = SPATIAL_DIM <= 2 ? 1 : conv_p.K[0]; + const int kernel_h = SPATIAL_DIM == 1 ? 1 : conv_p.K[SPATIAL_DIM - 2]; const int kernel_w = conv_p.K[SPATIAL_DIM - 1]; W_dw_packed_ = std::make_shared( conv_p.OC, kernel_d * kernel_h * kernel_w, sdata); @@ -40,8 +37,8 @@ PackWeightsForConv::PackWeightsForConv( } case optimized_conv_t::pointwise: { const int N = conv_p.OC / conv_p.G; - const int kernel_d = SPATIAL_DIM == 2 ? 1 : conv_p.K[0]; - const int kernel_h = conv_p.K[SPATIAL_DIM - 2]; + const int kernel_d = SPATIAL_DIM <= 2 ? 1 : conv_p.K[0]; + const int kernel_h = SPATIAL_DIM == 1 ? 1 : conv_p.K[SPATIAL_DIM - 2]; const int kernel_w = conv_p.K[SPATIAL_DIM - 1]; const int K = kernel_d * kernel_h * kernel_w * conv_p.IC; W_pointwise_packed_ = std::make_shared>( @@ -55,10 +52,13 @@ PackWeightsForConv::PackWeightsForConv( blocking_params); break; } + case optimized_conv_t::fastpath1d: { + break; + } case optimized_conv_t::im2col: { const int N = conv_p.OC / conv_p.G; - const int kernel_d = SPATIAL_DIM == 2 ? 
1 : conv_p.K[0]; - const int kernel_h = conv_p.K[SPATIAL_DIM - 2]; + const int kernel_d = SPATIAL_DIM <= 2 ? 1 : conv_p.K[0]; + const int kernel_h = SPATIAL_DIM == 1 ? 1 : conv_p.K[SPATIAL_DIM - 2]; const int kernel_w = conv_p.K[SPATIAL_DIM - 1]; const int K = kernel_d * kernel_h * kernel_w * conv_p.IC; W_im2col_packed_ = std::make_shared>( @@ -181,6 +181,7 @@ std::string PackWeightsForConv::mismatchingParams( return msg; } +template class PackWeightsForConv<1, int8_t, int32_t>; template class PackWeightsForConv<2, int8_t, int32_t>; template class PackWeightsForConv<3, int8_t, int32_t>; diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc index 991c752b31..0e7e3c8d0e 100644 --- a/src/RefImplementations.cc +++ b/src/RefImplementations.cc @@ -351,6 +351,50 @@ int32_t clip_16bit(int32_t x) { } } +/* Imitate the Im2Col function + * from caffe2/utils/math_cpu.cc + * NWC StorageOrder/Layout + * A: NWC: NW_0 x C_0 + * Ao: NWC: NW_1 x G RS C_0/G + */ +template <> +FBGEMM_API void im2col_ref( + const conv_param_t<1>& conv_p, + const uint8_t* A, + int32_t A_zero_point, + uint8_t* Ao) { + int IC = conv_p.IC; + int G = conv_p.G; + assert(IC % G == 0); + array IN_DIM = conv_p.IN_DIM; + array OUT_DIM = conv_p.OUT_DIM; + array K = conv_p.K; + + for (int n = 0; n < conv_p.MB; ++n) { + for (int w = 0; w < OUT_DIM[0]; ++w) { + for (int s = 0; s < K[0]; ++s) { + int w_in = + -conv_p.pad[0] + w * conv_p.stride[0] + s * conv_p.dilation[0]; + if (w_in < 0 || w_in >= IN_DIM[0]) { + for (int g = 0; g < G; ++g) { + memset( + Ao + (((n * OUT_DIM[0] + w) * G + g) * K[0] + s) * (IC / G), + A_zero_point, + sizeof(uint8_t) * (IC / G)); + } + } else { + for (int g = 0; g < G; ++g) { + memcpy( + Ao + (((n * OUT_DIM[0] + w) * G + g) * K[0] + s) * (IC / G), + A + (n * IN_DIM[0] + w_in) * IC + g * (IC / G), + sizeof(uint8_t) * (IC / G)); + } + } + } // for each s + } // for each w + } // for each n +} + /* Imitate the Im2Col function * from caffe2/utils/math_cpu.cc * NHWC StorageOrder/Layout @@ -501,6 +545,51 @@ FBGEMM_API void im2col_ref( } // for each n } +// 1D Conv +template <> +FBGEMM_API void conv_ref( + const conv_param_t<1>& conv_p, + const uint8_t* A, + int32_t A_zero_point, + const int8_t* B, + int32_t* C) { + // A is assumed to be (N Lin Cin) + // B is assumed to be (G K Cin/G Cout/G) + // C is assumed to be (N Lout Cout) + int IC = conv_p.IC; + int OC = conv_p.OC; + int G = conv_p.G; + assert(IC % G == 0); + assert(OC % G == 0); + array IN_DIM = conv_p.IN_DIM; + array OUT_DIM = conv_p.OUT_DIM; + array K = conv_p.K; + + for (int n = 0; n < conv_p.MB; ++n) { + for (int w = 0; w < OUT_DIM[0]; ++w) { + for (int g = 0; g < G; ++g) { + for (int m = 0; m < OC / G; ++m) { + int sum = 0; + for (int r = 0; r < K[0]; ++r) { + int w_in = + -conv_p.pad[0] + w * conv_p.stride[0] + r * conv_p.dilation[0]; + for (int c = 0; c < IC / G; ++c) { + int a = w_in < 0 || w_in >= IN_DIM[0] + ? 
A_zero_point + : A[(n * IN_DIM[0] + w_in) * IC + g * (IC / G) + c]; + int b = + B[((g * K[0] + r) * (IC / G) + c) * (OC / G) + + m]; // G K (Cin / G) (Cout / G) after transpose + sum += a * b; + } // for each c + } // for each r + C[(n * OUT_DIM[0] + w) * OC + g * (OC / G) + m] = sum; + } // for each w + } // for each m + } // for each group + } // for each n +} + // 2D Conv template <> FBGEMM_API void conv_ref( @@ -628,9 +717,6 @@ void transposeConvWeights( int IC_per_G = conv_p.IC / conv_p.G; int OC_per_G = conv_p.OC / conv_p.G; - assert( - (SPATIAL_DIM == 3 || SPATIAL_DIM == 2) && - "Only 2D and 3D convolutions are supported"); int filter_prod = std::accumulate( conv_p.K.begin(), conv_p.K.begin() + SPATIAL_DIM, @@ -1192,6 +1278,11 @@ int rowwise_sparse_adagrad_fused_ref( return current == index_size; } +template FBGEMM_API void transposeConvWeights( + const conv_param_t<1>& conv_p, + const std::int8_t* src, + std::int8_t* dest); + template FBGEMM_API void transposeConvWeights( const conv_param_t<2>& conv_p, const std::int8_t* src, diff --git a/test/GConvTest.cc b/test/GConvTest.cc index 737f652416..ae22f108f8 100644 --- a/test/GConvTest.cc +++ b/test/GConvTest.cc @@ -25,8 +25,9 @@ using namespace std; using namespace fbgemm; -vector transposeVals{matrix_op_t::NoTranspose, - matrix_op_t::Transpose}; +vector transposeVals{ + matrix_op_t::NoTranspose, + matrix_op_t::Transpose}; vector qGranularityVals{ QuantizationGranularity::TENSOR, @@ -271,16 +272,16 @@ void runRequantizeTest(matrix_op_t /* unused */, bool a_symmetric, bool b_symmetric) { vector> shapes(GetShapes_()); for (auto conv_p : shapes) { - int T = SPATIAL_DIM == 2 ? 1 : conv_p.K[SPATIAL_DIM - 3]; - int R = conv_p.K[SPATIAL_DIM - 2]; + int T = SPATIAL_DIM <= 2 ? 1 : conv_p.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 1 : conv_p.K[SPATIAL_DIM - 2]; int S = conv_p.K[SPATIAL_DIM - 1]; int G = conv_p.G; int OC = conv_p.OC; - int IT = SPATIAL_DIM == 2 ? 1 : conv_p.IN_DIM[SPATIAL_DIM - 3]; - int IH = conv_p.IN_DIM[SPATIAL_DIM - 2]; + int IT = SPATIAL_DIM <= 2 ? 1 : conv_p.IN_DIM[SPATIAL_DIM - 3]; + int IH = SPATIAL_DIM == 1 ? 1 : conv_p.IN_DIM[SPATIAL_DIM - 2]; int IW = conv_p.IN_DIM[SPATIAL_DIM - 1]; - int OT = SPATIAL_DIM == 2 ? 1 : conv_p.OUT_DIM[SPATIAL_DIM - 3]; - int OH = conv_p.OUT_DIM[SPATIAL_DIM - 2]; + int OT = SPATIAL_DIM <= 2 ? 1 : conv_p.OUT_DIM[SPATIAL_DIM - 3]; + int OH = SPATIAL_DIM == 1 ? 1 : conv_p.OUT_DIM[SPATIAL_DIM - 2]; int OW = conv_p.OUT_DIM[SPATIAL_DIM - 1]; int IC_per_G = conv_p.IC / conv_p.G; int OC_per_G = conv_p.OC / conv_p.G; @@ -591,8 +592,8 @@ void runPackUnpackTest(matrix_op_t btrans) { vector> shapes(GetShapes_()); for (auto conv_p : shapes) { - int T = SPATIAL_DIM == 2 ? 1 : conv_p.K[SPATIAL_DIM - 3]; - int R = conv_p.K[SPATIAL_DIM - 2]; + int T = SPATIAL_DIM <= 2 ? 1 : conv_p.K[SPATIAL_DIM - 3]; + int R = SPATIAL_DIM == 1 ? 
1 : conv_p.K[SPATIAL_DIM - 2]; int S = conv_p.K[SPATIAL_DIM - 1]; int IC_per_G = conv_p.IC / conv_p.G; int OC_per_G = conv_p.OC / conv_p.G; diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc index bfc3e516b9..79348e74ae 100644 --- a/test/UniConvTest.cc +++ b/test/UniConvTest.cc @@ -26,7 +26,24 @@ vector qGranularityVals{ QuantizationGranularity::OUT_CHANNEL}; // clang-format off -static vector> GetShapes_() { +template +static typename std::enable_if>>::type +GetShapes_() { + vector> shapes = { + // MB, IC, OC, {IW}, G, {KW}, {stride_w}, {pad_l,pad_r}, {dilation_w} + // Regular + conv_param_t<1>(1, 16, 16, {30}, 1, {3}, {1}, {1, 1}), + conv_param_t<1>(1, 32, 32, {30}, 1, {3}, {1}, {1, 1}), + conv_param_t<1>(1, 32, 16, {30}, 1, {3}, {1}, {0, 0}, {2}), + }; + return shapes; +} +// clang-format on + +// clang-format off +template +static typename std::enable_if>>::type +GetShapes_() { vector> shapes = { // MB, IC, OC, {IH, IW}, G, {KH, KW}, {stride_h, stride_w}, {pad_t, pad_l, // pad_b, pad_r}, {dilation_h, dilation_w} @@ -130,6 +147,64 @@ TEST_P(uniConvTest, packingTest) { int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad; tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam(); + conv_param_t<1> conv_p_1d( + MB, IC, OC, {IW}, G, {kernel}, {stride}, {pad, pad}); + + int kernel_dim_1d = kernel; + aligned_vector Bint8_1d( + kernel_dim_1d * conv_p_1d.IC * (conv_p_1d.OC / conv_p_1d.G)); + PackWeightsForConv<1> packedB_1D(conv_p_1d, Bint8_1d.data()); + + switch (ConvFastPath<1, int32_t>(conv_p_1d)) { + case optimized_conv_t::depthwise: { + ASSERT_EQ(packedB_1D.getPackedWForIm2col(), nullptr) + << "im2col packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForGroupwise(), nullptr) + << "groupwise packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForPointwise(), nullptr) + << "pointwise packed matrix should be null"; + ASSERT_NE(packedB_1D.getPackedWForDepthwise(), nullptr) + << "depthwise packed matrix is null"; + break; + } + case optimized_conv_t::groupwise: { + ASSERT_EQ(packedB_1D.getPackedWForIm2col(), nullptr) + << "im2col packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForDepthwise(), nullptr) + << "depthwise packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForPointwise(), nullptr) + << "pointwise packed matrix should be null"; + ASSERT_NE(packedB_1D.getPackedWForGroupwise(), nullptr) + << "Groupwise packed matrix is null"; + break; + } + case optimized_conv_t::pointwise: { + ASSERT_EQ(packedB_1D.getPackedWForIm2col(), nullptr) + << "im2col packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForDepthwise(), nullptr) + << "depthwise packed matrix should null"; + ASSERT_EQ(packedB_1D.getPackedWForGroupwise(), nullptr) + << "Groupwise packed matrix should be null"; + ASSERT_NE(packedB_1D.getPackedWForPointwise(), nullptr) + << "pointwise packed matrix is null"; + break; + } + case optimized_conv_t::fastpath1d: { + break; + } + case optimized_conv_t::im2col: { + ASSERT_EQ(packedB_1D.getPackedWForDepthwise(), nullptr) + << "depthwise packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForGroupwise(), nullptr) + << "groupwise packed matrix should be null"; + ASSERT_EQ(packedB_1D.getPackedWForPointwise(), nullptr) + << "pointwise packed matrix should be null"; + ASSERT_NE(packedB_1D.getPackedWForIm2col(), nullptr) + << "im2col packed matrix is null"; + break; + } + } + conv_param_t<2> conv_p_2d( MB, IC, @@ -179,6 +254,9 @@ TEST_P(uniConvTest, packingTest) { << "pointwise packed matrix is null"; break; } 
+ case optimized_conv_t::fastpath1d: { + break; + } case optimized_conv_t::im2col: { ASSERT_EQ(packedB_2D.getPackedWForDepthwise(), nullptr) << "depthwise packed matrix should be null"; @@ -241,6 +319,9 @@ TEST_P(uniConvTest, packingTest) { << "pointwise packed matrix is null"; break; } + case optimized_conv_t::fastpath1d: { + break; + } case optimized_conv_t::im2col: { ASSERT_EQ(packedB_3D.getPackedWForDepthwise(), nullptr) << "depthwise packed matrix should be null"; @@ -262,6 +343,23 @@ TEST_P(uniConvTest, packUnpackTest) { int MB, IC, OC, IT, IH, IW, G, kernel, stride, pad; tie(MB, IC, OC, IT, IH, IW, G, kernel, stride, pad) = GetParam(); + conv_param_t<1> conv_p_1d( + MB, IC, OC, {IW}, G, {kernel}, {stride}, {pad, pad}); + + int kernel_dim_1d = kernel; + + aligned_vector Bint8_1d( + kernel_dim_1d * conv_p_1d.IC * (conv_p_1d.OC / conv_p_1d.G)); + aligned_vector Bint8_1d_unpacked( + kernel_dim_1d * conv_p_1d.IC * (conv_p_1d.OC / conv_p_1d.G)); + + PackWeightsForConv<1> packedB_1D(conv_p_1d, Bint8_1d.data()); + + packedB_1D.unpack(Bint8_1d_unpacked.data()); + + ASSERT_EQ(Bint8_1d, Bint8_1d_unpacked) + << "Original and unpacked data elements are not the same [1D]"; + conv_param_t<2> conv_p_2d( MB, IC, @@ -399,27 +497,30 @@ TEST(uniConvTest, cornerCases) { * @brief Unit test for uint8 activations, int8 weights, and 32-bit * accumulation. Output processing: requantization -> nothing */ -TEST_P(UniConvQGranTest, requantizeTest) { - vector> shapes(GetShapes_()); - QuantizationGranularity q_granularity; - bool a_symmetric, b_symmetric; - bool test_bias, test_float_bias; - tie(q_granularity, a_symmetric, b_symmetric, test_bias, test_float_bias) = - GetParam(); + +template +void runRequantizeTest( + QuantizationGranularity q_granularity, + bool a_symmetric, + bool b_symmetric, + bool test_bias, + bool test_float_bias) { + vector> shapes(GetShapes_()); for (auto conv_p : shapes) { - int R = conv_p.K[0]; - int S = conv_p.K[1]; + int R = SPATIAL_DIM == 1 ? 1 : conv_p.K[SPATIAL_DIM - 2]; + int S = conv_p.K[SPATIAL_DIM - 1]; int G = conv_p.G; int OC = conv_p.OC; - int OH = conv_p.OUT_DIM[0]; - int OW = conv_p.OUT_DIM[1]; + int OH = SPATIAL_DIM == 1 ? 1 : conv_p.OUT_DIM[SPATIAL_DIM - 2]; + int OW = conv_p.OUT_DIM[SPATIAL_DIM - 1]; int IC_per_G = conv_p.IC / conv_p.G; int OC_per_G = conv_p.OC / conv_p.G; + int IH = SPATIAL_DIM == 1 ? 1 : conv_p.IN_DIM[SPATIAL_DIM - 2]; + int IW = conv_p.IN_DIM[SPATIAL_DIM - 1]; // activations - aligned_vector Aint8( - conv_p.MB * conv_p.IN_DIM[0] * conv_p.IN_DIM[1] * conv_p.IC, 0); + aligned_vector Aint8(conv_p.MB * IH * IW * conv_p.IC, 0); // weights // The weight matrix is in layout G K/G (R S C/G) @@ -550,7 +651,7 @@ TEST_P(UniConvQGranTest, requantizeTest) { ncols_per_quant_group); } - PackWeightsForConv<2> packedWeights(conv_p, Bint8.data()); + PackWeightsForConv packedWeights(conv_p, Bint8.data()); // TODO: Uncomment once we support multiple threads in fbgemmGroupwiseConv // #ifdef _OPENMP @@ -724,3 +825,16 @@ TEST_P(UniConvQGranTest, requantizeTest) { static_cast(0)); } // for each shape } + +TEST_P(UniConvQGranTest, requantizeTest) { + QuantizationGranularity q_granularity; + bool a_symmetric, b_symmetric; + bool test_bias, test_float_bias; + tie(q_granularity, a_symmetric, b_symmetric, test_bias, test_float_bias) = + GetParam(); + + runRequantizeTest<1>( + q_granularity, a_symmetric, b_symmetric, test_bias, test_float_bias); + runRequantizeTest<2>( + q_granularity, a_symmetric, b_symmetric, test_bias, test_float_bias); +}
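With take1DFastPath() still returning false, the fastpath1d cases added to fbgemmConv and PackWeightsForConv are placeholders: a 1D convolution is routed to the existing depthwise or pointwise paths when those conditions match, and otherwise falls back to the im2col-based implementation. A minimal routing check for the first new 1D test shape (a sketch assuming ConvFastPath is reachable through the public fbgemm/Fbgemm.h header, as the tests use it):

#include <cstdint>
#include <iostream>
#include "fbgemm/Fbgemm.h"

int main() {
  // Same as the first 1D shape in UniConvTest: 16 -> 16 channels, IW = 30,
  // KW = 3, stride 1, pad 1/1, dilation 1.
  fbgemm::conv_param_t<1> p(1, 16, 16, {30}, 1, {3}, {1}, {1, 1}, {1});
  const auto path = fbgemm::ConvFastPath<1, std::int32_t>(p);
  // Not depthwise (G != IC), not pointwise (KW != 1), groupwise is disabled
  // for 1D, and take1DFastPath() is a stub, so this lands on im2col.
  std::cout << (path == fbgemm::optimized_conv_t::im2col ? "im2col" : "other")
            << std::endl;
  return 0;
}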