Skip to content

Commit

Permalink
Add fftconv cuda impl from safari
Browse files Browse the repository at this point in the history
Signed-off-by: Guy Jacob <[email protected]>
guyjacob committed May 16, 2024
1 parent 9f45c0a commit 104d454
Showing 603 changed files with 1,785,640 additions and 13 deletions.
241 changes: 241 additions & 0 deletions nemo/collections/nlp/modules/common/hyena/csrc/fftconv/fftconv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
#include <vector>
#include <utility>
#include <cmath>
#include <torch/extension.h>

#include <cuda/std/complex>
#include <cuda_fp16.h>

#define CHECK_DEVICE(x) TORCH_CHECK(x.device().type() == torch::kCUDA, #x " must be on CUDA")
#define CHECK_SHAPE(x, ...) TORCH_CHECK(x.sizes() == torch::IntArrayRef({__VA_ARGS__}), #x " must have shape (" #__VA_ARGS__ ")")

// Runtime-to-compile-time dtype dispatch.
//
// Selects the `input_t` / `output_t` type aliases from the runtime scalar
// types INTYPE / OUTTYPE and then invokes the callable passed as the trailing
// argument (typically a lambda) with those aliases in scope.
//
// Supported combinations:
//   - INTYPE Half      -> input_t = output_t = at::Half      (OUTTYPE is not inspected)
//   - INTYPE BFloat16  -> input_t = output_t = at::BFloat16  (OUTTYPE is not inspected)
//   - INTYPE Float, OUTTYPE Float -> float / float
//   - INTYPE Float, OUTTYPE Half  -> float / at::Half
// Anything else raises via AT_ERROR, using NAME in the message.
//
// NOTE(review): the macro expands to a bare if/else-if chain (it is not
// wrapped in do { } while (0)), so it should only be used as a full statement.
// NOTE(review): because OUTTYPE is ignored for Half/BFloat16 inputs, callers
// must make sure the output buffer really has the same dtype as the input.
#define DISPATCH_FLOAT_AND_HALF_AND_BF16(INTYPE, OUTTYPE, NAME, ...) \
if (INTYPE == at::ScalarType::Half) { \
using input_t = at::Half; \
using output_t = at::Half; \
__VA_ARGS__(); \
} else if (INTYPE == at::ScalarType::BFloat16) { \
using input_t = at::BFloat16; \
using output_t = at::BFloat16; \
__VA_ARGS__(); \
} else if ((INTYPE == at::ScalarType::Float) && (OUTTYPE == at::ScalarType::Float)) { \
using input_t = float; \
using output_t = float; \
__VA_ARGS__(); \
} else if ((INTYPE == at::ScalarType::Float) && (OUTTYPE == at::ScalarType::Half)) { \
using input_t = float; \
using output_t = at::Half; \
__VA_ARGS__(); \
} else { \
AT_ERROR(#NAME, " not implemented for in-type '", toString(INTYPE), "' and out-type '", toString(OUTTYPE), "'"); \
}

// Declaration of the forward launcher; the definition is not in this file
// (presumably in the accompanying .cu translation unit -- confirm).
//
// All pointers are raw buffers taken from tensors that fftconv_fwd() has
// already validated. `v`, `q` and `dropout_mask` may be nullptr when the
// corresponding optional tensor was not supplied. `batch_stride` / `H_stride`
// are the element strides of `u` along its first two dimensions.
template <typename input_t, typename output_t=input_t>
void fftconv_fwd_cuda_dispatch(
const input_t *u, const c10::complex<float> *filter,
const input_t *v, int head_dim, const input_t *q,
const float *D, const float *dropout_mask, output_t *out,
bool gelu, bool gelu_inp, bool gelu_q, int batch_size, int H, int signal_size,
size_t batch_stride, size_t H_stride, int fft_size, bool output_hbl_layout, bool fftfp16);

// Declaration of the backward launcher; the definition is not in this file
// (presumably in the accompanying .cu translation unit -- confirm).
//
// Inputs mirror the forward launcher plus the upstream gradient `dout`.
// Outputs are written through `du`, `dfilter`, `dD`, `dv` and `dq`; `dv` and
// `dq` (like `v`, `q`, `dropout_mask`) are nullptr when the matching optional
// tensor is absent. Note that `dv` is always float, independent of input_t.
template <typename input_t, typename output_t=input_t>
void fftconv_bwd_cuda_dispatch(
const output_t *dout,
const input_t *u, const c10::complex<float> *filter,
const input_t *v, int head_dim, const input_t *q,
const float *D, const float *dropout_mask,
input_t *du, c10::complex<float> *dfilter, float *dD,
float *dv, input_t *dq,
bool gelu, bool gelu_inp, bool gelu_q, int batch_size, int H, int signal_size,
size_t batch_stride, size_t H_stride, int fft_size,
bool output_hbl_layout, bool fftfp16);

/**
 * Host-side entry point for the fused FFT-convolution forward pass.
 *
 * Validates devices, shapes, strides and dtypes of all arguments, allocates
 * the output tensor and forwards raw pointers to fftconv_fwd_cuda_dispatch().
 *
 * @param u                  Input of shape (batch, H, L); last dim must have stride 1;
 *                           dtype float16, bfloat16 or float32.
 * @param filter             Contiguous complex64 tensor of shape (H / head_dim, fft_size / 2 + 1).
 * @param D                  Contiguous float32 tensor of shape (H / head_dim).
 * @param v, q               Optional tensors with the same shape, dtype and
 *                           batch/H strides as `u`.
 * @param head_dim           Positive divisor of H.
 * @param dropout_mask       Optional contiguous float32 tensor of shape (batch, H).
 * @param gelu               Forwarded unchanged to the CUDA launcher.
 * @param gelu_inp, gelu_q   Must both be false (rejected otherwise).
 * @param fft_size           Power of two in [16, 16384] with L <= fft_size / 2.
 * @param force_fp16_output  Request a float16 output; ignored for bfloat16 input.
 * @param output_hbl_layout  If true the output storage is laid out as (H, batch, L)
 *                           and returned as a (batch, H, L) permuted view.
 * @param fftfp16            Only accepted when head_dim == 8.
 * @return Output tensor of logical shape (batch, H, L).
 */
torch::Tensor fftconv_fwd(torch::Tensor u, torch::Tensor filter,
                          torch::Tensor D,
                          c10::optional<torch::Tensor> v, int head_dim,
                          c10::optional<torch::Tensor> q,
                          c10::optional<torch::Tensor> dropout_mask,
                          bool gelu, bool gelu_inp, bool gelu_q, int fft_size,
                          bool force_fp16_output, bool output_hbl_layout,
                          bool fftfp16
                          ) {
    CHECK_DEVICE(u);
    CHECK_DEVICE(filter);
    CHECK_DEVICE(D);

    TORCH_CHECK(u.stride(-1) == 1);
    TORCH_CHECK(filter.is_contiguous());
    TORCH_CHECK(D.is_contiguous());

    // head_dim is used as a divisor in the shape checks below, so it has to be
    // validated first to avoid an integer division by zero.
    TORCH_CHECK(head_dim > 0, "head_dim must be positive");

    const int batch_size = u.size(0);
    const int H = u.size(1);
    const int L = u.size(2);
    CHECK_SHAPE(u, batch_size, H, L);
    CHECK_SHAPE(filter, H / head_dim, fft_size / 2 + 1);
    CHECK_SHAPE(D, H / head_dim);

    TORCH_CHECK(u.dtype() == torch::kFloat16 || u.dtype() == torch::kFloat32 || u.dtype() == torch::kBFloat16);
    // The filter buffer is reinterpreted as c10::complex<float> below; any
    // other dtype (e.g. complex128) would silently be read as garbage.
    TORCH_CHECK(filter.scalar_type() == at::ScalarType::ComplexFloat, "filter must have dtype complex64");
    TORCH_CHECK(D.dtype() == torch::kFloat32);

    if (dropout_mask.has_value()) {
        auto dropout_mask_value = dropout_mask.value();
        CHECK_DEVICE(dropout_mask_value);
        CHECK_SHAPE(dropout_mask_value, batch_size, H);
        TORCH_CHECK(dropout_mask_value.dtype() == torch::kFloat32);
        // Only the raw pointer is handed to the launcher (no strides), so the
        // mask has to be densely packed.
        TORCH_CHECK(dropout_mask_value.is_contiguous(), "dropout_mask must be contiguous");
    }
    if (v.has_value()) {
        auto v_value = v.value();
        CHECK_DEVICE(v_value);
        CHECK_SHAPE(v_value, batch_size, H, L);
        TORCH_CHECK(v_value.stride(-1) == 1);
        TORCH_CHECK(v_value.stride(0) == u.stride(0) && v_value.stride(1) == u.stride(1));
        TORCH_CHECK(v_value.dtype() == u.dtype());
    }
    if (q.has_value()) {
        auto q_value = q.value();
        CHECK_DEVICE(q_value);
        CHECK_SHAPE(q_value, batch_size, H, L);
        TORCH_CHECK(q_value.stride(-1) == 1);
        TORCH_CHECK(q_value.stride(0) == u.stride(0) && q_value.stride(1) == u.stride(1));
        TORCH_CHECK(q_value.dtype() == u.dtype());
    }

    TORCH_CHECK((!gelu_inp) && (!gelu_q));
    TORCH_CHECK((H % head_dim) == 0);
    TORCH_CHECK(!fftfp16 || head_dim == 8);  // fp16 only supported for head dim 8

    TORCH_CHECK((L <= fft_size / 2) && (L % 2 == 0));
    // Power-of-two test done with integer arithmetic (no float log2 round trip).
    TORCH_CHECK(fft_size >= 16 && fft_size <= 16384 && ((fft_size & (fft_size - 1)) == 0));

    auto opts = u.options();
    const at::ScalarType u_dtype = u.scalar_type();
    // There is no (bfloat16 in, float16 out) dispatch case, so the flag is dropped.
    if (u_dtype == at::ScalarType::BFloat16) { force_fp16_output = false; }
    const auto out_opts = opts.dtype(force_fp16_output ? torch::kFloat16 : u_dtype);
    // For the HBL layout the storage is (H, batch, L); the permute only changes
    // the logical view so callers always see (batch, H, L).
    auto out = !output_hbl_layout
        ? torch::empty({batch_size, H, L}, out_opts)
        : torch::empty({H, batch_size, L}, out_opts).permute({1, 0, 2});

    size_t batch_stride = u.stride(0), H_stride = u.stride(1);
    DISPATCH_FLOAT_AND_HALF_AND_BF16(u.scalar_type(), out.scalar_type(), "fftconv_fwd", [&] {
        fftconv_fwd_cuda_dispatch(
            static_cast<input_t *>(u.data_ptr()),
            static_cast<c10::complex<float> *>(filter.data_ptr()),
            v.has_value() ? static_cast<input_t *>(v.value().data_ptr()) : nullptr,
            head_dim,
            q.has_value() ? static_cast<input_t *>(q.value().data_ptr()) : nullptr,
            static_cast<float *>(D.data_ptr()),
            dropout_mask.has_value() ? static_cast<float *>(dropout_mask.value().data_ptr()) : nullptr,
            static_cast<output_t *>(out.data_ptr()),
            gelu, gelu_inp, gelu_q, batch_size, H, L, batch_stride, H_stride, fft_size,
            output_hbl_layout, fftfp16);
    });
    return out;
}

/**
 * Host-side entry point for the fused FFT-convolution backward pass.
 *
 * Validates all arguments, allocates the gradient buffers and forwards raw
 * pointers to fftconv_bwd_cuda_dispatch().
 *
 * @param dout               Upstream gradient of shape (batch, H, L). Must be contiguous,
 *                           or contiguous after permute({1, 0, 2}) when
 *                           output_hbl_layout is true. Its dtype must equal that of
 *                           `u`, except that float32 `u` also accepts float16 `dout`.
 * @param u                  Forward input, shape (batch, H, L), last-dim stride 1.
 * @param filter             Contiguous complex64 tensor of shape (H / head_dim, fft_size / 2 + 1).
 * @param D                  Contiguous float32 tensor of shape (H / head_dim).
 * @param v, q               Optional tensors with the same shape, dtype and
 *                           batch/H strides as `u`.
 * @param head_dim           Positive divisor of H.
 * @param dropout_mask       Optional contiguous float32 tensor of shape (batch, H).
 * @param gelu               Forwarded unchanged to the CUDA launcher.
 * @param gelu_inp, gelu_q   Must both be false (rejected otherwise).
 * @param fft_size           Power of two in [16, 16384] with L <= fft_size / 2.
 * @param output_hbl_layout  Describes the memory layout of `dout` (see above).
 * @param fftfp16            Only accepted when head_dim == 8.
 * @return (du, dfilter, dD, dv, dq). dfilter and dD are reduced over the batch
 *         and head_dim axes; dv / dq are undefined tensors when v / q were not given.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
fftconv_bwd(torch::Tensor dout,
            torch::Tensor u,
            torch::Tensor filter,
            torch::Tensor D,
            c10::optional<torch::Tensor> v, int head_dim,
            c10::optional<torch::Tensor> q,
            c10::optional<torch::Tensor> dropout_mask,
            bool gelu, bool gelu_inp, bool gelu_q, int fft_size,
            bool output_hbl_layout, bool fftfp16) {
    CHECK_DEVICE(dout);
    CHECK_DEVICE(u);
    CHECK_DEVICE(filter);
    CHECK_DEVICE(D);

    TORCH_CHECK(u.stride(-1) == 1);
    TORCH_CHECK(filter.is_contiguous());
    TORCH_CHECK(D.is_contiguous());

    // head_dim is used as a divisor in the shape checks below, so it has to be
    // validated first to avoid an integer division by zero.
    TORCH_CHECK(head_dim > 0, "head_dim must be positive");

    const int batch_size = u.size(0);
    const int H = u.size(1);
    const int L = u.size(2);
    CHECK_SHAPE(dout, batch_size, H, L);
    CHECK_SHAPE(u, batch_size, H, L);
    CHECK_SHAPE(filter, H / head_dim, fft_size / 2 + 1);
    CHECK_SHAPE(D, H / head_dim);
    if (!output_hbl_layout) {
        TORCH_CHECK(dout.is_contiguous());
    } else {
        // Previously we were checking
        // TORCH_CHECK(dout.stride(1) == batch_size * L && dout.stride(0) == L)
        // but this fails for the edge case of batch_size=1, where shape (H, 1, L)
        // is already contiguous, and dout.stride(0) = L * H in that case.
        TORCH_CHECK(dout.permute({1, 0, 2}).is_contiguous());
    }

    TORCH_CHECK(dout.dtype() == torch::kFloat16 || dout.dtype() == torch::kFloat32 || dout.dtype() == torch::kBFloat16);
    TORCH_CHECK(u.dtype() == torch::kFloat16 || u.dtype() == torch::kFloat32 || u.dtype() == torch::kBFloat16);
    // The dispatch macro derives the element type of `dout` from the dtype of
    // `u` for half/bfloat16 inputs, so a mismatching `dout` would be
    // reinterpreted with the wrong element size. Reject it explicitly.
    if (u.scalar_type() == at::ScalarType::Float) {
        TORCH_CHECK(dout.scalar_type() == at::ScalarType::Float || dout.scalar_type() == at::ScalarType::Half,
                    "dout must be float32 or float16 when u is float32");
    } else {
        TORCH_CHECK(dout.scalar_type() == u.scalar_type(), "dout must have the same dtype as u");
    }
    // The filter buffer is reinterpreted as c10::complex<float> below.
    TORCH_CHECK(filter.scalar_type() == at::ScalarType::ComplexFloat, "filter must have dtype complex64");
    TORCH_CHECK(D.dtype() == torch::kFloat32);

    auto opts = u.options();

    // Left undefined (default-constructed) unless the matching input is present.
    torch::Tensor dv;
    torch::Tensor dq;

    if (dropout_mask.has_value()) {
        auto dropout_mask_value = dropout_mask.value();
        CHECK_DEVICE(dropout_mask_value);
        CHECK_SHAPE(dropout_mask_value, batch_size, H);
        TORCH_CHECK(dropout_mask_value.dtype() == torch::kFloat32);
        // Only the raw pointer is handed to the launcher (no strides), so the
        // mask has to be densely packed.
        TORCH_CHECK(dropout_mask_value.is_contiguous(), "dropout_mask must be contiguous");
    }
    if (v.has_value()) {
        auto v_value = v.value();
        CHECK_DEVICE(v_value);
        CHECK_SHAPE(v_value, batch_size, H, L);
        TORCH_CHECK(v_value.stride(-1) == 1);
        TORCH_CHECK(v_value.stride(0) == u.stride(0) && v_value.stride(1) == u.stride(1));
        TORCH_CHECK(v_value.dtype() == u.dtype());
        // dv is zero-initialised and always float32, matching the `float *dv`
        // parameter of the launcher.
        dv = torch::zeros_like(v_value, opts.dtype(torch::kFloat));
    }
    if (q.has_value()) {
        auto q_value = q.value();
        CHECK_DEVICE(q_value);
        CHECK_SHAPE(q_value, batch_size, H, L);
        TORCH_CHECK(q_value.stride(-1) == 1);
        TORCH_CHECK(q_value.stride(0) == u.stride(0) && q_value.stride(1) == u.stride(1));
        TORCH_CHECK(q_value.dtype() == u.dtype());
        dq = torch::empty_like(q_value);
    }

    TORCH_CHECK((!gelu_inp) && (!gelu_q));
    TORCH_CHECK((H % head_dim) == 0);
    TORCH_CHECK(!fftfp16 || head_dim == 8);  // fp16 only supported for head dim 8

    TORCH_CHECK((L <= fft_size / 2) && (L % 2 == 0));
    // Power-of-two test done with integer arithmetic (no float log2 round trip).
    TORCH_CHECK(fft_size >= 16 && fft_size <= 16384 && ((fft_size & (fft_size - 1)) == 0));

    auto du = torch::empty_like(u);
    // Per-(batch, head, head_dim) partial gradients; reduced over dims {0, 2}
    // on return.
    auto dfilter = torch::empty({batch_size, H / head_dim, head_dim, fft_size / 2 + 1}, opts.dtype(filter.dtype()));
    auto dD = torch::empty({batch_size, H / head_dim, head_dim}, opts.dtype(torch::kFloat));

    size_t batch_stride = u.stride(0), H_stride = u.stride(1);
    DISPATCH_FLOAT_AND_HALF_AND_BF16(u.scalar_type(), dout.scalar_type(), "fftconv_bwd", [&] {
        fftconv_bwd_cuda_dispatch(
            static_cast<output_t *>(dout.data_ptr()),
            static_cast<input_t *>(u.data_ptr()),
            static_cast<c10::complex<float> *>(filter.data_ptr()),
            v.has_value() ? static_cast<input_t *>(v.value().data_ptr()) : nullptr,
            head_dim,
            q.has_value() ? static_cast<input_t *>(q.value().data_ptr()) : nullptr,
            static_cast<float *>(D.data_ptr()),
            dropout_mask.has_value() ? static_cast<float *>(dropout_mask.value().data_ptr()) : nullptr,
            static_cast<input_t *>(du.data_ptr()),
            static_cast<c10::complex<float> *>(dfilter.data_ptr()),
            static_cast<float *>(dD.data_ptr()),
            v.has_value() ? static_cast<float *>(dv.data_ptr()) : nullptr,
            q.has_value() ? static_cast<input_t *>(dq.data_ptr()) : nullptr,
            gelu, gelu_inp, gelu_q, batch_size, H, L, batch_stride, H_stride, fft_size,
            output_hbl_layout, fftfp16);
    });

    return std::make_tuple(du, dfilter.sum(/*dim=*/std::vector<int64_t>{0, 2}), dD.sum(/*dim=*/std::vector<int64_t>{0, 2}), dv, dq);
}

// Python bindings: exposes the two host entry points under the module name
// supplied at build time through TORCH_EXTENSION_NAME. Arguments are
// positional and follow the C++ parameter order of each function.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("fftconv_fwd", &fftconv_fwd, "Convolution with FFT");
m.def("fftconv_bwd", &fftconv_bwd, "Convolution with FFT, backward");
}
1,981 changes: 1,981 additions & 0 deletions nemo/collections/nlp/modules/common/hyena/csrc/fftconv/fftconv_cuda.cu

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import torch
import torch.nn.functional as F

from einops import rearrange

from fftconv import fftconv_fwd, fftconv_bwd


def fftconv_ref(u, k, D, dropout_mask):
    """Pure-PyTorch reference: FFT convolution of `u` with `k`, plus the
    `u * D` skip term, followed by GELU and a per-(batch, H) dropout mask.

    The result is cast back to the dtype of `u`.
    """
    n = u.shape[-1]
    padded_len = 2 * n
    kernel_freq = torch.fft.rfft(k, n=padded_len) / padded_len
    signal_freq = torch.fft.rfft(u.to(dtype=k.dtype), n=padded_len)
    conv = torch.fft.irfft(signal_freq * kernel_freq, n=padded_len, norm='forward')
    conv = conv[..., :n]  # keep only the first `n` samples
    pre_activation = conv + u * D.unsqueeze(-1)
    mask = rearrange(dropout_mask, 'b H -> b H 1')
    return (F.gelu(pre_activation) * mask).to(dtype=u.dtype)


def fftconv_fast(u, k, D, dropout_mask):
    """Fuse padding + rfft + pointwise mult + ifft + multiply with D + gelu + dropout

    Calls the `fftconv_fwd` extension with its full positional signature:
    (u, filter, D, v, head_dim, q, dropout_mask, gelu, gelu_inp, gelu_q,
    fft_size, force_fp16_output, output_hbl_layout, fftfp16).
    """
    seqlen = u.shape[-1]
    fft_size = 2 * seqlen
    k_f = torch.fft.rfft(k, n=fft_size)
    out = fftconv_fwd(
        u, k_f, D,
        None,          # v: no extra gating input
        1,             # head_dim: `k` and `D` carry one entry per channel
        None,          # q: no extra gating input
        dropout_mask,
        True,          # gelu: the reference implementation applies GELU
        False,         # gelu_inp: rejected by the extension when True
        False,         # gelu_q: rejected by the extension when True
        fft_size,
        False,         # force_fp16_output
        False,         # output_hbl_layout
        False,         # fftfp16
    )
    return out


def fftconv_fast_bwd(dout, u, k, D, dropout_mask=None):
    """Backward counterpart of `fftconv_fast`.

    Calls the `fftconv_bwd` extension with its full positional signature:
    (dout, u, filter, D, v, head_dim, q, dropout_mask, gelu, gelu_inp, gelu_q,
    fft_size, output_hbl_layout, fftfp16). The extension returns
    (du, dfilter, dD, dv, dq); dv and dq are unused here because no `v` / `q`
    is passed.

    Returns (dx, dk, dD), where dk is the filter gradient converted back to
    the time domain and truncated to the sequence length.
    """
    seqlen = u.shape[-1]
    fft_size = 2 * seqlen
    k_f = torch.fft.rfft(k, n=fft_size)
    dx, dk_f, dD, _dv, _dq = fftconv_bwd(
        dout, u, k_f, D,
        None,          # v
        1,             # head_dim: `k` and `D` carry one entry per channel
        None,          # q
        dropout_mask,
        True,          # gelu: must match the forward call
        False,         # gelu_inp
        False,         # gelu_q
        fft_size,
        False,         # output_hbl_layout
        False,         # fftfp16
    )
    dk = torch.fft.irfft(dk_f, n=fft_size, norm='forward')[..., :seqlen]
    return dx, dk, dD


# ---- Smoke-test driver: builds random inputs and runs reference, fused
# ---- forward and fused backward once. No results are compared or printed.
device = 'cuda'
dtype = torch.float32
# dtype = torch.float16
batch_size = 64
H = 256
# NOTE: the helper functions above derive their own fft_size as 2 * seqlen;
# this module-level value is not passed to them.
fft_size = 2048
seqlen = 1024
dropout_prob = 0.37

# Fixed seed so the random tensors below are reproducible.
torch.manual_seed(0)
# u: (batch_size, H, seqlen) input signal; k: (H, seqlen) filter; D: (H,) skip weight.
u = torch.randn(batch_size, H, seqlen, device=device, dtype=dtype, requires_grad=True)
k = torch.randn(H, seqlen, device=device, requires_grad=True)
D = torch.randn(H, device=device, requires_grad=True)
# (batch_size, H) mask produced by applying dropout to a tensor of ones.
dropout_mask = F.dropout(torch.ones(batch_size, H, device=device), dropout_prob)

# The reference result is overwritten by the fused result on the next line.
out = fftconv_ref(u, k, D, dropout_mask)
out = fftconv_fast(u, k, D, dropout_mask)
# Random upstream gradient with the same shape/dtype as the output.
g = torch.randn_like(out)
fftconv_fast_bwd(g, u, k, D, dropout_mask)
240 changes: 240 additions & 0 deletions nemo/collections/nlp/modules/common/hyena/csrc/fftconv/lut.h

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import math

import re
import numpy as np


# FFT size the lookup table is generated for.
# N = 8192
N = 16384
# The case of 0 / N is special, we want to simplify it to 0 / 2 instead of 0 / 1
numerator = np.arange(1, N // 8 + 1)
# Reduce every fraction numerator / N to lowest terms.
gcd = np.gcd(numerator, N)
num = numerator // gcd
denom = N // gcd
# Entry names follow the pattern T_<denominator>_<numerator>.
lut_vals = ['T_2_0']
for n, d in zip(num, denom):
    lut_vals.append(f'T_{d}_{n}')
lut_string = f"static const __device__ float2 lut_mine_sp_8_{N}[{N // 8 + 1}] = {{\n {','.join(lut_vals)}\n}};"
print(lut_string)

# Only define new values if it's not already in the cuFFTDx lookup table
cufftdx_lut_filename = 'mathdx/22.02/include/cufftdx/include/database/lut_defines_0.hpp.inc'
reg = re.compile(f'^#define T_{N}_([0-9]+) ')
with open(cufftdx_lut_filename, 'r') as f:
    # Numerators for which a `#define T_<N>_<numerator>` line already exists.
    matches = {int(found[1]) for found in map(reg.match, f) if found is not None}

# Only odd numerators are considered for the new T_<N>_<n> defines.
numerator = np.arange(1, N // 8 + 1, 2)
angle = -2 * math.pi * numerator.astype(np.float64) / N
cos, sin = np.cos(angle), np.sin(angle)
defs = []
for n, c, s in zip(numerator, cos, sin):
    if n in matches:
        continue
    defs.append(f'#define T_{N}_{n} {{{c:.40f},{s:.40f}}}')
def_string = '\n'.join(defs)
print(def_string)
72 changes: 72 additions & 0 deletions nemo/collections/nlp/modules/common/hyena/csrc/fftconv/map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Downloaded from https://github.com/swansontec/map-macro

/*
* Copyright (C) 2012 William Swanson
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Except as contained in this notice, the names of the authors or
* their institutions shall not be used in advertising or otherwise to
* promote the sale, use or other dealings in this Software without
* prior written authorization from the authors.
*/

#ifndef MAP_H_INCLUDED
#define MAP_H_INCLUDED

/*
 * EVAL forces the preprocessor to rescan its argument many times so that the
 * deferred MAP0/MAP1 (and MAP_LIST0/MAP_LIST1) calls keep expanding. EVAL0 is
 * the identity; every higher level applies the previous level three times.
 */
#define EVAL0(...) __VA_ARGS__
#define EVAL1(...) EVAL0(EVAL0(EVAL0(__VA_ARGS__)))
#define EVAL2(...) EVAL1(EVAL1(EVAL1(__VA_ARGS__)))
#define EVAL3(...) EVAL2(EVAL2(EVAL2(__VA_ARGS__)))
#define EVAL4(...) EVAL3(EVAL3(EVAL3(__VA_ARGS__)))
#define EVAL(...) EVAL4(EVAL4(EVAL4(__VA_ARGS__)))

/* MAP_END swallows whatever it is called with; MAP_OUT expands to nothing and
 * only separates a macro name from the following parenthesis so the call is
 * deferred to a later scan; MAP_COMMA yields a literal comma. */
#define MAP_END(...)
#define MAP_OUT
#define MAP_COMMA ,

/*
 * End-of-list detection. MAP() appends `()()()` sentinels to the argument
 * list. When `test` is such a sentinel, `MAP_GET_END test` unwinds through
 * MAP_GET_END1 and MAP_GET_END2 into `0, MAP_END`, which shifts MAP_END into
 * the `next` slot of MAP_NEXT0 and thereby terminates the recursion. For any
 * other `test`, the original `next` macro is selected.
 */
#define MAP_GET_END2() 0, MAP_END
#define MAP_GET_END1(...) MAP_GET_END2
#define MAP_GET_END(...) MAP_GET_END1
#define MAP_NEXT0(test, next, ...) next MAP_OUT
#define MAP_NEXT1(test, next) MAP_NEXT0(test, next, 0)
#define MAP_NEXT(test, next) MAP_NEXT1(MAP_GET_END test, next)

/* MAP0 and MAP1 are identical and call each other alternately: each applies
 * f to the head element `x` and continues with the remaining arguments. */
#define MAP0(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP1)(f, peek, __VA_ARGS__)
#define MAP1(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP0)(f, peek, __VA_ARGS__)

/* Same as MAP_NEXT, but emits a comma before continuing with `next`. */
#define MAP_LIST_NEXT1(test, next) MAP_NEXT0(test, MAP_COMMA next, 0)
#define MAP_LIST_NEXT(test, next) MAP_LIST_NEXT1(MAP_GET_END test, next)

/* Comma-separating counterparts of MAP0 / MAP1. */
#define MAP_LIST0(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST1)(f, peek, __VA_ARGS__)
#define MAP_LIST1(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST0)(f, peek, __VA_ARGS__)

/**
* Applies the function macro `f` to each of the remaining parameters.
*/
#define MAP(f, ...) EVAL(MAP1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))

/**
* Applies the function macro `f` to each of the remaining parameters and
* inserts commas between the results.
*/
#define MAP_LIST(f, ...) EVAL(MAP_LIST1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))

#endif

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/* Cap the width of elements with class "wy-nav-content" at 1240px.
   `!important` makes this win over any competing max-width declaration. */
.wy-nav-content {
max-width: 1240px !important;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
/*
* doctools.js
* ~~~~~~~~~~~
*
* Sphinx JavaScript utilities for all documentation.
*
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/

/**
* select a different prefix for underscore
*/
$u = _.noConflict();

/**
* make the code below compatible with browsers without
* an installed firebug like debugger
if (!window.console || !console.firebug) {
var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
"dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
"profile", "profileEnd"];
window.console = {};
for (var i = 0; i < names.length; ++i)
window.console[names[i]] = function() {};
}
*/

/**
* small helper function to urldecode strings
*
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
*/
jQuery.urldecode = function(x) {
  // Falsy input (undefined, null, empty string) is handed back untouched;
  // otherwise '+' is treated as an encoded space before percent-decoding.
  return x ? decodeURIComponent(x.replace(/\+/g, ' ')) : x;
};

/**
* small helper function to urlencode strings
*/
jQuery.urlencode = encodeURIComponent;

/**
* This function returns the parsed url parameters of the
* current request. Multiple values per key are supported,
* it will always return arrays of strings for the value parts.
*/
jQuery.getQueryParameters = function(s) {
  // Default to the query string of the current page.
  if (typeof s === 'undefined')
    s = document.location.search;
  var parts = s.substring(s.indexOf('?') + 1).split('&');
  var result = {};
  for (var i = 0; i < parts.length; i++) {
    var part = parts[i];
    // Skip empty segments (empty query string, leading/trailing/double '&')
    // instead of recording a bogus '' key.
    if (!part)
      continue;
    // Split on the FIRST '=' only, so values that themselves contain '='
    // are kept intact rather than being cut off.
    var eq = part.indexOf('=');
    var key = jQuery.urldecode(eq < 0 ? part : part.substring(0, eq));
    // A parameter without '=' keeps an undefined value.
    var value = jQuery.urldecode(eq < 0 ? undefined : part.substring(eq + 1));
    // Own-property check: keys such as "toString" or "constructor" must not
    // be mistaken for already-seen parameters via the prototype chain.
    if (Object.prototype.hasOwnProperty.call(result, key))
      result[key].push(value);
    else
      result[key] = [value];
  }
  return result;
};

/**
* highlight a given string on a jquery object by wrapping it in
* span elements with the given class name.
*/
jQuery.fn.highlightText = function(text, className) {
// Recursively walks `node`; the first occurrence of `text` inside each text
// node is wrapped. `addItems` collects SVG <rect> elements that are inserted
// after the traversal has finished.
function highlight(node, addItems) {
// nodeType 3 is a text node.
if (node.nodeType === 3) {
var val = node.nodeValue;
// Only the node text is lower-cased here; `text` is used as passed in.
var pos = val.toLowerCase().indexOf(text);
// Skip text whose parent already carries the highlight class or opted out
// through the "nohighlight" class.
if (pos >= 0 &&
!jQuery(node.parentNode).hasClass(className) &&
!jQuery(node.parentNode).hasClass("nohighlight")) {
var span;
// True when the nearest of body / svg / foreignObject ancestors is an <svg>.
var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
if (isInSVG) {
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.className = className;
}
// Split the text node into: [text before] [wrapper(match)] [text after].
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
node.parentNode.insertBefore(span, node.parentNode.insertBefore(
document.createTextNode(val.substr(pos + text.length)),
node.nextSibling));
node.nodeValue = val.substr(0, pos);
if (isInSVG) {
// In SVG the highlight is drawn as a <rect> sized to the bounding box of
// the parent element; it carries the highlight class instead of the tspan.
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
var bbox = node.parentElement.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute('class', className);
addItems.push({
"parent": node.parentNode,
"target": rect});
}
}
}
// Do not descend into form controls.
else if (!jQuery(node).is("button, select, textarea")) {
jQuery.each(node.childNodes, function() {
highlight(this, addItems);
});
}
}
var addItems = [];
var result = this.each(function() {
highlight(this, addItems);
});
// Insert the collected SVG rects before their recorded parent elements.
for (var i = 0; i < addItems.length; ++i) {
jQuery(addItems[i].parent).before(addItems[i].target);
}
// Return the jQuery set to allow chaining.
return result;
};

/*
* backward compatibility for jQuery.browser
* This will be supported until firefox bug is fixed.
*/
// Only install the shim when jQuery does not provide `jQuery.browser` itself.
if (!jQuery.browser) {
// Parses a user-agent string into { browser, version }. The patterns are
// tried in order and the first match wins; an unrecognised agent yields
// browser "" and version "0".
jQuery.uaMatch = function(ua) {
ua = ua.toLowerCase();

var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
/(webkit)[ \/]([\w.]+)/.exec(ua) ||
/(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
/(msie) ([\w.]+)/.exec(ua) ||
ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
[];

return {
browser: match[ 1 ] || "",
version: match[ 2 ] || "0"
};
};
// Expose the detected browser as a boolean flag, e.g. jQuery.browser.mozilla.
jQuery.browser = {};
jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}

/**
* Small JavaScript module for the documentation.
*/
var Documentation = {

  /**
   * Entry point, run once the DOM is ready: applies the Firefox anchor
   * workaround, highlights search terms, wires up the index toggles and,
   * if enabled in DOCUMENTATION_OPTIONS, keyboard navigation.
   */
  init : function() {
    this.fixFirefoxAnchorBug();
    this.highlightSearchWords();
    this.initIndexTable();
    if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
      this.initOnKeyListeners();
    }
  },

  /**
   * i18n support
   */
  TRANSLATIONS : {},
  // Maps a count to an index into a plural-forms array; replaced by
  // addTranslations() with the catalog's own expression.
  PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
  LOCALE : 'unknown',

  // gettext and ngettext don't access `this`, so that the functions
  // can safely be bound to a different name (_ = Documentation.gettext)
  gettext : function(string) {
    var translated = Documentation.TRANSLATIONS[string];
    if (typeof translated === 'undefined')
      return string;
    // A catalog entry is either a plain string or an array of plural forms.
    return (typeof translated === 'string') ? translated : translated[0];
  },

  /**
   * Plural-aware lookup. Falls back to `singular` / `plural` when the
   * catalog has no entry for `singular`.
   */
  ngettext : function(singular, plural, n) {
    var translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated === 'undefined')
      return (n == 1) ? singular : plural;
    // The property is named PLURAL_EXPR (with underscore); the former
    // `PLURALEXPR` spelling was undefined and threw a TypeError here.
    return translated[Documentation.PLURAL_EXPR(n)];
  },

  /**
   * Merge a message catalog ({messages, plural_expr, locale}) into the
   * translation table.
   */
  addTranslations : function(catalog) {
    for (var key in catalog.messages)
      this.TRANSLATIONS[key] = catalog.messages[key];
    // NOTE(review): `new Function` evaluates catalog.plural_expr as code;
    // the catalog must therefore come from a trusted source.
    this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
    this.LOCALE = catalog.locale;
  },

  /**
   * add context elements like header anchor links
   */
  addContextElements : function() {
    $('div[id] > :header:first').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this headline')).
      appendTo(this);
    });
    $('dt[id]').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this definition')).
      appendTo(this);
    });
  },

  /**
   * workaround a firefox stupidity
   * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
   */
  fixFirefoxAnchorBug : function() {
    if (document.location.hash && $.browser.mozilla)
      window.setTimeout(function() {
        document.location.href += '';
      }, 10);
  },

  /**
   * highlight the search words provided in the url in the text
   */
  highlightSearchWords : function() {
    var params = $.getQueryParameters();
    var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
    if (terms.length) {
      var body = $('div.body');
      if (!body.length) {
        body = $('body');
      }
      // Deferred so the highlighting does not run synchronously during init.
      window.setTimeout(function() {
        $.each(terms, function() {
          body.highlightText(this.toLowerCase(), 'highlighted');
        });
      }, 10);
      $('<p class="highlight-link"><a href="javascript:Documentation.' +
        'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
          .appendTo($('#searchbox'));
    }
  },

  /**
   * init the domain index toggle buttons
   */
  initIndexTable : function() {
    var togglers = $('img.toggler').click(function() {
      var src = $(this).attr('src');
      // Drop the first 7 characters of the id to get the row-group number.
      var idnum = $(this).attr('id').substr(7);
      $('tr.cg-' + idnum).toggle();
      // 'minus.png' is 9 characters long, 'plus.png' is 8.
      if (src.substr(-9) === 'minus.png')
        $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
      else
        $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
    }).css('display', '');
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
      togglers.click();
    }
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords : function() {
    $('#searchbox .highlight-link').fadeOut(300);
    $('span.highlighted').removeClass('highlighted');
    // Remove the `highlight` parameter from the address bar without reloading.
    var url = new URL(window.location);
    url.searchParams.delete('highlight');
    window.history.replaceState({}, '', url);
  },

  /**
   * make the url absolute
   */
  makeURL : function(relativeURL) {
    return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
  },

  /**
   * get the current relative url
   */
  getCurrentURL : function() {
    var path = document.location.pathname;
    var parts = path.split(/\//);
    // Drop one trailing path component for every '..' in URL_ROOT.
    $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
      if (this === '..')
        parts.pop();
    });
    var url = parts.join('/');
    return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
  },

  /**
   * Left / right arrow keys follow the <link rel="prev"> / <link rel="next">
   * targets of the page.
   */
  initOnKeyListeners: function() {
    $(document).keydown(function(event) {
      var activeElementType = document.activeElement.tagName;
      // don't navigate when in search box, textarea, dropdown or button
      if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT'
          && activeElementType !== 'BUTTON' && !event.altKey && !event.ctrlKey && !event.metaKey
          && !event.shiftKey) {
        switch (event.keyCode) {
          case 37: // left
            var prevHref = $('link[rel="prev"]').prop('href');
            if (prevHref) {
              window.location.href = prevHref;
              return false;
            }
            break;
          case 39: // right
            var nextHref = $('link[rel="next"]').prop('href');
            if (nextHref) {
              window.location.href = nextHref;
              return false;
            }
            break;
        }
      }
    });
  }
};

// quick alias for translations
_ = Documentation.gettext;

$(document).ready(function() {
Documentation.init();
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Global, build-time configuration read by the documentation scripts.
var DOCUMENTATION_OPTIONS = {
// Taken at load time from the `data-url_root` attribute of the element with
// id "documentation_options"; that element must already exist in the DOM.
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '1.0.0',
// NOTE: this is the string 'None', not null.
LANGUAGE: 'None',
COLLAPSE_INDEX: false,
BUILDER: 'html',
FILE_SUFFIX: '.html',
LINK_SUFFIX: '.html',
HAS_SOURCE: false,
SOURCELINK_SUFFIX: '.txt',
NAVIGATION_WITH_KEYS: false
};
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
/*
* language_data.js
* ~~~~~~~~~~~~~~~~
*
* This script contains the language-specific data used by searchtools.js,
* namely the list of stopwords, stemmer, scorer and splitter.
*
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/

var stopwords = ["a","and","are","as","at","be","but","by","for","if","in","into","is","it","near","no","not","of","on","or","such","that","the","their","then","there","these","they","this","to","was","will","with"];


/* Non-minified version is copied as a separate JS file, is available */

/**
* Porter Stemmer
*/
// Constructor for an English Porter stemmer. Instances expose a single
// method, stemWord(w), which returns the stem of the word `w`.
var Stemmer = function() {

// Suffix replacement table used in step 2.
var step2list = {
ational: 'ate',
tional: 'tion',
enci: 'ence',
anci: 'ance',
izer: 'ize',
bli: 'ble',
alli: 'al',
entli: 'ent',
eli: 'e',
ousli: 'ous',
ization: 'ize',
ation: 'ate',
ator: 'ate',
alism: 'al',
iveness: 'ive',
fulness: 'ful',
ousness: 'ous',
aliti: 'al',
iviti: 'ive',
biliti: 'ble',
logi: 'log'
};

// Suffix replacement table used in step 3.
var step3list = {
icate: 'ic',
ative: '',
alize: 'al',
iciti: 'ic',
ical: 'ic',
ful: '',
ness: ''
};

// Regex fragments from which the "measure" tests below are assembled.
var c = "[^aeiou]"; // consonant
var v = "[aeiouy]"; // vowel
var C = c + "[^aeiouy]*"; // consonant sequence
var V = v + "[aeiou]*"; // vowel sequence

var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
var s_v = "^(" + C + ")?" + v; // vowel in stem

// Returns the stem of `w`. Words shorter than three characters are returned
// unchanged. The steps run strictly in order; each works on the result of
// the previous one.
this.stemWord = function (w) {
var stem;
var suffix;
var firstch;
var origword = w;

if (w.length < 3)
return w;

var re;
var re2;
var re3;
var re4;

// An initial "y" is upper-cased so the vowel patterns (which only list
// lower-case letters) do not treat it as a vowel; it is restored at the end.
firstch = w.substr(0,1);
if (firstch == "y")
w = firstch.toUpperCase() + w.substr(1);

// Step 1a
re = /^(.+?)(ss|i)es$/;
re2 = /^(.+?)([^s])s$/;

if (re.test(w))
w = w.replace(re,"$1$2");
else if (re2.test(w))
w = w.replace(re2,"$1$2");

// Step 1b
re = /^(.+?)eed$/;
re2 = /^(.+?)(ed|ing)$/;
if (re.test(w)) {
var fp = re.exec(w);
re = new RegExp(mgr0);
if (re.test(fp[1])) {
// Drop the final character ("eed" -> "ee").
re = /.$/;
w = w.replace(re,"");
}
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1];
re2 = new RegExp(s_v);
// The suffix is only removed when the remaining stem contains a vowel.
if (re2.test(stem)) {
w = stem;
re2 = /(at|bl|iz)$/;
re3 = new RegExp("([^aeiouylsz])\\1$");
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re2.test(w))
w = w + "e";
else if (re3.test(w)) {
// Doubled final letter (other than l, s, z): drop one of the two.
re = /.$/;
w = w.replace(re,"");
}
else if (re4.test(w))
w = w + "e";
}
}

// Step 1c
re = /^(.+?)y$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(s_v);
if (re.test(stem))
w = stem + "i";
}

// Step 2
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step2list[suffix];
}

// Step 3
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step3list[suffix];
}

// Step 4
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
re2 = /^(.+?)(s|t)(ion)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
if (re.test(stem))
w = stem;
}
else if (re2.test(w)) {
var fp = re2.exec(w);
// For "-sion" / "-tion" the "s" / "t" stays part of the stem.
stem = fp[1] + fp[2];
re2 = new RegExp(mgr1);
if (re2.test(stem))
w = stem;
}

// Step 5
re = /^(.+?)e$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
re2 = new RegExp(meq1);
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
w = stem;
}
re = /ll$/;
re2 = new RegExp(mgr1);
if (re.test(w) && re2.test(w)) {
// Final "ll" with m>1: drop one "l".
re = /.$/;
w = w.replace(re,"");
}

// and turn initial Y back to y
if (firstch == "y")
w = firstch.toLowerCase() + w.substr(1);
return w;
}
}




// Lookup table of character codes that splitQuery() treats as term separators.
// Built once, when this script is evaluated, by an immediately-invoked function.
// The returned object maps a numeric UTF-16 code unit (as produced by
// String.prototype.charCodeAt) to `true`; codes that are absent read back as
// `undefined`, i.e. "not a separator".
// From the tables below: ASCII digits (48-57), ASCII letters (65-90, 97-122) and
// underscore (95) are NOT listed, so they are kept as part of a term.
// NOTE(review): the remaining entries presumably enumerate every non-letter /
// non-digit code point in the Basic Multilingual Plane — confirm against the
// generator that produced this table before editing any value by hand.
var splitChars = (function() {
var result = {};
// Isolated separator codes that do not belong to a contiguous range.
var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648,
1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702,
2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971,
2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345,
3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761,
3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823,
4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125,
8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695,
11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587,
43696, 43713, 64286, 64297, 64311, 64317, 64319, 64322, 64325, 65141];
var i, j, start, end;
// Mark every isolated code as a separator.
for (i = 0; i < singles.length; i++) {
result[singles[i]] = true;
}
// Contiguous separator runs, each given as an inclusive [first, last] pair.
var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709],
[722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161],
[1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568],
[1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807],
[1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047],
[2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383],
[2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450],
[2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547],
[2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673],
[2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820],
[2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946],
[2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023],
[3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173],
[3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332],
[3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481],
[3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718],
[3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791],
[3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095],
[4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205],
[4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687],
[4702, 4703], [4750, 4751], [4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968],
[4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869],
[5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102],
[6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271],
[6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592],
[6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822],
[6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167],
[7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959],
[7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143],
[8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318],
[8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483],
[8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101],
[10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567],
[11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292],
[12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444],
[12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783],
[12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311],
[19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511],
[42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774],
[42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071],
[43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263],
[43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519],
[43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647],
[43698, 43700], [43703, 43704], [43710, 43711], [43715, 43738], [43742, 43967],
[44003, 44015], [44026, 44031], [55204, 55215], [55239, 55242], [55292, 55295],
[57344, 63743], [64046, 64047], [64110, 64111], [64218, 64255], [64263, 64274],
[64280, 64284], [64434, 64466], [64830, 64847], [64912, 64913], [64968, 65007],
[65020, 65135], [65277, 65295], [65306, 65312], [65339, 65344], [65371, 65381],
[65471, 65473], [65480, 65481], [65488, 65489], [65496, 65497]];
// Expand each range into individual entries; both endpoints are included.
for (i = 0; i < ranges.length; i++) {
start = ranges[i][0];
end = ranges[i][1];
for (j = start; j <= end; j++) {
result[j] = true;
}
}
return result;
})();

/**
 * Break a search query into its terms.
 *
 * Every UTF-16 code unit of `query` is looked up in `splitChars`; code units
 * flagged there act as separators and are dropped. Each maximal run of
 * non-separator code units becomes one entry of the returned array, in the
 * order the runs appear. Consecutive separators never yield empty strings.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} The terms found in `query` (empty array if there are none).
 */
function splitQuery(query) {
  var terms = [];
  var tokenStart = -1; // index where the current term began, or -1 when between terms

  // Walk one position past the last character so that the end of the string
  // behaves like a trailing separator and flushes a term that is still open.
  for (var pos = 0; pos <= query.length; pos++) {
    var atBoundary = pos === query.length || splitChars[query.charCodeAt(pos)];

    if (!atBoundary) {
      // Inside a term: remember where it started if this is its first character.
      if (tokenStart === -1) {
        tokenStart = pos;
      }
      continue;
    }

    // On a separator (or at the end): emit the term that just finished, if any.
    if (tokenStart !== -1) {
      terms.push(query.slice(tokenStart, pos));
      tokenStart = -1;
    }
  }

  return terms;
}


Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight { background: #eeffcc; }
.highlight .c { color: #408090; font-style: italic } /* Comment */
.highlight .err { border: 1px solid #FF0000 } /* Error */
.highlight .k { color: #007020; font-weight: bold } /* Keyword */
.highlight .o { color: #666666 } /* Operator */
.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #007020 } /* Comment.Preproc */
.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #FF0000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
.highlight .go { color: #333333 } /* Generic.Output */
.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight .gt { color: #0044DD } /* Generic.Traceback */
.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #007020 } /* Keyword.Pseudo */
.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #902000 } /* Keyword.Type */
.highlight .m { color: #208050 } /* Literal.Number */
.highlight .s { color: #4070a0 } /* Literal.String */
.highlight .na { color: #4070a0 } /* Name.Attribute */
.highlight .nb { color: #007020 } /* Name.Builtin */
.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
.highlight .no { color: #60add5 } /* Name.Constant */
.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
.highlight .ne { color: #007020 } /* Name.Exception */
.highlight .nf { color: #06287e } /* Name.Function */
.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #bb60d5 } /* Name.Variable */
.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #208050 } /* Literal.Number.Bin */
.highlight .mf { color: #208050 } /* Literal.Number.Float */
.highlight .mh { color: #208050 } /* Literal.Number.Hex */
.highlight .mi { color: #208050 } /* Literal.Number.Integer */
.highlight .mo { color: #208050 } /* Literal.Number.Oct */
.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
.highlight .sc { color: #4070a0 } /* Literal.String.Char */
.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
.highlight .sx { color: #c65d09 } /* Literal.String.Other */
.highlight .sr { color: #235388 } /* Literal.String.Regex */
.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
.highlight .ss { color: #517918 } /* Literal.String.Symbol */
.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #06287e } /* Name.Function.Magic */
.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>&lt;no title&gt; &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>&lt;no title&gt;</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<blockquote>
<div><p>Workspace is not required for FFTs of the following sizes:</p>
<ul class="simple">
<li><p>Powers of 2 up to 32768</p></li>
<li><p>Powers of 3 up to 19683</p></li>
<li><p>Powers of 5 up to 15625</p></li>
<li><p>Powers of 6 up to 1296</p></li>
<li><p>Powers of 7 up to 2401</p></li>
<li><p>Powers of 10 up to 10000</p></li>
<li><p>Powers of 11 up to 1331</p></li>
<li><p>Powers of 12 up to 1728</p></li>
</ul>
<dl class="simple">
<dt>In future versions of cuFFTDx:</dt><dd><ul class="simple">
<li><p>Workspace requirement may be removed for other configurations.</p></li>
<li><p>FFT configurations that do not require workspace will continue to do so.</p></li>
</ul>
</dd>
</dl>
</div></blockquote>


</div>
</div>
<footer>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>cuFFTDx API Reference &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Operators" href="operators.html" />
<link rel="prev" title="Requirements and Functionality" href="../requirements_func.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>cuFFTDx API Reference</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<div class="section" id="cufftdx-api-reference">
<span id="api-reference-label"></span><h1>cuFFTDx API Reference<a class="headerlink" href="#cufftdx-api-reference" title="Permalink to this headline"></a></h1>
<p>Here you can find a description of the main components of the cuFFTDx library, with usage examples.</p>
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference internal" href="operators.html">Operators</a><ul>
<li class="toctree-l2"><a class="reference internal" href="operators.html#description-operators">Description Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="operators.html#execution-operators">Execution Operators</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="traits.html">Traits</a><ul>
<li class="toctree-l2"><a class="reference internal" href="traits.html#description-traits">Description Traits</a></li>
<li class="toctree-l2"><a class="reference internal" href="traits.html#execution-traits">Execution Traits</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="methods.html">Execution Methods</a><ul>
<li class="toctree-l2"><a class="reference internal" href="methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l2"><a class="reference internal" href="methods.html#block-execute-method">Block Execute Method</a></li>
<li class="toctree-l2"><a class="reference internal" href="methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</div>
</div>


</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="../requirements_func.html" class="btn btn-neutral float-left" title="Requirements and Functionality" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="operators.html" class="btn btn-neutral float-right" title="Operators" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="#" />
<link rel="search" title="Search" href="search.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="api/index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="api/operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
<li>Index</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">


<h1 id="index">Index</h1>

<div class="genindex-jumpbox">

</div>


</div>
</div>
<footer>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Release Notes &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="prev" title="Execution Methods" href="api/methods.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="api/index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="api/operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
<li>Release Notes</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<div class="section" id="release-notes">
<span id="requirements-label"></span><h1>Release Notes<a class="headerlink" href="#release-notes" title="Permalink to this headline"></a></h1>
<p>This section includes significant changes, new features, performance improvements, and various issues. Unless noted,
listed issues should not impact functionality. When functionality is impacted, we offer a work-around to avoid the issue (if available).</p>
<div class="section" id="id1">
<h2>1.0.0<a class="headerlink" href="#id1" title="Permalink to this headline"></a></h2>
<p>The first general availability (GA) release of the cuFFTDx library.</p>
<div class="section" id="new-features">
<h3>New Features<a class="headerlink" href="#new-features" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><p>Added new shared API for block FFT execution, see <a class="reference internal" href="api/methods.html#block-execute-method-label"><span class="std std-ref">block execution methods</span></a>.</p></li>
<li><p>Added and documented <a class="reference internal" href="api/traits.html#stride-block-trait-label"><span class="std std-ref">FFT::stride</span></a>.</p></li>
<li><p>Optimized default <a class="reference internal" href="api/operators.html#ept-operator-label"><span class="std std-ref">ElementsPerThread</span></a> and <a class="reference internal" href="api/operators.html#fftsperblock-operator-label"><span class="std std-ref">FFTsPerBlock</span></a> values for SM80 (targeting A100) and SM70 (targeting V100).</p></li>
<li><p>Restored full performance of power-of-two kernels in cuFFTDx.</p></li>
</ul>
</div>
<div class="section" id="resolved-issues">
<h3>Resolved Issues<a class="headerlink" href="#resolved-issues" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><p>The <code class="docutils literal notranslate"><span class="pre">ptxas</span></code> warning <code class="docutils literal notranslate"><span class="pre">Program uses 32-bit address on line &#39;XXX&#39; which is conflicting with .address_size 64</span></code> no longer appears.</p></li>
</ul>
</div>
</div>
<div class="section" id="id2">
<h2>0.3.1<a class="headerlink" href="#id2" title="Permalink to this headline"></a></h2>
<p>The last early access (EA) release of the cuFFTDx library.</p>
<div class="section" id="known-issues">
<h3>Known Issues<a class="headerlink" href="#known-issues" title="Permalink to this headline"></a></h3>
<ul>
<li><p><code class="code highlight cpp docutils literal notranslate"><span class="n"><span class="pre">ptxas</span></span></code> warning about pointer size conflict:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ptxas warning : Program uses <span class="m">32</span>-bit address on line <span class="s1">&#39;XXX&#39;</span> which is conflicting with .address_size <span class="m">64</span>
</pre></div>
</div>
<p>This warning may appear when compiling, but it does not impact functionality or performance.</p>
</li>
</ul>
</div>
</div>
</div>


</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="api/methods.html" class="btn btn-neutral float-left" title="Execution Methods" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
</div>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/cufftdx_override.css" type="text/css" />

<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/js/theme.js"></script>
<script src="_static/searchtools.js"></script>
<script src="_static/language_data.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="#" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="#" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="api/index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="api/operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="api/methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="api/methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="api/methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
<li>Search</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<noscript>
<div id="fallback" class="admonition warning">
<p class="last">
Please activate JavaScript to enable the search functionality.
</p>
</div>
</noscript>


<div id="search-results">

</div>

</div>
</div>
<footer>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script>
jQuery(function() { Search.loadIndex("searchindex.js"); });
</script>

<script id="searchindexloader"></script>


<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>



</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>&lt;no title&gt; &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../api/index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../api/traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../api/methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>&lt;no title&gt;</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>It is not guaranteed that executions of the same FFTs (size, direction, type, precision) but with different</p>
<ul class="simple">
<li><p>number of elements per thread (<a class="reference internal" href="../api/operators.html#ept-operator-label"><span class="std std-ref">ElementsPerThread</span></a>),</p></li>
<li><p>number of FFTs calculated per CUDA block (<a class="reference internal" href="../api/operators.html#fftsperblock-operator-label"><span class="std std-ref">FFTsPerBlock</span></a>), or</p></li>
<li><p>block dimension (<a class="reference internal" href="../api/operators.html#blockdim-operator-label"><span class="std std-ref">BlockDim</span></a>),</p></li>
</ul>
<p>will produce bit-identical results.</p>
</div>


</div>
</div>
<footer>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>&lt;no title&gt; &mdash; cuFFTDx 1.0.0 documentation</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/cufftdx_override.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>

<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> cuFFTDx
</a>
<div class="version">
1.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>

<style>
/* Sidebar header (and topbar for mobile) */
.wy-side-nav-search, .wy-nav-top {
background: #76b900;
}

.wy-side-nav-search a:link, .wy-nav-top a:link {
color: #fff;
}
.wy-side-nav-search a:visited, .wy-nav-top a:visited {
color: #fff;
}
.wy-side-nav-search a:hover, .wy-nav-top a:hover {
color: #fff;
}

.wy-menu-vertical a:link, .wy-menu-vertical a:visited {
color: #d9d9d9
}

.wy-menu-vertical a:active {
background-color: #76b900
}

.wy-side-nav-search>div.version {
color: rgba(0, 0, 0, 0.3)
}

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
white-space: normal;
}

.wy-table-responsive {
margin-bottom: 24px;
max-width: 100%;
overflow: visible;
}
</style>

</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../index.html">Documentation home</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User guide:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html">First FFT using cuFFTDx</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-next">What next?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#compilation">Compilation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../introduction.html#your-next-custom-fft-kernels">Your next custom FFT kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#what-happens-under-the-hood">What happens under the hood?</a></li>
<li class="toctree-l2"><a class="reference internal" href="../introduction.html#why">Why?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../performance.html">Achieving high performance</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#general-advice">General advice</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#memory-management">Memory management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#kernel-fusion">Kernel fusion</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#advanced">Advanced</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance.html#further-reading">Further reading</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../performance.html#references">References</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../requirements_func.html">Requirements and Functionality</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#requirements">Requirements</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../requirements_func.html#supported-compilers">Supported Compilers</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../requirements_func.html#supported-functionality">Supported Functionality</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../api/index.html">cuFFTDx API Reference</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/operators.html">Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/operators.html#description-operators">Description Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#size-operator">Size Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#direction-operator">Direction Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#type-operator">Type Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#precision-operator">Precision Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#sm-operator">SM Operator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/operators.html#execution-operators">Execution Operators</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#thread-operator">Thread Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#block-operator">Block Operator</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/operators.html#block-configuration-operators">Block Configuration Operators</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../api/traits.html">Traits</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/traits.html#description-traits">Description Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#size-trait">Size Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#type-trait">Type Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#direction-trait">Direction Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#precision-trait">Precision Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-trait">Is FFT? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-execution-trait">Is FFT Execution? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-complete-trait">Is FFT-complete? Trait</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#is-fft-complete-execution-trait">Is FFT-complete Execution? Trait</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/traits.html#execution-traits">Execution Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#thread-traits">Thread Traits</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/traits.html#block-traits">Block Traits</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../api/methods.html">Execution Methods</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#thread-execute-method">Thread Execute Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#block-execute-method">Block Execute Method</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../api/methods.html#value-format">Value Format</a></li>
<li class="toctree-l4"><a class="reference internal" href="../api/methods.html#input-output-data-format">Input/Output Data Format</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../api/methods.html#make-workspace-function">Make Workspace Function</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../release_notes.html">Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id1">1.0.0</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#new-features">New Features</a></li>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#resolved-issues">Resolved Issues</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../release_notes.html#id2">0.3.1</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../release_notes.html#known-issues">Known Issues</a></li>
</ul>
</li>
</ul>
</li>
</ul>

</div>
</div>
</nav>

<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">cuFFTDx</a>
</nav>

<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>&lt;no title&gt;</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">

<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>It is not guaranteed that executions of exactly the same FFTs on GPUs of different CUDA architectures will produce
bit-identical results.</p>
</div>


</div>
</div>
<footer>

<hr/>

<div role="contentinfo">
<p>&#169; Copyright 2022, NVIDIA Corporation.</p>
</div>

Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>

<style>
a:link, a:visited {
color: #76b900;
}

a:hover {
color: #8c0;
}

.rst-content dl:not(.docutils) dt {
background: rgba(118, 185, 0, 0.1);
color: rgba(59,93,0,1);
border-top: solid 3px rgba(59,93,0,1);
}
</style>


</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# Build script for the cuFFTDx examples: declares the project, then locates cuFFTDx either
# through the mathDx package or through a standalone cuFFTDx package.
cmake_minimum_required(VERSION 3.18.0)

# cuFFTDxExamples project
project(cuFFTDxExamples LANGUAGES CXX CUDA)

# Find the CUDA Toolkit package; it is required for the NVRTC sample (CUDA::nvrtc).
# NOTE(review): the lookup is not marked REQUIRED, so a missing toolkit is not reported here.
find_package(CUDAToolkit)

# Project options: which package is used to locate cuFFTDx (mathDx is the default).
option(USE_MATHDX_PACKAGE "Use mathDx package to find cuFFTDx" ON)
option(USE_CUFFTDX_PACKAGE "Use cuFFTDx package to find cuFFTDx" OFF)

# A cufftdx_ROOT hint (CMake variable or environment variable) switches to the cuFFTDx package.
if(DEFINED cufftdx_ROOT OR DEFINED ENV{cufftdx_ROOT})
SET(USE_CUFFTDX_PACKAGE ON CACHE BOOL "Use cuFFTDx package to find cuFFTDx" FORCE)
SET(USE_MATHDX_PACKAGE OFF CACHE BOOL "Use mathDx package to find cuFFTDx" FORCE)
endif()

# A mathdx_ROOT hint switches back to the mathDx package. This check runs second, so it takes
# precedence when both hints are defined.
if(DEFINED mathdx_ROOT OR DEFINED ENV{mathdx_ROOT})
SET(USE_CUFFTDX_PACKAGE OFF CACHE BOOL "Use cuFFTDx package to find cuFFTDx" FORCE)
SET(USE_MATHDX_PACKAGE ON CACHE BOOL "Use mathDx package to find cuFFTDx" FORCE)
endif()

# Only search for a package when no `cufftdx` target has been defined yet.
if(NOT TARGET cufftdx)
if(USE_MATHDX_PACKAGE)
message(STATUS "Using mathDx package to find cuFFTDx")
# Find mathDx and cuFFTDx (a component of mathDx).
# Default path: "/opt/nvidia/mathdx/22.2"; the path to mathDx can be passed to CMake in the mathdx_ROOT variable.
find_package(mathdx REQUIRED COMPONENTS cufftdx CONFIG
PATHS
"${PROJECT_SOURCE_DIR}/../.." # example/cufftdx
"${PROJECT_SOURCE_DIR}/../../.." # include/cufftdx/example
"/opt/nvidia/mathdx/22.2"
)
elseif(USE_CUFFTDX_PACKAGE)
message(STATUS "Using cuFFTDx package to find cuFFTDx")
# Find cuFFTDx.
# Default path: "/opt/nvidia/mathdx/22.2/include/cufftdx"; the path to cuFFTDx can be passed to CMake in the cufftdx_ROOT variable.
find_package(cufftdx REQUIRED CONFIG PATHS "/opt/nvidia/mathdx/22.2/include/cufftdx" "${PROJECT_SOURCE_DIR}/../../cufftdx")
else()
# Both package options are OFF: there is no way left to locate cuFFTDx.
message(FATAL_ERROR "No cuFFTDx package found")
endif()
endif()

# Standalone configuration: target architectures, warning flags, language standards and CTest.
# Applied only when no `cufftdx` target exists and neither released-package test mode is set.
if((NOT TARGET cufftdx) AND (NOT CUFFTDX_TEST_RELEASED_PACKAGE) AND (NOT MATHDX_TEST_RELEASED_PACKAGE))
    # Targeted CUDA Architectures, see https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html#prop_tgt:CUDA_ARCHITECTURES
    if(CUFFTDX_TARGET_ARCHS)
        set(CUFFTDX_TARGET_ARCHS 70;75;80 CACHE
            STRING "[LEGACY] List of targeted cuFFTDx Example CUDA architectures (compute capabilities), for example \"70;75\". Can't be older than 70."
        )
        list(SORT CUFFTDX_TARGET_ARCHS)
        # Remove unsupported architectures
        list(REMOVE_ITEM CUFFTDX_TARGET_ARCHS 30;32;35;37;50;52;53;60;61;62)

        # Translate legacy option CUFFTDX_TARGET_ARCHS into CUFFTDX_CUDA_ARCHITECTURES
        # by appending the "-real" suffix to every entry.
        set(CUFFTDX_TARGET_ARCHS_TRANSLATED)
        foreach(ARCH ${CUFFTDX_TARGET_ARCHS})
            list(APPEND CUFFTDX_TARGET_ARCHS_TRANSLATED ${ARCH}-real)
        endforeach()
        set(CUFFTDX_CUDA_ARCHITECTURES ${CUFFTDX_TARGET_ARCHS_TRANSLATED} CACHE
            STRING "List of targeted cuFFTDx CUDA architectures, for example \"70-real;75-real;80\""
        )
    else()
        set(CUFFTDX_CUDA_ARCHITECTURES 70-real;75-real;80-real CACHE
            STRING "List of targeted cuFFTDX CUDA architectures, for example \"70-real;75-real;80\""
        )
        # Remove unsupported architectures (plain, -real and -virtual spellings)
        list(REMOVE_ITEM CUFFTDX_CUDA_ARCHITECTURES 30;32;35;37;50;52;53;60;61;62)
        list(REMOVE_ITEM CUFFTDX_CUDA_ARCHITECTURES 30-real;32-real;35-real;37-real;50-real;52-real;53-real;60-real;61-real;62-real)
        list(REMOVE_ITEM CUFFTDX_CUDA_ARCHITECTURES 30-virtual;32-virtual;35-virtual;37-virtual;50-virtual;52-virtual;53-virtual;60-virtual;61-virtual;62-virtual)
    endif()
    message(STATUS "Targeted cuFFTDx Examples CUDA Architectures: ${CUFFTDX_CUDA_ARCHITECTURES}")

    # Global CXX/CUDA flags
    if(NOT MSVC)
        set(CUFFTDX_CUDA_CXX_FLAGS "${CUFFTDX_CUDA_CXX_FLAGS} -Wall -Wextra")
    else()
        add_definitions(-D_CRT_SECURE_NO_WARNINGS)
        add_definitions(-D_CRT_NONSTDC_NO_WARNINGS)
        add_definitions(-D_SCL_SECURE_NO_WARNINGS)
        add_definitions(-DNOMINMAX)
        # Append to CUFFTDX_CUDA_CXX_FLAGS itself so that both flags (and any flags supplied
        # beforehand) accumulate instead of the second assignment discarding the first.
        set(CUFFTDX_CUDA_CXX_FLAGS "${CUFFTDX_CUDA_CXX_FLAGS} /W3") # Warning level
        set(CUFFTDX_CUDA_CXX_FLAGS "${CUFFTDX_CUDA_CXX_FLAGS} /WX") # All warnings are errors
    endif()

    # Global CXX flags/options
    set(CMAKE_CXX_STANDARD 17)
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
    set(CMAKE_CXX_EXTENSIONS OFF)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CUFFTDX_CUDA_CXX_FLAGS}")

    # Global CUDA CXX flags/options; the host-compiler flags are forwarded through -Xcompiler.
    set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    set(CMAKE_CUDA_EXTENSIONS OFF)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"${CUFFTDX_CUDA_CXX_FLAGS}\"")

    # Clang
    if(BUILD_CUFFTDX)
        if(CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*")
            # clang complains about unused function in CUDA system headers
            set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-unused-function")
        endif()
    endif()

    # CUDA Architectures: the global default is switched off; every example target sets its
    # own CUDA_ARCHITECTURES property from CUFFTDX_CUDA_ARCHITECTURES.
    set(CMAKE_CUDA_ARCHITECTURES OFF)

    # Enable testing (ctest)
    enable_testing()
endif()

# ###############################################################
# add_cufftdx_example
# ###############################################################
# add_cufftdx_example(<group target> <test name> <sources>)
#
# Builds one example executable from <sources> (compiled as CUDA), links it against cuFFTDx,
# registers it as a CTest test labelled CUFFTDX_EXAMPLE and attaches it to <group target>.
# The executable is named after the first source file, without directory or extension.
function(add_cufftdx_example GROUP_TARGET EXAMPLE_NAME EXAMPLE_SOURCES)
    # Derive the executable name from the first listed source.
    list(GET EXAMPLE_SOURCES 0 _main_source)
    get_filename_component(_exe ${_main_source} NAME_WE)

    # Sources are treated as CUDA regardless of their file extension.
    set_source_files_properties(${EXAMPLE_SOURCES} PROPERTIES LANGUAGE CUDA)
    add_executable(${_exe} ${EXAMPLE_SOURCES})
    add_dependencies(${GROUP_TARGET} ${_exe})

    # Prefer the mathDx-provided cuFFTDx target when it exists.
    target_link_libraries(${_exe}
        PRIVATE
            $<IF:$<TARGET_EXISTS:mathdx::cufftdx>,mathdx::cufftdx,cufftdx::cufftdx>
    )
    target_compile_options(${_exe}
        PRIVATE
            "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xfatbin -compress-all>"
    )
    set_target_properties(${_exe}
        PROPERTIES
            CUDA_ARCHITECTURES "${CUFFTDX_CUDA_ARCHITECTURES}"
    )
    if(NOT TARGET cufftdx)
        set_target_properties(${_exe}
            PROPERTIES
                RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example/cufftdx"
        )
    endif()

    # Register the example with CTest.
    add_test(NAME ${EXAMPLE_NAME} COMMAND ${_exe})
    set_tests_properties(${EXAMPLE_NAME}
        PROPERTIES
            LABELS "CUFFTDX_EXAMPLE"
    )
endfunction()

# ###############################################################
# add_cufft_and_cufftdx_example
# ###############################################################
# add_cufft_and_cufftdx_example(<group target> <test name> <sources>)
#
# Same as a plain cuFFTDx example, but the executable is additionally linked against cuFFT.
# When CUFFTDX_EXAMPLES_CUFFT_CALLBACK is set, the static cuFFT library is used, separable
# compilation is enabled and CUFFTDX_EXAMPLES_CUFFT_CALLBACK is defined for the sources.
function(add_cufft_and_cufftdx_example GROUP_TARGET EXAMPLE_NAME EXAMPLE_SOURCES)
    # Derive the executable name from the first listed source.
    list(GET EXAMPLE_SOURCES 0 _main_source)
    get_filename_component(_exe ${_main_source} NAME_WE)

    # Sources are treated as CUDA regardless of their file extension.
    set_source_files_properties(${EXAMPLE_SOURCES} PROPERTIES LANGUAGE CUDA)
    add_executable(${_exe} ${EXAMPLE_SOURCES})
    add_dependencies(${GROUP_TARGET} ${_exe})

    # Prefer the mathDx-provided cuFFTDx target when it exists.
    target_link_libraries(${_exe}
        PRIVATE
            $<IF:$<TARGET_EXISTS:mathdx::cufftdx>,mathdx::cufftdx,cufftdx::cufftdx>
    )

    # cuFFT: a locally defined `cufft` target wins over the CUDA:: imported targets.
    if(CUFFTDX_EXAMPLES_CUFFT_CALLBACK)
        if(TARGET cufft)
            target_link_libraries(${_exe} PRIVATE cufft_static)
        else()
            target_link_libraries(${_exe} PRIVATE CUDA::cufft_static)
        endif()
        set_target_properties(${_exe}
            PROPERTIES
                CUDA_SEPARABLE_COMPILATION ON
        )
        target_compile_definitions(${_exe} PRIVATE CUFFTDX_EXAMPLES_CUFFT_CALLBACK)
    else()
        if(TARGET cufft)
            target_link_libraries(${_exe} PRIVATE cufft)
        else()
            target_link_libraries(${_exe} PRIVATE CUDA::cufft)
        endif()
    endif()

    target_compile_options(${_exe}
        PRIVATE
            "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xfatbin -compress-all>"
    )
    set_target_properties(${_exe}
        PROPERTIES
            CUDA_ARCHITECTURES "${CUFFTDX_CUDA_ARCHITECTURES}"
    )
    if(NOT TARGET cufftdx)
        set_target_properties(${_exe}
            PROPERTIES
                RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example/cufftdx"
        )
    endif()

    # Register the example with CTest.
    add_test(NAME ${EXAMPLE_NAME} COMMAND ${_exe})
    set_tests_properties(${EXAMPLE_NAME}
        PROPERTIES
            LABELS "CUFFTDX_EXAMPLE"
    )
endfunction()

# ###############################################################
# add_cufftdx_nvrtc_example
# ###############################################################
# add_cufftdx_nvrtc_example(<group target> <test name> <sources>)
#
# Builds an example that links against NVRTC in addition to cuFFTDx. The CUDA and cuFFTDx
# include directories are passed to the sources as the CUDA_INCLUDE_DIR and
# CUFFTDX_INCLUDE_DIRS compile definitions.
function(add_cufftdx_nvrtc_example GROUP_TARGET EXAMPLE_NAME EXAMPLE_SOURCES)
    # Derive the executable name from the first listed source.
    list(GET EXAMPLE_SOURCES 0 _main_source)
    get_filename_component(_exe ${_main_source} NAME_WE)

    # Sources are treated as CUDA regardless of their file extension.
    set_source_files_properties(${EXAMPLE_SOURCES} PROPERTIES LANGUAGE CUDA)
    add_executable(${_exe} ${EXAMPLE_SOURCES})
    add_dependencies(${GROUP_TARGET} ${_exe})

    target_link_libraries(${_exe}
        PRIVATE
            $<IF:$<TARGET_EXISTS:mathdx::cufftdx>,mathdx::cufftdx,cufftdx::cufftdx>
            CUDA::nvrtc
    )
    set_target_properties(${_exe}
        PROPERTIES
            CUDA_ARCHITECTURES "${CUFFTDX_CUDA_ARCHITECTURES}"
    )

    if(NOT TARGET cufftdx)
        # No `cufftdx` target: take the include directories from the located package.
        set_target_properties(${_exe}
            PROPERTIES
                RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example/cufftdx"
        )
        target_compile_definitions(${_exe}
            PRIVATE
                CUDA_INCLUDE_DIR="${CUDAToolkit_INCLUDE_DIRS}"
                CUFFTDX_INCLUDE_DIRS="${cufftdx_INCLUDE_DIRS}"
        )
    else()
        # A `cufftdx` target exists: point at the libcufftdx source and binary include trees.
        target_compile_definitions(${_exe}
            PRIVATE
                CUDA_INCLUDE_DIR="${CUDAToolkit_INCLUDE_DIRS}"
                CUFFTDX_INCLUDE_DIRS="${CMAKE_SOURCE_DIR}/libcufftdx/include\\\;${CMAKE_BINARY_DIR}/libcufftdx/include"
        )
    endif()

    # Register the example with CTest.
    add_test(NAME ${EXAMPLE_NAME} COMMAND ${_exe})
    set_tests_properties(${EXAMPLE_NAME}
        PROPERTIES
            LABELS "CUFFTDX_EXAMPLE"
    )
endfunction()

# ###############################################################
# cuFFTDx Examples
# ###############################################################

# Umbrella target: every example registered below is added as a dependency of it.
add_custom_target(cufftdx_examples)

# CUFFTDX_EXAMPLES_CUFFT_CALLBACK: when ON, convolution_performance is linked against the
# static cuFFT library and built with separable compilation.
option(CUFFTDX_EXAMPLES_CUFFT_CALLBACK "Build cuFFTDx convolution_performance example with cuFFT callback" OFF)

# Examples that only need cuFFTDx. Arguments: group target, CTest test name, source file.
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_thread" simple_fft_thread.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_thread_fp16" simple_fft_thread_fp16.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block" simple_fft_block.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_half2" simple_fft_block_half2.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_fp16" simple_fft_block_fp16.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_r2c" simple_fft_block_r2c.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_r2c_fp16" simple_fft_block_r2c_fp16.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_c2r" simple_fft_block_c2r.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_c2r_fp16" simple_fft_block_c2r_fp16.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_shared" simple_fft_block_shared.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_std_complex" simple_fft_block_std_complex.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.simple_fft_block_cub_io" simple_fft_block_cub_io.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.convolution" convolution.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.convolution_r2c_c2r" convolution_r2c_c2r.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.block_fft_performance" block_fft_performance.cu)
add_cufftdx_example(cufftdx_examples "cuFFTDx.example.block_fft_performance_many" block_fft_performance_many.cu)
# Example that additionally links against cuFFT.
add_cufft_and_cufftdx_example(cufftdx_examples "cuFFTDx.example.convolution_performance" convolution_performance.cu)
# Example that additionally links against NVRTC.
add_cufftdx_nvrtc_example(cufftdx_examples "cuFFTDx.example.nvrtc_fft_thread" nvrtc_fft_thread.cu)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Builds every cuFFTDx example (*.cu) in this directory directly with nvcc.
# Each source file produces one executable with the same base name.

NVCC=nvcc
NVCC_FLAGS=-std=c++17 -O3 --generate-code arch=compute_70,code=sm_70 --generate-code arch=compute_75,code=sm_75 --generate-code arch=compute_80,code=sm_80 --generate-code arch=compute_86,code=sm_86

# Include paths. The CUDA include directory is derived from the location of nvcc on PATH and
# is handed to the sources as string macros together with the cuFFTDx include directory.
CUFFTDX_INCLUDE_DIR=../include/
CUDA_BIN_DIR=$(shell dirname `which $(NVCC)`)
CUDA_INCLUDE_DIR=$(CUDA_BIN_DIR)/../include
NVRTC_DEFINES=-DCUDA_INCLUDE_DIR="\"$(CUDA_INCLUDE_DIR)\"" -DCUFFTDX_INCLUDE_DIRS="\"$(CUFFTDX_INCLUDE_DIR)\""

# Plain examples: everything except the sources that need extra libraries.
SRCS=$(filter-out nvrtc_fft_thread.cu convolution_performance.cu, $(wildcard *.cu))
TARGETS=$(patsubst %.cu,%,$(SRCS))

# Examples that link against NVRTC and the CUDA driver library.
NVRTC_SRCS=$(wildcard nvrtc_*.cu)
NVRTC_TARGETS=$(patsubst %.cu,%,$(NVRTC_SRCS))

# Example that links against cuFFT.
CUFFT_SRCS=convolution_performance.cu
CUFFT_TARGETS=convolution_performance

$(TARGETS): %: %.cu
	$(NVCC) -o $@ $< $(NVCC_FLAGS) -I$(CUFFTDX_INCLUDE_DIR)

$(NVRTC_TARGETS): %: %.cu
	$(NVCC) -o $@ $< $(NVCC_FLAGS) -I$(CUFFTDX_INCLUDE_DIR) $(NVRTC_DEFINES) -lnvrtc -lcuda

$(CUFFT_TARGETS): %: %.cu
	$(NVCC) -o $@ $< $(NVCC_FLAGS) -I$(CUFFTDX_INCLUDE_DIR) $(NVRTC_DEFINES) -lcuda -lcufft

.PHONY: all clean

# `$(echo ...)` is a make variable reference that expands to nothing; a shell `echo` in the
# recipe is what actually prints the list of NVRTC targets.
all: $(TARGETS) $(NVRTC_TARGETS) $(CUFFT_TARGETS)
	@echo $(NVRTC_TARGETS)

clean:
	rm -f $(TARGETS) $(NVRTC_TARGETS) $(CUFFT_TARGETS)

.DEFAULT_GOAL := all
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>

#include "block_fft_performance.hpp"

// Benchmarks a single block-level FFT configuration for the given SM architecture:
// forward complex-to-complex, single precision, 512 points, 8 elements per thread and one
// FFT per CUDA block. Results are printed in verbose form by benchmark_block_fft.
//
// Template parameters:
//   Arch - value passed to cufftdx::SM<> (target architecture of the FFT description).
template<unsigned int Arch>
void block_fft_performance() {
    using namespace cufftdx;

    // FFT description without the size; benchmark_block_fft receives the size separately.
    using fft_base = decltype(Block() + Type<fft_type::c2c>() + Direction<fft_direction::forward>() +
                              Precision<float>() + SM<Arch>());

    static constexpr unsigned int elements_per_thread = 8;
    static constexpr unsigned int fft_size            = 512;
    static constexpr unsigned int ffts_per_block      = 1;

    // The benchmark runs on its own stream, which is released once the measurement is done.
    cudaStream_t stream;
    // Terminated with ';' like every other CUDA_CHECK_AND_EXIT statement, so the line does
    // not depend on how the macro happens to expand.
    CUDA_CHECK_AND_EXIT(cudaStreamCreate(&stream));
    benchmark_block_fft<fft_base, fft_size, elements_per_thread, ffts_per_block>(stream, true);
    CUDA_CHECK_AND_EXIT(cudaStreamDestroy(stream));
}

// Callable wrapper handed to example::sm_runner; invoking it runs the benchmark for the
// architecture given by Arch.
template<unsigned int Arch>
struct block_fft_performance_functor {
    void operator()() {
        block_fft_performance<Arch>();
    }
};

// Entry point: command-line arguments are ignored; the process exit code is whatever
// example::sm_runner returns for the benchmark functor.
int main(int /*argc*/, char** /*argv*/) {
    const int exit_code = example::sm_runner<block_fft_performance_functor>();
    return exit_code;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#ifndef CUFFTDX_EXAMPLE_BLOCK_FFT_PERFORMANCE_HPP_
#define CUFFTDX_EXAMPLE_BLOCK_FFT_PERFORMANCE_HPP_

#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>

#include <cuda_runtime_api.h>
#include <cufftdx.hpp>

#include "block_io.hpp"
#include "common.hpp"
#include "random.hpp"

// Benchmark kernel: loads the data of one FFT, executes the FFT `repeats` times in place on
// the per-thread array, and stores the result back to `data`.
//
//   data      - global buffer the FFT input is loaded from and the output is stored to
//   repeats   - number of back-to-back FFT executions performed by the loop
//   workspace - cuFFTDx workspace object forwarded to every FFT::execute call
//
// Uses dynamic shared memory (`extern __shared__`); the FFT within the block is selected by
// threadIdx.y.
template<class FFT>
__launch_bounds__(FFT::max_threads_per_block) __global__ void block_fft_kernel(typename FFT::value_type* data,
                                                                                unsigned int              repeats,
                                                                                typename FFT::workspace_type workspace) {
    using element_type = typename FFT::value_type;
    extern __shared__ element_type shared_mem[];

    // Per-thread storage for this thread's share of the FFT elements
    element_type thread_data[FFT::storage_size];

    // Index of the FFT handled by this thread within the CUDA block, in range [0; FFT::ffts_per_block)
    const unsigned int fft_in_block = threadIdx.y;

    // Global memory -> thread-local array
    example::io<FFT>::load(data, thread_data, fft_in_block);

    // Run the FFT repeatedly; unrolling is disabled for this loop
    #pragma unroll 1
    for (unsigned int iteration = 0; iteration < repeats; ++iteration) {
        FFT().execute(thread_data, shared_mem, workspace);
    }

    // Thread-local array -> global memory
    example::io<FFT>::store(thread_data, data, fft_in_block);
}

// Timing helper based on CUDA events.
// The UseGraphs template parameter is not referenced by the code below.
template<bool UseGraphs = false>
struct measure {
    // Calls `kernel` WarmUpRuns times untimed, then times one more call with events recorded
    // on `stream`. Returns the elapsed time of the timed call in milliseconds.
    template<unsigned int WarmUpRuns, typename Kernel>
    static float execution(Kernel&& kernel, cudaStream_t stream) {
        cudaEvent_t timer_begin;
        cudaEvent_t timer_end;
        CUDA_CHECK_AND_EXIT(cudaEventCreate(&timer_begin));
        CUDA_CHECK_AND_EXIT(cudaEventCreate(&timer_end));
        CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());

        // Warm-up phase (not timed)
        for (unsigned int run = 0; run < WarmUpRuns; ++run) {
            kernel();
        }

        // Surface any warm-up error and drain the device before timing starts
        CUDA_CHECK_AND_EXIT(cudaGetLastError());
        CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());

        // Timed phase
        CUDA_CHECK_AND_EXIT(cudaEventRecord(timer_begin, stream));
        kernel();
        CUDA_CHECK_AND_EXIT(cudaEventRecord(timer_end, stream));
        CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());

        float elapsed_ms;
        CUDA_CHECK_AND_EXIT(cudaEventElapsedTime(&elapsed_ms, timer_begin, timer_end));

        CUDA_CHECK_AND_EXIT(cudaEventDestroy(timer_begin));
        CUDA_CHECK_AND_EXIT(cudaEventDestroy(timer_end));
        return elapsed_ms;
    }
};

// Benchmarks one block-level FFT configuration and prints the result to std::cout.
//
// Template parameters:
//   FFTBase      - cuFFTDx FFT description without a size; Size<S> is added here.
//   S            - FFT size.
//   EPT          - elements per thread, used when UseSuggested is false.
//   FPB          - FFTs per CUDA block, used when UseSuggested is false.
//   UseSuggested - when true, EPT/FPB are ignored and FFT_complete::elements_per_thread and
//                  FFT_complete::suggested_ffts_per_block are used instead.
//
// Parameters:
//   stream  - CUDA stream the kernels are launched on and the timing events are recorded on.
//   verbose - true prints a labelled multi-line report, false prints one comma-separated line.
//
// The kernel is timed twice, once with N and once with 2*N in-kernel repeats; the difference
// is reported as the time of N repeats with the fixed overhead cancelled out.
template<class FFTBase, unsigned int S /* Size */, unsigned int EPT, unsigned int FPB = 1, bool UseSuggested = false>
void benchmark_block_fft(const cudaStream_t& stream, bool verbose = false) {
using namespace cufftdx;

// Create complete FFT description, only now we can query EPT and suggested FFTs per block
using FFT_complete = decltype(FFTBase() + Size<S>());

// inside_repeats: FFT executions inside one kernel launch (N);
// kernel_repeats: kernel launches per timed call; warm_up_runs: untimed calls before timing.
static constexpr unsigned int inside_repeats = 4000;
static constexpr unsigned int kernel_repeats = 1;
static constexpr unsigned int warm_up_runs = 1;

static constexpr unsigned int fft_size = S;
static constexpr unsigned int elements_per_thread = UseSuggested ? FFT_complete::elements_per_thread : EPT;
static constexpr unsigned int ffts_per_block = UseSuggested ? FFT_complete::suggested_ffts_per_block : FPB;

// Final FFT description with the chosen elements-per-thread and FFTs-per-block values
using FFT = decltype(FFT_complete() + ElementsPerThread<elements_per_thread>() + FFTsPerBlock<ffts_per_block>());
using complex_type = typename FFT::value_type;

// Increase max shared memory if needed
CUDA_CHECK_AND_EXIT(cudaFuncSetAttribute(
block_fft_kernel<FFT>, cudaFuncAttributeMaxDynamicSharedMemorySize, FFT::shared_memory_size));

// Query how many blocks of this kernel can be resident on one multiprocessor
int blocks_per_multiprocessor = 0;
CUDA_CHECK_AND_EXIT(
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_per_multiprocessor,
block_fft_kernel<FFT>,
FFT::block_dim.x * FFT::block_dim.y * FFT::block_dim.z,
FFT::shared_memory_size));

// Grid size: the resident-block count per multiprocessor times the number of multiprocessors
unsigned int multiprocessor_count = example::get_multiprocessor_count();
unsigned int cuda_blocks = blocks_per_multiprocessor * multiprocessor_count;

// The memory required to run fft (number of complex_type values that must be allocated).
// For r2c, the input consists of fft_size real numbers and the output consists of (fft_size / 2 + 1) complex numbers.
// One memory block will be used to store input and output, so the memory block must fit
// max((fft_size + 1) / 2, fft_size / 2 + 1) = (fft_size / 2 + 1) complex numbers.
// For c2r, the input consists of (fft_size / 2 + 1) complex numbers and the output consists of fft_size real numbers,
// so the minimal required memory size is the same.
unsigned int input_size =
ffts_per_block * cuda_blocks * (type_of<FFT>::value == fft_type::c2c ? fft_size : (fft_size / 2 + 1));

// Host data
std::vector<complex_type> input =
example::get_random_complex_data<typename complex_type::value_type>(input_size, -10, 10);

// Device data
complex_type* device_buffer;
auto size_bytes = input.size() * sizeof(complex_type);
CUDA_CHECK_AND_EXIT(cudaMalloc(&device_buffer, size_bytes));
// Copy host to device
CUDA_CHECK_AND_EXIT(cudaMemcpy(device_buffer, input.data(), size_bytes, cudaMemcpyHostToDevice));
CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());

// Create the cuFFTDx workspace that is passed to the kernel; creation errors are reported
// through error_code.
cudaError_t error_code = cudaSuccess;
auto workspace = make_workspace<FFT>(error_code);
CUDA_CHECK_AND_EXIT(error_code);
CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());
CUDA_CHECK_AND_EXIT(cudaGetLastError());

// Measure performance of N trials
double ms_n = measure<>::execution<warm_up_runs>(
[&]() {
for (unsigned int i = 0; i < kernel_repeats; i++) {
block_fft_kernel<FFT><<<cuda_blocks, FFT::block_dim, FFT::shared_memory_size, stream>>>(
device_buffer, inside_repeats, workspace);
}
},
stream);

// Check kernel error
CUDA_CHECK_AND_EXIT(cudaGetLastError());

// Copy the original input to the device again, so the second measurement starts from the
// same data as the first (the kernel overwrites the buffer in place)
CUDA_CHECK_AND_EXIT(cudaMemcpy(device_buffer, input.data(), size_bytes, cudaMemcpyHostToDevice));
CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());

// Measure performance of 2*N trials
double ms_n2 = measure<>::execution<warm_up_runs>(
[&]() {
for (unsigned int i = 0; i < kernel_repeats; i++) {
block_fft_kernel<FFT><<<cuda_blocks, FFT::block_dim, FFT::shared_memory_size, stream>>>(
device_buffer, 2 * inside_repeats, workspace);
}
},
stream);

CUDA_CHECK_AND_EXIT(cudaFree(device_buffer));

// Time for N repeats without overhead
auto time_n = ms_n2 - ms_n;
// Throughput estimate: 5 * fft_size * log2(fft_size) operations are counted per FFT;
// time_n is in milliseconds, hence the division by 1e6 to obtain GFLOPS.
double gflops = 1.0 * kernel_repeats * inside_repeats * ffts_per_block * cuda_blocks * 5.0 * fft_size *
(std::log(fft_size) / std::log(2)) / time_n / 1000000.0;

static const std::string fft_type_name = type_of<FFT>::value == fft_type::c2c ? "c2c" :
(type_of<FFT>::value == fft_type::c2r ? "c2r" :
"r2c");
if (verbose) {
// Labelled multi-line report
std::cout << "FFT type: " << fft_type_name << std::endl;
std::cout << "FFT size: " << fft_size << std::endl;
std::cout << "FFTs elements per thread: " << FFT::elements_per_thread << std::endl;
std::cout << "FFTs per block: " << ffts_per_block << std::endl;
std::cout << "CUDA blocks: " << cuda_blocks << std::endl;
std::cout << "Blocks per multiprocessor: " << blocks_per_multiprocessor << std::endl;
std::cout << "FFTs run: " << ffts_per_block * cuda_blocks << std::endl;
std::cout << "Shared memory: " << FFT::shared_memory_size << std::endl;
std::cout << "Avg Time [ms_n]: " << time_n / (inside_repeats * kernel_repeats) << std::endl;
std::cout << "Time (all) [ms_n]: " << time_n << std::endl;
std::cout << "Performance [GFLOPS]: " << gflops << std::endl;
} else {
// Compact single-line report: type, size, GFLOPS, average time per repeat
std::cout << fft_type_name << ", " << fft_size << ", " << gflops << ", "
<< time_n / (inside_repeats * kernel_repeats) << ", " << std::endl;
}
}

#endif // CUFFTDX_EXAMPLE_BLOCK_FFT_PERFORMANCE_HPP_
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>

#include "block_fft_performance.hpp"

// Benchmarks one block-level FFT configuration on `stream`.
//
// Template parameters:
//   Arch              - value passed to cufftdx::SM<>.
//   FFTSize           - FFT size.
//   FFTType           - c2c, r2c or c2r.
//   PrecisionType     - type passed to cufftdx::Precision<>.
//   FFTDirection      - added to the description only when FFTType is c2c.
//   UseSuggested      - forwarded to benchmark_block_fft together with the two values below.
//   ElementsPerThread - elements per thread forwarded to benchmark_block_fft.
//   FFTsPerBlock      - FFTs per block forwarded to benchmark_block_fft.
//
// In verbose mode an empty line is printed after the report.
template<unsigned int           Arch,
         unsigned int           FFTSize,
         cufftdx::fft_type      FFTType,
         class PrecisionType,
         cufftdx::fft_direction FFTDirection      = cufftdx::fft_direction::forward,
         bool                   UseSuggested      = true,
         unsigned int           ElementsPerThread = 8,
         unsigned int           FFTsPerBlock      = 1>
void block_fft_performance(const cudaStream_t& stream, bool verbose) {
    using namespace cufftdx;

    // Description shared by all FFT types
    using undirected_fft = decltype(Block() + Type<FFTType>() + Precision<PrecisionType>() + SM<Arch>());
    // Same description with the requested direction attached
    using directed_fft = decltype(undirected_fft() + Direction<FFTDirection>());
    // Only complex-to-complex FFTs carry a direction
    using benchmarked_fft = std::conditional_t<FFTType == fft_type::c2c, directed_fft, undirected_fft>;

    benchmark_block_fft<benchmarked_fft, FFTSize, ElementsPerThread, FFTsPerBlock, UseSuggested>(stream, verbose);

    if (verbose) {
        std::cout << std::endl;
    }
}

// Callable wrapper handed to example::sm_runner. Invoking it benchmarks a series of FFT
// configurations for the architecture given by Arch: sizes 137, 251, 512, 1024, 2048 and
// 4096 for c2c forward, c2c inverse, r2c and c2r, all in single precision. Every benchmark
// runs on one stream created here and prints its compact (non-verbose) report.
template<unsigned int Arch>
struct block_fft_performance_functor {
    void operator()() {
        using namespace cufftdx;

        cudaStream_t stream;
        // Terminated with ';' like every other CUDA_CHECK_AND_EXIT statement, so the line
        // does not depend on how the macro happens to expand.
        CUDA_CHECK_AND_EXIT(cudaStreamCreate(&stream));

        bool default_verbose = false;

        // To specify EPT and FPB values, set UseSuggested to false.
        // FFTDirection is used if and only if FFTType is C2C.
        // Below is an example of a test run with specified EPT and FPB values.
        block_fft_performance<Arch, 137, fft_type::c2c, float, fft_direction::forward, false, 8, 1>(stream,
                                                                                                    default_verbose);

        // Complex-to-complex, forward (default direction)
        block_fft_performance<Arch, 137, fft_type::c2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 251, fft_type::c2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 512, fft_type::c2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 1024, fft_type::c2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 2048, fft_type::c2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 4096, fft_type::c2c, float>(stream, default_verbose);

        // Complex-to-complex, inverse
        block_fft_performance<Arch, 137, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);
        block_fft_performance<Arch, 251, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);
        block_fft_performance<Arch, 512, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);
        block_fft_performance<Arch, 1024, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);
        block_fft_performance<Arch, 2048, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);
        block_fft_performance<Arch, 4096, fft_type::c2c, float, fft_direction::inverse>(stream, default_verbose);

        // Real-to-complex
        block_fft_performance<Arch, 137, fft_type::r2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 251, fft_type::r2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 512, fft_type::r2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 1024, fft_type::r2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 2048, fft_type::r2c, float>(stream, default_verbose);
        block_fft_performance<Arch, 4096, fft_type::r2c, float>(stream, default_verbose);

        // Complex-to-real
        block_fft_performance<Arch, 137, fft_type::c2r, float>(stream, default_verbose);
        block_fft_performance<Arch, 251, fft_type::c2r, float>(stream, default_verbose);
        block_fft_performance<Arch, 512, fft_type::c2r, float>(stream, default_verbose);
        block_fft_performance<Arch, 1024, fft_type::c2r, float>(stream, default_verbose);
        block_fft_performance<Arch, 2048, fft_type::c2r, float>(stream, default_verbose);
        block_fft_performance<Arch, 4096, fft_type::c2r, float>(stream, default_verbose);

        CUDA_CHECK_AND_EXIT(cudaStreamDestroy(stream));
    }
};

// Entry point: command-line arguments are ignored; the process exit code is whatever
// example::sm_runner returns for the benchmark functor.
int main(int /*argc*/, char** /*argv*/) {
    const int exit_code = example::sm_runner<block_fft_performance_functor>();
    return exit_code;
}
Loading

0 comments on commit 104d454

Please sign in to comment.