-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
374 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Manifest.toml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name = "LinearFold" | ||
uuid = "aebc047d-cbe2-4ea1-a181-a2987ad69fed" | ||
authors = ["Marco Matthies <[email protected]>"] | ||
version = "0.1.0" | ||
|
||
[deps] | ||
LinearFold_jll = "492392ba-fc20-5d72-afbd-f61f3c69e3d5" | ||
LinearPartition_jll = "e9cb6f04-5a86-57c9-b8cc-25aa5bdf9fd6" | ||
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,313 @@ | ||
module LinearFold | ||
|
||
using Unitful | ||
|
||
# TODO | ||
# - is_sharpturn | ||
# means allow hairpin lengths < 3 | ||
# - doesn't seem to work for energy (eval mode)? | ||
# energy("GGGACCC", "(((.)))") == energy("GGGACCC", "(((.)))"; is_sharpturn=true) | ||
# | ||
# LinearPartition | ||
# - is it possible to run mea with threshknot together? | ||
# - forest_file output? | ||
# | ||
# tests | ||
# - test kwarg options: | ||
# model, verbose, beamsize, is_sharpturn | ||
|
||
module Private | ||
|
||
using Unitful | ||
import LinearFold_jll | ||
import LinearPartition_jll | ||
|
||
function cmd_linearfold(; model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
is_eval::Bool=false, | ||
is_constraints::Bool=false, | ||
zuker_subopt::Bool=false, | ||
delta::Quantity=5.0u"kcal/mol") | ||
if model == :vienna | ||
bin = LinearFold_jll.linearfold_v() | ||
elseif model == :contrafold | ||
bin = LinearFold_jll.linearfold_c() | ||
else | ||
throw(ArgumentError("unknown model $model, options are: :vienna or :contrafold")) | ||
end | ||
delta_nounit = ustrip(uconvert(u"kcal/mol", delta)) | ||
cmd = `$bin $beamsize $(Int(is_sharpturn)) $(Int(verbose)) $(Int(is_eval)) | ||
$(Int(is_constraints)) $(Int(zuker_subopt)) $delta_nounit` | ||
return cmd | ||
end | ||
|
||
function check_constraints(seq, constraints) | ||
if constraints != "" | ||
if length(constraints) != length(seq) | ||
throw(ArgumentError("constraints and seq must have same size")) | ||
end | ||
if Set(collect(constraints)) != Set(['?', '.', '(', ')']) | ||
throw(ArgumentError("constraints can only contain the following characters: '?', '.', '(', ')'")) | ||
end | ||
end | ||
end | ||
|
||
function cmd_linearpartition(; model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
bpp_file::AbstractString="", | ||
bpp_prefix::AbstractString="", | ||
pf_only::Bool=false, | ||
bpp_cutoff::Float64=0.0, | ||
forest_file::AbstractString="", | ||
mea::Bool=false, | ||
gamma::Float64=3.0, | ||
threshknot::Bool=false, | ||
threshold::Float64=0.3, | ||
threshknot_prefix::AbstractString="", | ||
mea_prefix::AbstractString="", | ||
mea_bpseq::Bool=false) | ||
if model == :vienna | ||
bin = LinearPartition_jll.linearpartition_v() | ||
elseif model == :contrafold | ||
bin = LinearPartition_jll.linearpartition_c() | ||
else | ||
throw(ArgumentError("unknown model $model, options are: :vienna or :contrafold")) | ||
end | ||
cmd = `$bin $beamsize $(Int(is_sharpturn)) $(Int(verbose)) $bpp_file $bpp_prefix | ||
$(Int(pf_only)) $bpp_cutoff $forest_file $(Int(mea)) $gamma $(Int(threshknot)) | ||
$threshold $threshknot_prefix $mea_prefix $(Int(mea_bpseq))` | ||
return cmd | ||
end | ||
|
||
function run_cmd(cmd, inputstr; nlines::Int=1, verbose::Bool=false) | ||
if count(c -> c == '\n', inputstr) + 1 > nlines | ||
throw(ArgumentError("too many lines detected in input (maybe extra newline chars?)")) | ||
end | ||
outbuf = IOBuffer() | ||
errbuf = IOBuffer() | ||
run(pipeline(cmd; stdin=IOBuffer(inputstr), stdout=outbuf, stderr=errbuf)) | ||
out = String(take!(outbuf)) | ||
err = String(take!(errbuf)) | ||
if verbose | ||
println(out) | ||
println(err) | ||
end | ||
return out, err | ||
end | ||
|
||
function parseline_structure_energy(line) | ||
structure, en_str = split(line, ' ') | ||
en = parse(Float64, replace(en_str, "(" => "", ")" => "")) * u"kcal/mol" | ||
return String(structure), en | ||
end | ||
|
||
function parseline_energy(line) | ||
dG = parse(Float64, split(line, ':')[2] |> s -> split(s, ' ')[2]) * u"kcal/mol" | ||
return dG | ||
end | ||
|
||
function parse_bpseq_format(bpseq::AbstractString) | ||
# bpseq format | ||
# TODO: reference? | ||
# Format: | ||
# base_number nucleobase(A,C,G,U,etc) base_pair_number(or 0 if unpaired) | ||
# e.g. for sequence "GAC", secondary structure "(.)" we have: | ||
# 1 G 3 | ||
# 2 A 0 | ||
# 3 C 1 | ||
pt = Int[] | ||
seq = Char[] | ||
k = 0 | ||
for line in eachline(IOBuffer(bpseq)) | ||
if isempty(strip(line)) | ||
continue | ||
end | ||
k += 1 | ||
a = split(line) | ||
if length(a) != 3 | ||
error("illegal line in bpseq format: $line") | ||
end | ||
i = parse(Int, a[1]) | ||
nt = String(a[2]) | ||
j = parse(Int, a[3]) | ||
if length(nt) != 1 | ||
error("nucleobase must be single letter only in line: $line") | ||
end | ||
if i != k | ||
error("nucleotides must be numbered consecutively in line: $line") | ||
end | ||
if i < 0 || j < 0 | ||
error("negative nucleotide numbers in line: $line") | ||
end | ||
if i == j | ||
error("nucleotide paired to itself in line: $line") | ||
end | ||
if j != 0 && i > j && pt[j] != i | ||
error("target base has different base pairing partner: $line") | ||
end | ||
push!(seq, nt[1]) | ||
push!(pt, j) | ||
end | ||
return join(seq), pt | ||
end | ||
|
||
end # module Private | ||
|
||
import .Private: cmd_linearfold, cmd_linearpartition, | ||
check_constraints, run_cmd, parseline_structure_energy, | ||
parseline_energy, parse_bpseq_format | ||
|
||
function energy(seq::AbstractString, structure::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
is_sharpturn::Bool=false) | ||
is_eval = true | ||
cmd = cmd_linearfold(; model, verbose, is_sharpturn, is_eval) | ||
out, err = run_cmd(cmd, "$seq\n$structure"; nlines=2, verbose) | ||
line = split(out, '\n')[end-1] | ||
_, en = parseline_structure_energy(line) | ||
return en | ||
end | ||
|
||
function mfe(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
constraints::AbstractString="") | ||
if constraints != "" | ||
is_constraints = true | ||
input = "$seq\n$constraints" | ||
nlines = 2 | ||
check_constraints(seq, constraints) | ||
else | ||
is_constraints = false | ||
input = seq | ||
nlines = 1 | ||
end | ||
cmd = cmd_linearfold(; model, beamsize, is_sharpturn, verbose, | ||
is_constraints) | ||
out, err = run_cmd(cmd, input; nlines, verbose) | ||
line = split(out, '\n')[end-1] | ||
structure, en = parseline_structure_energy(line) | ||
return en, structure | ||
end | ||
|
||
function zuker_subopt(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
constraints::AbstractString="", | ||
delta::Quantity=5.0u"kcal/mol") | ||
if constraints != "" | ||
is_constraints = true | ||
input = "$seq\n$constraints" | ||
nlines = 2 | ||
check_constraints(seq, constraints) | ||
else | ||
is_constraints = false | ||
input = seq | ||
nlines = 1 | ||
end | ||
cmd = cmd_linearfold(; model, beamsize, is_sharpturn, verbose, | ||
is_constraints, zuker_subopt=true, delta) | ||
out, err = run_cmd(cmd, input; nlines, verbose) | ||
# parse suboptimal structures | ||
subopts = Tuple{typeof(1.0u"kcal/mol"), String}[] | ||
in_subopt = false | ||
for line in eachline(IOBuffer(out)) | ||
if ! in_subopt | ||
if startswith(line, "Zuker suboptimal structures...") | ||
in_subopt = true | ||
end | ||
continue | ||
else | ||
structure, en = parseline_structure_energy(line) | ||
push!(subopts, (en, structure)) | ||
end | ||
end | ||
return subopts | ||
end | ||
|
||
function partfn(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false) | ||
cmd = cmd_linearpartition(; model, beamsize, is_sharpturn, | ||
verbose, pf_only=true) | ||
out, err = run_cmd(cmd, seq; verbose) | ||
dG_ensemble = parseline_energy(err) | ||
return dG_ensemble | ||
end | ||
|
||
|
||
function bpp(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
bpp_cutoff::Float64=0.0) | ||
bpp_file, io_bpp = mktemp(cleanup=false) | ||
cmd = cmd_linearpartition(; model, beamsize, is_sharpturn, | ||
verbose, bpp_file, bpp_cutoff) | ||
out, err = run_cmd(cmd, seq; verbose) | ||
dG_ensemble = parseline_energy(err) | ||
# read bpp_file | ||
bpp = Dict{Tuple{Int,Int}, Float64}() | ||
for line in eachline(io_bpp) | ||
length(line) == 0 && continue | ||
a = split(line, ' ') | ||
if length(a) == 3 | ||
i = parse(Int, a[1]) | ||
j = parse(Int, a[2]) | ||
pij = parse(Float64, a[3]) | ||
bpp[(i,j)] = pij | ||
else | ||
error("strange line '$line' in bpp_file $bpp_file") | ||
end | ||
end | ||
close(io_bpp) | ||
Base.Filesystem.rm(bpp_file) | ||
|
||
return dG_ensemble, bpp | ||
end | ||
|
||
function mea(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
gamma::Float64=3.0) | ||
cmd = cmd_linearpartition(; model, verbose, beamsize, | ||
is_sharpturn, mea=true, gamma) | ||
out, err = run_cmd(cmd, seq; verbose) | ||
structure = String(split(out, '\n')[2]) | ||
dG_ensemble = parseline_energy(err) | ||
return dG_ensemble, structure | ||
end | ||
|
||
function threshknot(seq::AbstractString; | ||
model::Symbol=:vienna, | ||
verbose::Bool=false, | ||
beamsize::Int=100, | ||
is_sharpturn::Bool=false, | ||
threshold::Float64=0.3) | ||
cmd = cmd_linearpartition(; model, verbose, beamsize, | ||
is_sharpturn, threshknot=true, threshold) | ||
out, err = run_cmd(cmd, seq; verbose) | ||
if verbose | ||
# skip over first line of output in verbose mode | ||
out = join(split(out, '\n')[2:end], '\n') | ||
end | ||
_, pt = parse_bpseq_format(out) | ||
dG_ensemble = parseline_energy(err) | ||
return dG_ensemble, pt | ||
end | ||
|
||
end # module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[deps] | ||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
using Test | ||
using LinearFold: Unitful, @u_str | ||
using LinearFold: bpp, energy, mea, mfe, partfn, threshknot, zuker_subopt | ||
|
||
@testset "energy" begin | ||
@test energy("GGGAAACCC", | ||
"(((...)))") isa Unitful.Quantity | ||
end | ||
|
||
@testset "mfe" begin | ||
seq = "GGGAAACCC" | ||
dG, structure = mfe(seq) | ||
@test dG isa Unitful.Quantity | ||
@test length(structure) == length(seq) | ||
end | ||
|
||
@testset "zuker_subopt" begin | ||
seq = "GGGGGGAAAACCCCCAAAGGGGAAAAACCCCCAAAGGGGG" | ||
subopts = zuker_subopt(seq) | ||
@test length(subopts) > 0 | ||
@test subopts isa Vector{Tuple{typeof(1.0u"kcal/mol"),String}} | ||
end | ||
|
||
@testset "partfn" begin | ||
seq = "GGGAAACCC" | ||
dG = partfn(seq) | ||
@test dG isa Unitful.Quantity | ||
end | ||
|
||
@testset "bpp" begin | ||
seq = "GGGAAACCC" | ||
dG, p = bpp(seq) | ||
@test dG isa Unitful.Quantity | ||
@test p isa Dict{Tuple{Int,Int},Float64} | ||
end | ||
|
||
@testset "mea" begin | ||
seq = "GGGAAAACCCC" | ||
dG, structure = mea(seq) | ||
@test dG isa Unitful.Quantity | ||
@test length(structure) == length(seq) | ||
end | ||
|
||
@testset "threshknot" begin | ||
seq = "GGGGAAAACCCC" | ||
dG, pt = threshknot(seq) | ||
@test dG isa Unitful.Quantity | ||
@test length(pt) == length(seq) | ||
end |