Skip to content

Commit 8261050

Browse files
committed
Merge discvr-26.3 to develop
2 parents 3936e0b + fce0dac commit 8261050

13 files changed

Lines changed: 292 additions & 89 deletions

File tree

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import org.labkey.api.data.ConvertHelper;
66
import org.labkey.api.pipeline.PipelineJobException;
77
import org.labkey.api.pipeline.WorkDirectory;
8+
import org.labkey.api.util.FileUtil;
89
import org.labkey.vfs.FileSystemLike;
910

1011
import java.io.File;
@@ -85,20 +86,25 @@ private static boolean verifyOrCreateCachedIndex(PipelineContext ctx, @Nullable
8586
if (wd != null)
8687
{
8788
String val = ctx.getJob().getParameters().get(COPY_LOCALLY);
88-
boolean doCopy = forceCopyLocal || (val == null || ConvertHelper.convert(val, Boolean.class));
89+
boolean doCopy = forceCopyLocal || (val == null || Boolean.TRUE.equals(ConvertHelper.convert(val, Boolean.class)));
8990

9091
if (doCopy)
9192
{
9293
ctx.getLogger().info("copying index files to work location");
93-
File localSharedDir = new File(wd.getDir().toNioPathForRead().toFile(), "Shared");
94-
File destination = new File(localSharedDir, localName);
94+
File localSharedDir = FileUtil.appendName(wd.getDir().toNioPathForRead().toFile(), "Shared");
95+
File destination = FileUtil.appendName(localSharedDir, localName);
9596
ctx.getLogger().debug(destination.getPath());
9697
File[] files = webserverIndexDir.listFiles();
9798
if (files == null)
9899
{
99100
return false;
100101
}
101102

103+
if (!destination.exists())
104+
{
105+
FileUtil.mkdirs(destination);
106+
}
107+
102108
destination = wd.inputFile(FileSystemLike.wrapFile(webserverIndexDir), FileSystemLike.wrapFile(destination), true).toNioPathForRead().toFile();
103109
if (output != null && !destination.equals(webserverIndexDir))
104110
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,10 @@ else if (sf.getFilePath() == null)
272272
return;
273273
}
274274

275-
File root = new File(sf.getFilePath());
275+
File root = new File(sf.getFilePath()).getParentFile();
276276
if (!root.exists())
277277
{
278-
log.error("Run fileroot does not exist: " + runId + " / " + root.getPath());
278+
log.error("Run file root does not exist. runId: " + runId + " / jobId: " + sf.getRowId() + " / " + root.getPath());
279279
return;
280280
}
281281

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
import org.labkey.sequenceanalysis.run.analysis.SawfishJointCallingHandler;
128128
import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
129129
import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
130+
import org.labkey.sequenceanalysis.run.analysis.SpecHlaAnalysis;
130131
import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
131132
import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler;
132133
import org.labkey.sequenceanalysis.run.analysis.ViralAnalysis;
@@ -343,6 +344,7 @@ public static void registerPipelineSteps()
343344
SequencePipelineService.get().registerPipelineStep(new PindelAnalysis.Provider());
344345
SequencePipelineService.get().registerPipelineStep(new PbsvAnalysis.Provider());
345346
SequencePipelineService.get().registerPipelineStep(new GenrichStep.Provider());
347+
SequencePipelineService.get().registerPipelineStep(new SpecHlaAnalysis.Provider());
346348
SequencePipelineService.get().registerPipelineStep(new SawfishAnalysis.Provider());
347349

348350
SequencePipelineService.get().registerPipelineStep(new PARalyzerAnalysis.Provider());

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.labkey.api.util.FileType;
3333
import org.labkey.api.util.FileUtil;
3434
import org.labkey.api.util.Pair;
35+
import org.labkey.api.util.Path;
3536
import org.labkey.api.writer.PrintWriters;
3637
import org.labkey.sequenceanalysis.SequenceAnalysisManager;
3738
import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
@@ -57,6 +58,7 @@
5758
import java.util.HashSet;
5859
import java.util.List;
5960
import java.util.Map;
61+
import java.util.Objects;
6062
import java.util.Set;
6163
import java.util.stream.Collectors;
6264

@@ -250,7 +252,7 @@ private SequenceAnalysisJobSupport getSequenceSupport()
250252

251253
private File getDeferredDeleteLog(boolean create)
252254
{
253-
File logFile = new File(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), "toDelete.txt");
255+
File logFile = FileUtil.appendName(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), "toDelete.txt");
254256
if (create && !logFile.exists())
255257
{
256258
try
@@ -269,7 +271,7 @@ private File getDeferredDeleteLog(boolean create)
269271

270272
private File getMetricsLog(boolean create)
271273
{
272-
File logFile = new File(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), "metricsToCreate.txt");
274+
File logFile = FileUtil.appendName(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), "metricsToCreate.txt");
273275
if (create && !logFile.exists())
274276
{
275277
try (PrintWriter writer = PrintWriters.getPrintWriter(logFile))
@@ -385,10 +387,10 @@ private File convertRelPathToFile(String line)
385387
return null;
386388
}
387389

388-
File f = new File(_workLocation, line);
390+
File f = FileUtil.appendPath(_workLocation, Path.parse(line));
389391
if (!f.exists())
390392
{
391-
File test = new File(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), line);
393+
File test = FileUtil.appendPath(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), Path.parse(line));
392394
if (test.exists())
393395
{
394396
f = test;
@@ -859,7 +861,7 @@ private void processCopiedFile(File original, File moved, Collection<RecordedAct
859861
_job.getLogger().debug("Directory has " + moved.listFiles().length + " children");
860862
for (File f : moved.listFiles())
861863
{
862-
processCopiedFile(new File(original, f.getName()), f, actions, resumer);
864+
processCopiedFile(FileUtil.appendName(original, f.getName()), f, actions, resumer);
863865
}
864866
}
865867
}
@@ -910,23 +912,21 @@ public void cleanup(Collection<RecordedAction> actions, @Nullable AbstractResume
910912
_job.getLogger().debug("discarding copied inputs");
911913
_wd.discardCopiedInputs();
912914

913-
if (!_wd.getDir().exists())
915+
if (_wd.getDir().exists())
914916
{
915-
throw new PipelineJobException("work dir does not exist: " + _wd.getDir());
916-
}
917-
918-
//NOTE: preserving relative locations is a pain. therefore we copy all outputs, including directories
919-
//then sort out which files were specified as named outputs later
920-
for (File input : _wd.getDir().toNioPathForRead().toFile().listFiles())
921-
{
922-
if (input.getName().matches("^core.[0-9]+$") || input.getName().endsWith(".hprof"))
917+
//NOTE: preserving relative locations is a pain. therefore we copy all outputs, including directories
918+
//then sort out which files were specified as named outputs later
919+
for (File input : Objects.requireNonNull(_wd.getDir().toNioPathForRead().toFile().listFiles()))
923920
{
924-
_job.getLogger().debug("Deleting core/hprof file: " + input.getPath());
925-
input.delete();
926-
continue;
927-
}
921+
if (input.getName().matches("^core.[0-9]+$") || input.getName().endsWith(".hprof"))
922+
{
923+
_job.getLogger().debug("Deleting core/hprof file: " + input.getPath());
924+
input.delete();
925+
continue;
926+
}
928927

929-
copyFile(input, actions, resumer);
928+
copyFile(input, actions, resumer);
929+
}
930930
}
931931
}
932932
else
@@ -987,7 +987,7 @@ private void doCopyFile(File input, Collection<RecordedAction> actions, @Nullabl
987987
}
988988

989989
String path = _wd.getRelativePath(FileSystemLike.wrapFile(input));
990-
File dest = new File(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), path);
990+
File dest = FileUtil.appendPath(getSupport().getAnalysisDirectory().toNioPathForRead().toFile(), Path.parse(path));
991991
_job.getLogger().debug("to: " + dest.getPath());
992992

993993
boolean doMove = true;
@@ -1076,7 +1076,7 @@ private File decompressFile(File i, List<RecordedAction> actions)
10761076
//NOTE: we use relative paths in all cases here
10771077
_job.getLogger().info("Decompressing file: " + i.getPath());
10781078

1079-
unzipped = new File(_wd.getDir().toNioPathForRead().toFile(), i.getName().replaceAll(".gz$", ""));
1079+
unzipped = FileUtil.appendName(_wd.getDir().toNioPathForRead().toFile(), i.getName().replaceAll(".gz$", ""));
10801080
unzipped = Compress.decompressGzip(i, unzipped);
10811081
_job.getLogger().debug("\tunzipped: " + unzipped.getPath());
10821082

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
package org.labkey.sequenceanalysis.run.analysis;
2+
3+
import htsjdk.samtools.SAMFileHeader;
4+
import org.apache.commons.io.FileUtils;
5+
import org.labkey.api.pipeline.PipelineJobException;
6+
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
7+
import org.labkey.api.sequenceanalysis.model.Readset;
8+
import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider;
9+
import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl;
10+
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
11+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
12+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
13+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
14+
import org.labkey.api.sequenceanalysis.pipeline.SamSorter;
15+
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
16+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
17+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
18+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
19+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
20+
import org.labkey.api.util.FileUtil;
21+
import org.labkey.api.util.Path;
22+
23+
import java.io.File;
24+
import java.io.IOException;
25+
import java.util.ArrayList;
26+
import java.util.Arrays;
27+
import java.util.List;
28+
29+
public class SpecHlaAnalysis extends AbstractCommandPipelineStep<SimpleScriptWrapper> implements AnalysisStep
30+
{
31+
public SpecHlaAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
32+
{
33+
super(provider, ctx, new SimpleScriptWrapper(ctx.getLogger()));
34+
}
35+
36+
public static class Provider extends AbstractAnalysisStepProvider<SpecHlaAnalysis>
37+
{
38+
public Provider()
39+
{
40+
super("SpecHlaStep", "SpecHLA", null, "This will run SpecHLA for HLA genotyping from WGS/WXS data. This should use a BAM aligned to a custom HLA DB, rather than aligned to the full genome", Arrays.asList(
41+
42+
), null, "https://github.com/deepomicslab/SpecHLA/");
43+
}
44+
45+
@Override
46+
public SpecHlaAnalysis create(PipelineContext ctx)
47+
{
48+
return new SpecHlaAnalysis(this, ctx);
49+
}
50+
}
51+
52+
@Override
53+
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
54+
{
55+
AnalysisOutputImpl output = new AnalysisOutputImpl();
56+
57+
File gzippedFasta = referenceGenome.getWorkingFastaFileGzipped();
58+
if (!gzippedFasta.exists())
59+
{
60+
throw new PipelineJobException("Missing file: " + gzippedFasta.getPath());
61+
}
62+
63+
File doneFile = FileUtil.appendName(outputDir, FileUtil.getBaseName(inputBam) + ".subset.done");
64+
output.addIntermediateFile(doneFile);
65+
66+
File subsetBam = FileUtil.appendName(outputDir, FileUtil.getBaseName(inputBam) + ".subset.bam");
67+
SamtoolsRunner sr = new SamtoolsRunner(getWrapper().getLogger());
68+
if (doneFile.exists())
69+
{
70+
getPipelineCtx().getLogger().debug("Done file exists, skipping samtools view");
71+
}
72+
else
73+
{
74+
sr.execute(Arrays.asList(
75+
sr.getSamtoolsPath().getPath(),
76+
"view",
77+
"-h",
78+
"-F", "12", //This selects pairs where either mate is mapped
79+
"-T", gzippedFasta.getPath(),
80+
"-o", subsetBam.getPath(),
81+
inputBam.getPath()
82+
));
83+
}
84+
output.addIntermediateFile(subsetBam);
85+
86+
File queryNameSortBam = FileUtil.appendName(outputDir, FileUtil.getBaseName(inputBam) + ".querySort.bam");
87+
if (doneFile.exists())
88+
{
89+
getPipelineCtx().getLogger().debug("Done file exists, skipping samtools sort");
90+
}
91+
else
92+
{
93+
new SamSorter(getPipelineCtx().getLogger()).execute(subsetBam, queryNameSortBam, SAMFileHeader.SortOrder.queryname);
94+
}
95+
output.addIntermediateFile(queryNameSortBam);
96+
97+
File fq1 = FileUtil.appendName(outputDir, FileUtil.getBaseName(inputBam) + ".R1.fastq.gz");
98+
File fq2 = FileUtil.appendName(outputDir, FileUtil.getBaseName(inputBam) + ".R2.fastq.gz");
99+
if (doneFile.exists())
100+
{
101+
getPipelineCtx().getLogger().debug("Done file exists, skipping samtools fastq");
102+
}
103+
else
104+
{
105+
sr.execute(Arrays.asList(
106+
sr.getSamtoolsPath().getPath(),
107+
"fastq",
108+
"-1",
109+
fq1.getPath(),
110+
"-2",
111+
fq2.getPath(),
112+
queryNameSortBam.getPath()
113+
));
114+
}
115+
output.addIntermediateFile(fq1);
116+
output.addIntermediateFile(fq2);
117+
118+
try
119+
{
120+
FileUtils.touch(doneFile);
121+
}
122+
catch (IOException e)
123+
{
124+
throw new PipelineJobException(e);
125+
}
126+
127+
File specHlaExe = AbstractCommandWrapper.resolveFileInPath("spechla", null, true);
128+
129+
List<String> toRun = new ArrayList<>(Arrays.asList(
130+
specHlaExe.getPath(),
131+
"-n",
132+
"specHLA",
133+
"-u",
134+
"1", // 1 = exon. 0 = full-length
135+
"-1",
136+
fq1.getPath(),
137+
"-2",
138+
fq2.getPath(),
139+
"-o",
140+
outputDir.getPath()
141+
));
142+
143+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(getWrapper().getLogger());
144+
if (maxThreads != null)
145+
{
146+
toRun.add("-j");
147+
toRun.add(maxThreads.toString());
148+
}
149+
150+
getWrapper().execute(toRun);
151+
152+
File spechlaDir = FileUtil.appendName(outputDir, "specHLA");
153+
File outFile = FileUtil.appendName(spechlaDir, "hla.result.txt");
154+
if (!outFile.exists())
155+
{
156+
throw new PipelineJobException("SpecHLA result file does not exist: " + outFile.getPath());
157+
}
158+
159+
output.addIntermediateFile(spechlaDir);
160+
File copiedFile = FileUtil.appendName(outputDir, outFile.getName());
161+
try
162+
{
163+
FileUtils.copyFile(outFile, copiedFile);
164+
}
165+
catch (IOException e)
166+
{
167+
throw new PipelineJobException(e);
168+
}
169+
170+
output.addSequenceOutput(copiedFile, FileUtil.getBaseName(inputBam) + ": HLA Typing", "specHLA Genotyping", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
171+
172+
return output;
173+
}
174+
175+
@Override
176+
public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
177+
{
178+
return null;
179+
}
180+
}

0 commit comments

Comments
 (0)