Skip to content

Commit 7167f3e

Browse files
authored
Merge discvr-26.7 to develop (#430)
2 parents 6550d7c + 23e3f18 commit 7167f3e

20 files changed

Lines changed: 382 additions & 176 deletions

File tree

.github/workflows/branch_release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919

2020
steps:
2121
- name: Checkout
22-
uses: actions/checkout@v3
22+
uses: actions/checkout@v7
2323

2424
- name: Create branches and PRs
2525
uses: LabKey/gitHubActions/branch-release@develop

.github/workflows/build.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,13 @@ jobs:
5656
LK_VERSION=$(cat /home/runner/work/_temp/_github_home/lkDist/release.txt | sed 's/-SNAPSHOT//g')
5757
cd /home/runner/work/_temp/_github_home/labkey_build/${LK_VERSION}/server/server/modules/DiscvrLabKeyModules
5858
COUNT=$(gh release list | grep 'latest' | wc -l)
59-
if [ $COUNT != '0' ];then gh release delete 'latest' --cleanup-tag -y; fi
59+
if [ $COUNT != '0' ];then
60+
echo 'Deleting existing release'
61+
gh release delete 'latest' --cleanup-tag -y;
62+
fi
6063
61-
git push -f origin "latest"
64+
git push -f origin 'latest'
65+
sleep 100 # gh release periodically fails due to a missing tag.
6266
6367
gh release create 'latest' --verify-tag --generate-notes --prerelease --title "Development Build: ${{ env.DEFAULT_BRANCH }}"
6468
gh release upload 'latest' /home/runner/work/_temp/_github_home/lkDist/discvr/DISCVR-*

.github/workflows/merge_release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121

2222
steps:
2323
- name: Checkout
24-
uses: actions/checkout@v3
24+
uses: actions/checkout@v7
2525

2626
- name: Merge PR
2727
uses: LabKey/gitHubActions/merge-release@develop

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import org.labkey.api.data.CompareType;
99
import org.labkey.api.data.Container;
1010
import org.labkey.api.data.ContainerManager;
11+
import org.labkey.api.data.DbSchema;
12+
import org.labkey.api.data.DbSchemaType;
1113
import org.labkey.api.data.SimpleFilter;
1214
import org.labkey.api.data.Sort;
1315
import org.labkey.api.data.TableInfo;
@@ -16,7 +18,9 @@
1618
import org.labkey.api.exp.api.ExpRun;
1719
import org.labkey.api.exp.api.ExperimentService;
1820
import org.labkey.api.ldk.LDKService;
21+
import org.labkey.api.pipeline.CancelledException;
1922
import org.labkey.api.pipeline.PipeRoot;
23+
import org.labkey.api.pipeline.PipelineJob;
2024
import org.labkey.api.pipeline.PipelineJobException;
2125
import org.labkey.api.pipeline.PipelineService;
2226
import org.labkey.api.pipeline.PipelineStatusFile;
@@ -57,6 +61,9 @@
5761
*/
5862
public class SequenceAnalysisMaintenanceTask implements MaintenanceTask
5963
{
64+
private static final String SYSTEM_MAINTENANCE_DESCRIPTION = "System Maintenance";
65+
private static final String JOB_TABLE = "statusfiles";
66+
6067
public SequenceAnalysisMaintenanceTask()
6168
{
6269

@@ -74,6 +81,31 @@ public String getName()
7481
return "DeleteSequenceAnalysisArtifacts";
7582
}
7683

84+
// NOTE: if there is a more direct way to locate the JobID this hack should be replaced
85+
private void checkJobCancelled(Logger log)
86+
{
87+
// Make the assumption there is only one active maintenance job at a time:
88+
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("description"), SYSTEM_MAINTENANCE_DESCRIPTION).
89+
addCondition(FieldKey.fromString("container"), ContainerManager.getRoot().getId()).
90+
addCondition(FieldKey.fromString("modified"), new Date(), CompareType.DATE_EQUAL);
91+
int rowId = new TableSelector(DbSchema.get("pipeline", DbSchemaType.Module).getTable(JOB_TABLE), PageFlowUtil.set("RowId", "Status"), filter, null).getMapCollection().stream().filter(map -> {
92+
String val = String.valueOf(map.get("status"));
93+
return val != null && (val.toLowerCase().startsWith(PipelineJob.TaskStatus.cancelling.name()) || val.toLowerCase().startsWith(PipelineJob.TaskStatus.running.name()));
94+
}).map(rs -> Integer.parseInt(String.valueOf(rs.get("rowid")))).max(Integer::compareTo).orElse(-1);
95+
96+
if (rowId == -1)
97+
{
98+
log.warn("Unable to find rowId for job", new Exception("Unable to find rowId for job"));
99+
return;
100+
}
101+
102+
PipelineStatusFile sf = PipelineService.get().getStatusFile(rowId);
103+
if (PipelineJob.TaskStatus.cancelling.name().equalsIgnoreCase(sf.getStatus()))
104+
{
105+
throw new CancelledException();
106+
}
107+
}
108+
77109
@Override
78110
public void run(Logger log)
79111
{
@@ -158,6 +190,7 @@ private void verifySequenceDataPresent(Logger log)
158190
if (i % 1000 == 0)
159191
{
160192
log.info("readdata " + i + " of " + readDatas.size() + ". Current container: " + ContainerManager.getForId(rd.getContainer()).getPath());
193+
checkJobCancelled(log);
161194
}
162195

163196
if (rd.getFileId1() != null)
@@ -221,6 +254,7 @@ else if (!d.getFile().exists())
221254
if (i % 1000 == 0)
222255
{
223256
log.info("analysis " + i + " of " + analyses.size() + ". Current container: " + ContainerManager.getForId(m.getContainer()).getPath());
257+
checkJobCancelled(log);
224258
}
225259

226260
if (m.getAlignmentFile() != null)
@@ -296,7 +330,11 @@ else if (sf.getFilePath() == null)
296330
private void processContainer(Container c, Logger log) throws IOException, PipelineJobException
297331
{
298332
if (!c.isWorkbook())
333+
{
299334
log.info("processing container: " + c.getPath());
335+
}
336+
337+
checkJobCancelled(log);
300338

301339
PipeRoot root = PipelineService.get().getPipelineRootSetting(c);
302340
if (root != null && !root.isCloudRoot())

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep;
153153
import org.labkey.sequenceanalysis.run.preprocessing.FilterReadsStep;
154154
import org.labkey.sequenceanalysis.run.preprocessing.FlashPipelineStep;
155+
import org.labkey.sequenceanalysis.run.preprocessing.Kraken2Step;
155156
import org.labkey.sequenceanalysis.run.preprocessing.PrintReadsContainingStep;
156157
import org.labkey.sequenceanalysis.run.preprocessing.TagPcrSummaryStep;
157158
import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper;
@@ -291,7 +292,7 @@ public static void registerPipelineSteps()
291292
SequencePipelineService.get().registerPipelineStep(new CutadaptWrapper.Provider());
292293
SequencePipelineService.get().registerPipelineStep(new FastqcProcessingStep.Provider());
293294
SequencePipelineService.get().registerPipelineStep(new CutadaptCropWrapper.Provider());
294-
//SequencePipelineService.get().registerPipelineStep(new BlastFilterPipelineStep.Provider());
295+
SequencePipelineService.get().registerPipelineStep(new Kraken2Step.Provider());
295296

296297
//ref library
297298
SequencePipelineService.get().registerPipelineStep(new DNAReferenceLibraryStep.Provider());
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
package org.labkey.sequenceanalysis.run.preprocessing;
2+
3+
import org.apache.logging.log4j.Logger;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.json.JSONObject;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.pipeline.PipelineJobService;
8+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
9+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
10+
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
11+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
12+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
13+
import org.labkey.api.sequenceanalysis.pipeline.PreprocessingStep;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
15+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
16+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
17+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
18+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
19+
import org.labkey.api.util.Compress;
20+
import org.labkey.api.util.FileUtil;
21+
import org.labkey.api.util.Pair;
22+
23+
import java.io.File;
24+
import java.util.ArrayList;
25+
import java.util.Arrays;
26+
import java.util.List;
27+
28+
public class Kraken2Step extends AbstractCommandPipelineStep<Kraken2Step.Kraken2Wrapper> implements PreprocessingStep
29+
{
30+
private static final String DB_PARAM = "db";
31+
private static final String MODE_PARAM = "mode";
32+
33+
public Kraken2Step(PipelineStepProvider<?> provider, PipelineContext ctx)
34+
{
35+
super(provider, ctx, new Kraken2Wrapper(ctx.getLogger()));
36+
}
37+
38+
public static class Provider extends AbstractPipelineStepProvider<PreprocessingStep>
39+
{
40+
public Provider()
41+
{
42+
super("Kraken2", "Kraken2", "Kraken2", "This step aligns input reads against a reference using BWA-mem and will only return read pairs without a passing hit in either read.", Arrays.asList(
43+
ToolParameterDescriptor.create(DB_PARAM, "Database", "This determines the DB for positive or negative selection", "ldk-simplecombo", new JSONObject(){{
44+
put("storeValues", "kraken2_bv;kraken2_standard");
45+
put("multiSelect", false);
46+
put("allowBlank", false);
47+
put("joinReturnValue", true);
48+
put("delimiter", ";");
49+
}}, "kraken2_bv"),
50+
ToolParameterDescriptor.create(MODE_PARAM, "Reads To Retain", "This determines which set of reads is passed to the next step. If 'Retain Classified' is selected, then reads matching the DB are retained. if 'Retain Unclassified' is selected, then reads that do not match the DB are retained", "ldk-simplecombo", new JSONObject(){{
51+
put("storeValues", "Classified;Unclassified");
52+
put("multiSelect", false);
53+
put("allowBlank", false);
54+
put("joinReturnValue", true);
55+
put("delimiter", ";");
56+
}}, null),
57+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--memory-mapping"), "memoryMapping", "Memory Mapping", "If checked, the DB will not be read into memory, reducing RAM", "checkbox", null, false),
58+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--minimum-hit-groups"), "minimumHitGroups", "Minimum Hit Groups", "Minimum number of hit groups (overlapping k-mers sharing the same minimizer) needed to make a call", "ldk-integerfield", new JSONObject(){{
59+
put("minValue", 0);
60+
}}, 2),
61+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--confidence"), "confidence", "Confidence", "Confidence score threshold (0-1)", "ldk-numberfield", new JSONObject(){{
62+
put("minValue", 0);
63+
put("maxValue", 1);
64+
put("decimalPrecision", 2);
65+
}}, 0)
66+
), null, "https://github.com/DerrickWood/kraken2");
67+
}
68+
69+
@Override
70+
public Kraken2Step create(PipelineContext context)
71+
{
72+
return new Kraken2Step(this, context);
73+
}
74+
}
75+
76+
@Override
77+
public Output processInputFile(File inputFile, @Nullable File inputFile2, File outputDir) throws PipelineJobException
78+
{
79+
PreprocessingOutputImpl output = new PreprocessingOutputImpl(inputFile, inputFile2);
80+
81+
List<String> args = new ArrayList<>();
82+
args.add(getWrapper().getExe().getPath());
83+
84+
if (inputFile2 != null)
85+
{
86+
args.add("--paired");
87+
}
88+
89+
if (inputFile.getName().toLowerCase().endsWith(".gz"))
90+
{
91+
args.add("--gzip-compressed");
92+
}
93+
94+
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
95+
if (threads != null)
96+
{
97+
args.add("--threads");
98+
args.add(threads.toString());
99+
}
100+
101+
String dbName = getProvider().getParameterByName(DB_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
102+
if (dbName == null)
103+
{
104+
throw new PipelineJobException("Missing DB name");
105+
}
106+
107+
File binDir = FileUtil.appendName(new File(PipelineJobService.get().getAppProperties().getToolsDirectory()), "kraken2_dbs");
108+
if (!binDir.exists())
109+
{
110+
throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + binDir.getAbsolutePath());
111+
}
112+
113+
File dbDir = FileUtil.appendName(binDir, dbName);
114+
if (!dbDir.exists())
115+
{
116+
throw new PipelineJobException("Unable to find kraken2 DB dir, expected: " + dbDir.getAbsolutePath());
117+
}
118+
119+
args.add("--use-names");
120+
121+
args.add("--db");
122+
args.add(dbDir.getAbsolutePath());
123+
124+
args.addAll(getClientCommandArgs());
125+
126+
args.add("--output");
127+
args.add("-");
128+
129+
String mode = getProvider().getParameterByName(MODE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
130+
131+
File classifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".classified");
132+
File unclassifiedOutputBase = FileUtil.appendName(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".unclassified");
133+
if ("Classified".equals(mode))
134+
{
135+
args.add("--classified-out");
136+
args.add(classifiedOutputBase.getPath() + "#.fq");
137+
}
138+
else
139+
{
140+
args.add("--unclassified-out");
141+
args.add(unclassifiedOutputBase.getPath() + "#.fq");
142+
}
143+
144+
File reportFile = FileUtil.appendName(outputDir, SequencePipelineService.get().getUnzippedBaseName(inputFile.getName()) + ".kraken2.report.txt");
145+
args.add("--report");
146+
args.add(reportFile.getPath());
147+
148+
args.add(inputFile.getPath());
149+
if (inputFile2 != null)
150+
{
151+
args.add(inputFile2.getPath());
152+
}
153+
154+
getWrapper().execute(args);
155+
156+
if ("Classified".equals(mode))
157+
{
158+
File classified1 = new File(classifiedOutputBase.getPath() + "_1.fq");
159+
File classified2 = inputFile2 == null ? null : new File(classifiedOutputBase.getPath() + "_2.fq");
160+
if (!classified1.exists())
161+
{
162+
throw new PipelineJobException("Classified file does not exist: " + classified1.getAbsolutePath());
163+
}
164+
165+
File compressed1 = Compress.compressGzip(classified1);
166+
output.addIntermediateFile(classified1);
167+
168+
File compressed2 = classified2 == null ? null : Compress.compressGzip(classified2);
169+
if (classified2 != null)
170+
{
171+
output.addIntermediateFile(classified2);
172+
}
173+
174+
output.setProcessedFastq(Pair.of(compressed1, compressed2));
175+
}
176+
else
177+
{
178+
File unclassified1 = new File(unclassifiedOutputBase.getPath() + "_1.fq");
179+
File unclassified2 = inputFile2 == null ? null : new File(unclassifiedOutputBase.getPath() + "_2.fq");
180+
if (!unclassified1.exists())
181+
{
182+
throw new PipelineJobException("Unclassified file does not exist: " + unclassified1.getAbsolutePath());
183+
}
184+
185+
File compressed1 = Compress.compressGzip(unclassified1);
186+
output.addIntermediateFile(unclassified1);
187+
188+
File compressed2 = unclassified2 == null ? null : Compress.compressGzip(unclassified2);
189+
if (unclassified2 != null)
190+
{
191+
output.addIntermediateFile(unclassified2);
192+
}
193+
194+
output.setProcessedFastq(Pair.of(compressed1, compressed2));
195+
}
196+
197+
return output;
198+
}
199+
200+
public static class Kraken2Wrapper extends AbstractCommandWrapper
201+
{
202+
public Kraken2Wrapper(Logger log)
203+
{
204+
super(log);
205+
}
206+
207+
public File getExe()
208+
{
209+
return SimpleScriptWrapper.resolveFileInPath("kraken2", null, true);
210+
}
211+
}
212+
}

SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ private void analysisPanelTest() throws Exception
611611
waitForElementToDisappear(Ext4Helper.Locators.window("Add Steps"));
612612

613613
Map<String, Ext4CmpRef> fieldsetMap = new HashMap<>();
614-
String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter"};
614+
String[] setNames = {"Adapter Trimming (Trimmomatic)", "Average Quality Filter", "Crop Reads", "Downsample Reads", "Filter Reads Matching Reference", "Head Crop", "Quality Trimming (Adaptive)", "Quality Trimming (Sliding Window)", "Read Length Filter", "Kraken2"};
615615
isPresentInThisOrder(setNames);
616616

617617
for (String name : setNames)
@@ -628,6 +628,9 @@ private void analysisPanelTest() throws Exception
628628
waitAndClick(Locator.id(fieldsetMap.get("Head Crop").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a")));
629629
waitForElementToDisappear(Locator.id(fieldsetMap.get("Head Crop").getId()));
630630

631+
waitAndClick(Locator.id(fieldsetMap.get("Kraken2").down("ldk-linkbutton[text='Remove']", Ext4CmpRef.class).getId()).append(Locator.tag("a")));
632+
waitForElementToDisappear(Locator.id(fieldsetMap.get("Kraken2").getId()));
633+
631634
Integer overlapLength = 6;
632635
Double errorRate = 0.2;
633636
Integer cropLength = 500;

0 commit comments

Comments
 (0)