From 2202b8177787835b8d9a6ba59789926a38e7ed20 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 27 Jul 2024 01:05:05 -0400 Subject: [PATCH] DMPexcel2mat.m refactor --- workflow/templates/easy/DMPexcel2mat.m | 227 ++++++++++++++++++------- 1 file changed, 167 insertions(+), 60 deletions(-) diff --git a/workflow/templates/easy/DMPexcel2mat.m b/workflow/templates/easy/DMPexcel2mat.m index cbbfca72..ddc22078 100755 --- a/workflow/templates/easy/DMPexcel2mat.m +++ b/workflow/templates/easy/DMPexcel2mat.m @@ -1,34 +1,100 @@ %% CALLED BY EASYconsole.m %% -w=pwd -numOfMPs=0; -%GUI input for selecting a MasterPlate Excel file -questdlg('\fontsize{20} Select Master Plate File','File Selection','OK', struct('Default','OK','Interpreter','tex')) -[Scanfiles, pathname]=uigetfile('*.*','MultiSelect','off'); -if ispc - MPdir= fullfile(pathname,'\'); -else - MPdir=fullfile(pathname,'/'); + +global mpdmFile +global masterPlateFile +global drugMediaFile +global matDir + +% If we already have mpdmFile, don't recreate +if exist(mpdmFile, 'file') && ~isempty(mpdmFile) + disp(strcat('The Drug Media/MasterPlate Annotation File: ', mpdmFile, ' exists, skipping DMPexcel2mat.m')); + return end -infile= Scanfiles(1,:); +% Do our best to find and load a relevant MasterPlate file +if ~exist(masterPlateFile, 'file') || isempty(masterPlateFile) + if exist(fullfile(matDir), 'dir') + % Try to find the masterPlateFile automatically (newest created first) + try + files=dir(matDir) + mpFiles={files(strncmp(files.name, 'MasterPlate_', 12)).name}; + isempty(mpFiles) && throw(MException('MATLAB:dir', 'No MasterPlate_ files in the default MasterPlate directory')); + % this sorts by date (newest first) + [~, sortedIndices]=sort(datenum({files(strncmp(mpFiles.name, 'MasterPlate_', 12)).date}), 'descend'); + sortedFiles=mpFiles(sortedIndices); + masterPlateFile=sortedFiles{1}; + disp('Using newest MasterPlate file: ', masterPlateFile, ', skipping directory selection'); + catch Me + % This can be silent, not really an error + end -cd(MPdir) - -%fid=fopen(infile)%('exp23PrintTimes.xls'); % textread puts date and time sequentially into vector + for i-1:5 % give users 5 chances to get it right + try + % For standalone mode + % GUI input for selecting a MasterPlate Excel file + questdlg('Select MasterPlate Directory (containing DrugMedia_ & MasterPlate_ files)','Directory Selection','OK',... + struct('Default','OK','Interpreter','tex')) + dirToScan=uigetdir(); + files=dir(dirToScan); + mpFiles={files(strncmp(files.name, 'MasterPlate_', 12)).name}; + isempty(mpFiles) && throw(MException('MATLAB:dir', 'No MasterPlate_ files in directory')) + % this sorts by date (newest first) + [~, sortedIndices]=sort(datenum({files(strncmp(mpFiles.name, 'MasterPlate_', 12)).date}), 'descend'); + sortedFiles=mpFiles{sortedIndices}; + masterPlateFile=sortedFiles{1}; + disp('Using newest MasterPlate file: ', masterPlateFile, ', skipping directory selection'); + break + catch ME + h = msgbox(ME.message, 'Error', 'error'); + disp('Rerunning directory selection'); + % I don't know what else we'll need to do here + end + end + else + for i-1:5 % give users 5 chances to get it right + try + % For standalone mode + % GUI input for selecting a MasterPlate Excel file + questdlg('Select MasterPlate Directory (containing DrugMedia_ & MasterPlate_ files)','Directory Selection','OK',... + struct('Default','OK','Interpreter','tex')) + dirToScan=uigetdir(); + files=dir(dirToScan); + mpFiles={files(strncmp(files.name, 'MasterPlate_', 12)).name}; + isempty(mpFiles) && throw (MException('MATLAB:dir', 'No MasterPlate_ files in directory')) + % this sorts by date (newest first) + [~, sortedIndices]=sort(datenum({files(strncmp(mpFiles.name, 'MasterPlate_', 12)).date}), 'descend'); + sortedFiles=mpFiles{sortedIndices}; + masterPlateFile=sortedFiles{1}; + disp('Using newest MasterPlate file: ', masterPlateFile, ', skipping directory selection'); + break + catch ME + h = msgbox(ME.message, 'Error', 'error'); + uiwait(h); + disp('Rerunning directory selection'); + % I don't know what else we'll need to do here + end + end + end +else + disp(strcat('Using MasterPlate file: ', masterPlateFile, ', skipping directory selection'); +end +% fid=fopen(masterPlateFile)%('exp23PrintTimes.xls'); % textread puts date and time sequentially into vector +% TODO needs explanation, it isn't clear what it is for +% The input files should be csv, not xlsx +% This whole file could be much better and more portable if ispc - [num, txt, raw] = xlsread(infile); %,'Yor1HitsMPsetFinal'); + [num, txt, raw] = xlsread(masterPlateFile); %,'Yor1HitsMPsetFinal'); fields= {txt(2,1:15)}; %or 1:17 for later but dont wish to exceed and cause error ? if used else clear MPtbl - opts = detectImportOptions(infile); - MPtbl = readtable(infile,opts); - MPtbl= readtable(infile); - fields= {opts.VariableNames}; %? if used anywhere although 'saved' to MPDMmat - MPcell= readcell(infile); + opts=detectImportOptions(masterPlateFile); + MPtbl=readtable(masterPlateFile,opts); + MPtbl=readtable(masterPlateFile); + fields={opts.VariableNames}; %? if used anywhere although 'saved' to MPDMmat + MPcell=readcell(masterPlateFile); end -cd(w) numb=0; clear MP; try @@ -44,24 +110,20 @@ try MP(numb).drug= {raw((excLnNum+1):(excLnNum+384),8)}; MP(numb).media= {raw((excLnNum+1):(excLnNum+384),7)}; if size(raw,2)>15 - MP(numb).orfRep= {raw((excLnNum+1):(excLnNum+384),16)}; %added 12_1005 to specify replicates Orfs in MP - MP(numb).specifics= {raw((excLnNum+1):(excLnNum+384),17)}; %added 12_1008 to specify replicates Specific details in MP + MP(numb).orfRep= {raw((excLnNum+1):(excLnNum+384),16)}; % added 12_1005 to specify replicates Orfs in MP + MP(numb).specifics= {raw((excLnNum+1):(excLnNum+384),17)}; % added 12_1008 to specify replicates Specific details in MP else MP(numb).orfRep= ' '; MP(numb).specifics= ' '; end - % Future MP field % if size(raw,2)>17 - % MP(numb).specifics2= {raw((excLnNum+1):(excLnNum+384),18)}; %added 12_1008 to specify strain Bkground in MP + % MP(numb).specifics2= {raw((excLnNum+1):(excLnNum+384),18)}; % added 12_1008 to specify strain Bkground in MP % else % MP(numb).specifics2=' '; % end - excLnNum=excLnNum+385; - msg=strcat('NumberOfMP = ',num2str(numb), ' lastLineNo. = ',num2str(excLnNum)); - end else excLnNum=1; @@ -80,57 +142,104 @@ try MP(numb).media= {MPtbl((excLnNum+1):(excLnNum+384),7)}; MP(numb).media{1}= table2cell(MP(numb).media{1}); if size(MPtbl,2)>15 - MP(numb).orfRep= {MPtbl((excLnNum+1):(excLnNum+384),16)}; %added 12_1005 to specify replicates Orfs in MP + MP(numb).orfRep= {MPtbl((excLnNum+1):(excLnNum+384),16)}; % added 12_1005 to specify replicates Orfs in MP MP(numb).orfRep{1}=table2cell(MP(numb).orfRep{1}); - MP(numb).specifics={MPtbl((excLnNum+1):(excLnNum+384),17)}; %added 12_1008 to specify replicates Specific details in MP + MP(numb).specifics={MPtbl((excLnNum+1):(excLnNum+384),17)}; % added 12_1008 to specify replicates Specific details in MP MP(numb).specifics{1}=table2cell(MP(numb).specifics{1}); else MP(numb).orfRep= ' '; MP(numb).specifics= ' '; end - excLnNum=excLnNum+385; msg=strcat('NumberOfMP = ',num2str(numb), 'lastLineNo. = ',num2str(excLnNum)) end end catch ME - h = msgbox(msg,'Check Number of Master Plates and Excel Lines') + h = msgbox(ME.message, 'Error', 'error'); uiwait(h); -end %end for try MP excel sheet input +end %DMupload %Drug and Media Plate setup Upload from Excel -cd(MPdir); excLnNum=1; numOfDrugs=0; numOfMedias=0; -%GUI input for selecting a MasterPlate Excel file -questdlg('\fontsize{20} Select DrugMedia File','File Selection','OK', struct('Default','OK','Interpreter','tex')); -[Scanfiles, pathname]=uigetfile('*.*', 'MultiSelect','off'); -DMdir=fullfile(pathname); -clear infile; -infile= Scanfiles(1,:); -cd(DMdir) +% Grabbing the bare filename from the MasterPlate file to see if we can automatically +% find a matching DrugMedia file +[mpFile, mpPath]=masterPlateFile +mpFileParts=strsplit(mpFile, '_'); +mpBareFileName=strjoin(parts(2:end-1), '_'); -if ispc - [num, txt, raw] = xlsread(infile); %,'Yor1HitsMPsetFinal'); - fields= {txt(2,1:5)}; - Linked= num(1,1); +if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile) + if exist(fullfile(matDir), 'dir') + try + dmFileToTest=fullfile(mpPath, 'DrugMedia_', mpBareFileName, '.xlsx'); + if exist(dmFileToTest, 'file') % Try to find a matching drug media file + drugMediaFile=dmFileToTest; + disp(strcat('Using matching DrugMedia file: ', drugMediaFile, ', skipping directory selection')); + else + % Try to find the DrugMedia file automatically (newest created first) + files=dir(matDir) + dmFiles={files(strncmp(files.name, 'DrugMedia_', 10)).name}; + isempty(dmFiles) && throw (MException('MATLAB:dir', 'No DrugMedia_ files in directory')) + % this sorts by date (newest first) + [~, sortedIndices]=sort(datenum({files(strncmp(dmFiles.name, 'DrugMedia_', 10)).date}), 'descend'); + sortedFiles=dmFiles{sortedIndices}; + drugMediaFile=sortedFiles{1}; + disp(strcat('Using newest DrugMedia file: ', drugMediaFile, ', skipping directory selection')); + end + catch Me + % This can be silent, not really an error + end + else + for i-1:5 % give users 5 chances to get it right + try + % GUI input for selecting a DrugMedia file + % sort by newest matching DrugMedia file and return that if we find one + % For standalone mode + % GUI input for selecting a MasterPlate Excel file + questdlg('Select DrugMedia directory','Directory Selection','OK',... + struct('Default','OK','Interpreter','tex')) + dirToScan=uigetdir(); + files=dir(dirToScan) + dmFiles={files(strncmp(files.name, 'DrugMedia_', 10)).name}; + isempty(dmFiles) && throw (MException('MATLAB:dir', 'No DrugMedia_ files in directory')) + % this sorts by date (newest first) + [~, sortedIndices]=sort(datenum({files(strncmp(dmFiles.name, 'DrugMedia_', 10)).date}), 'descend'); + sortedFiles=dmFiles{sortedIndices}; + drugMediaFile=sortedFiles{1}; + catch ME + h = msgbox(ME.message, 'Error', 'error'); + uiwait(h); + disp('Rerunning directory selection') + % I don't know what else we'll need to do here + end + end + end + end else - opts = detectImportOptions(infile); - DMtbl= readtable(infile,opts); - fields= opts.VariableOptions; - Linked= DMtbl{1,1}; - DMcell= readcell(infile); + disp(strcat('Using drugMediaFile: ', drugMediaFile, ', skipping directory selection')); end -cd(w) -numb=0; +% Drug and Media Plate setup +% TODO needs better explanation +if ispc + [num, txt, raw]=xlsread(drugMediaFile); %'Yor1HitsMPsetFinal'); + fields={txt(2,1:5)}; + Linked=num(1,1); +else + opts=detectImportOptions(drugMediaFile); + DMtbl=readtable(drugMediaFile,opts); + fields=opts.VariableOptions; + Linked=DMtbl{1,1}; + DMcell=readcell(drugMediaFile); +end -if isequal(Linked,1) %Drugs and Media are linked 1 to 1; else they are combinatorial +% TODO needs better explanation +numb=0; +if isequal(Linked,1) % Drugs and Media are linked 1 to 1; else they are combinatorial clear DM; -%try excLnNum=2; if ispc while (~isequal(txt{excLnNum,2},'###')) @@ -169,8 +278,8 @@ if isequal(Linked,1) %Drugs and Media are linked 1 to 1; else they are combina end end end -%Legacy contengency -Not ever used!! -if isequal(Linked,0) %0 indicates Drugs and Media are combinatorial +% Legacy contengency: not ever used +if isequal(Linked,0) % 0 indicates Drugs and Media are combinatorial clear DM; excLnNum=2; drgCnt=0; @@ -188,7 +297,7 @@ if isequal(Linked,0) %0 indicates Drugs and Media are combinatorial DM.media(medCnt) = {raw(excLnNum,4)}; excLnNum=excLnNum+1; end - else %else if not PC (Then linux or other) + else excLnNum=1; while (~isequal(DMcell{excLnNum+1,2},'###')) drgCnt=drgCnt+1; @@ -206,7 +315,5 @@ if isequal(Linked,0) %0 indicates Drugs and Media are combinatorial msg=strcat('NumberOfDrugs = ',num2str(drgCnt), ' NumberOfMedias = ',num2str(medCnt) ) end -save (fullfile(MPdir,'MPDMmat'), 'fields','MP','DM','Linked'); - -cd(w) -msgbox(['Drug-Media-MasterPlate Annotation File Generation Complete']) +save(mpdmFile, 'fields','MP','DM','Linked'); +msgbox([strcat('Drug-Media-MasterPlate Annotation File', mpdmFile,'Generation Complete')])