Commit earlier refactoring

This commit is contained in:
2024-07-29 11:44:45 -04:00
parent 29cbce0754
commit 527068e683
294 changed files with 5524008 additions and 0 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -0,0 +1,329 @@
%% CALLED BY EASYconsole.m %%
% Builds the Drug-Media/MasterPlate (MPDM) annotation .mat file from the
% MasterPlate_ and DrugMedia_ spreadsheets. All state is passed via globals
% set by EASYconsole.m.
global mpdmFile
global masterPlateFile
global drugMediaFile
global matDir
% If we already have mpdmFile, don't recreate.
% Check isempty first: exist() must not be handed an empty/unset global.
if (~isempty(mpdmFile) && exist(mpdmFile, 'file'))
fprintf('The Drug Media/MasterPlate Annotation File: %s exists, skipping DMPexcel2mat.m\n', mpdmFile);
return
end
% Do our best to find and load a relevant MasterPlate file.
% Strategy: (1) if matDir exists, auto-pick the newest MasterPlate_* file;
% (2) otherwise (or if auto-detection fails) fall back to a GUI directory
% picker, giving the user 5 attempts. The original duplicated the GUI loop
% in both branches and ran it even after auto-detection succeeded; it is
% consolidated and guarded here.
if ~exist(masterPlateFile, 'file') || isempty(masterPlateFile)
    if exist(fullfile(matDir), 'dir')
        % Try to find the masterPlateFile automatically (newest created first)
        try
            files=dir(matDir);
            % {files.name} collects names into a cell; strncmp on the bare
            % comma-separated list (files.name) is a syntax error.
            isMP=strncmp({files.name}, 'MasterPlate_', 12);
            mpFiles={files(isMP).name};
            if isempty(mpFiles)
                throw(MException('MATLAB:dir', 'No MasterPlate_ files in the default MasterPlate directory'));
            end
            % Sort the matching files by date (newest first)
            [~, sortedIndices]=sort(datenum({files(isMP).date}), 'descend');
            sortedFiles=mpFiles(sortedIndices);
            % Keep the full path so later exist()/xlsread() calls work from any cwd
            masterPlateFile=fullfile(matDir, sortedFiles{1});
            fprintf('Using newest MasterPlate file: %s, skipping directory selection\n', masterPlateFile);
        catch
            % This can be silent, not really an error
        end
    end
    % GUI fallback, only if automatic discovery did not produce a usable file
    if ~exist(masterPlateFile, 'file') || isempty(masterPlateFile)
        for i=1:5 % give users 5 chances to get it right
            try
                % For standalone mode:
                % GUI input for selecting a MasterPlate directory
                questdlg('Select MasterPlate Directory (containing DrugMedia_ & MasterPlate_ files)','Directory Selection','OK',...
                    struct('Default','OK','Interpreter','tex'))
                dirToScan=uigetdir();
                files=dir(dirToScan);
                isMP=strncmp({files.name}, 'MasterPlate_', 12);
                mpFiles={files(isMP).name};
                if isempty(mpFiles)
                    throw(MException('MATLAB:dir', 'No MasterPlate_ files in directory'))
                end
                % this sorts by date (newest first)
                [~, sortedIndices]=sort(datenum({files(isMP).date}), 'descend');
                sortedFiles=mpFiles(sortedIndices); % ()-index keeps a cell array
                masterPlateFile=fullfile(dirToScan, sortedFiles{1});
                fprintf('Using newest MasterPlate file: %s\n', masterPlateFile);
                break
            catch ME
                h=msgbox(ME.message, 'Error', 'error');
                uiwait(h);
                disp('Rerunning directory selection');
                % I don't know what else we'll need to do here
            end
        end
    end
else
    fprintf('Using MasterPlate file: %s skipping directory selection\n', masterPlateFile);
end
% fid=fopen(masterPlateFile)%('exp23PrintTimes.xls'); % textread puts date and time sequentially into vector
% Read the MasterPlate spreadsheet. On Windows xlsread gives numeric/text/raw
% arrays; elsewhere we use readtable/readcell (the input files should be csv,
% not xlsx - this whole section could be more portable).
if ispc
[num, txt, raw]=xlsread(masterPlateFile); %,'Yor1HitsMPsetFinal');
fields={txt(2,1:15)}; %or 1:17 for later but dont wish to exceed and cause error ? if used
else
clear MPtbl
opts=detectImportOptions(masterPlateFile);
MPtbl=readtable(masterPlateFile,opts);
% NOTE: the original re-read the table here WITHOUT opts, silently discarding
% the detected import options; that duplicate call has been removed.
fields={opts.VariableNames}; %? if used anywhere although 'saved' to MPDMmat
MPcell=readcell(masterPlateFile);
end
% numb counts 385-row MasterPlate sections; MP is rebuilt from scratch below
numb=0;
clear MP;
% Parse the MasterPlate sheet into the MP struct array. Each plate section is
% 385 rows: one '###'-marked header row followed by 384 well rows. Columns are
% fixed by convention: 1=recNum, 2=ORF, 3=strain, 7=media, 8=drug, 12=gene name,
% and (when >15 columns) 16=orfRep, 17=specifics.
try
if ispc
% xlsread path: txt/raw cell arrays, data starts at spreadsheet row 3
excLnNum=3;
while (isequal(txt{excLnNum,1},'###'))
numb=numb+1;
MP(numb).head={raw(excLnNum,2:6)};
MP(numb).recNum={raw((excLnNum+1):(excLnNum+384),1)};
MP(numb).orf={raw((excLnNum+1):(excLnNum+384),2)};
MP(numb).strain={raw((excLnNum+1):(excLnNum+384),3)};
MP(numb).genename={raw((excLnNum+1):(excLnNum+384),12)};
MP(numb).drug={raw((excLnNum+1):(excLnNum+384),8)};
MP(numb).media={raw((excLnNum+1):(excLnNum+384),7)};
if size(raw,2)>15
MP(numb).orfRep={raw((excLnNum+1):(excLnNum+384),16)}; % added 12_1005 to specify replicates Orfs in MP
MP(numb).specifics={raw((excLnNum+1):(excLnNum+384),17)}; % added 12_1008 to specify replicates Specific details in MP
else
% Narrow sheets: placeholder blanks keep downstream printing code working
MP(numb).orfRep=' ';
MP(numb).specifics= ' ';
end
% Future MP field
% if size(raw,2)>17
% MP(numb).specifics2= {raw((excLnNum+1):(excLnNum+384),18)}; % added 12_1008 to specify strain Bkground in MP
% else
% MP(numb).specifics2=' ';
% end
% Advance to the next plate section (header + 384 wells)
excLnNum=excLnNum+385;
msg=strcat('NumberOfMP = ',num2str(numb), ' lastLineNo. = ',num2str(excLnNum));
end
else
% readtable/readcell path: MPcell row (excLnNum+2) mirrors the xlsread
% header row; each table slice is converted to a plain cell array
excLnNum=1;
while (isequal(MPcell{(excLnNum+2),1},'###'))
numb=numb+1;
MP(numb).head={MPtbl(excLnNum,2:6)};
MP(numb).head{1}=table2cell(MP(numb).head{1});
MP(numb).recNum={MPtbl((excLnNum+1):(excLnNum+384),1)};
MP(numb).recNum{1}=table2cell(MP(numb).recNum{1});
MP(numb).orf={MPtbl((excLnNum+1):(excLnNum+384),2)};
MP(numb).orf{1}=table2cell(MP(numb).orf{1});
MP(numb).strain={MPtbl((excLnNum+1):(excLnNum+384),3)};
MP(numb).strain{1}=table2cell(MP(numb).strain{1});
MP(numb).genename={MPtbl((excLnNum+1):(excLnNum+384),12)};
MP(numb).genename{1}= table2cell(MP(numb).genename{1});
% NOTE(review): unlike the ispc branch, no MP(numb).drug field is set here
% (column 8) - confirm whether downstream consumers need it on this path
MP(numb).media= {MPtbl((excLnNum+1):(excLnNum+384),7)};
MP(numb).media{1}= table2cell(MP(numb).media{1});
if size(MPtbl,2)>15
MP(numb).orfRep= {MPtbl((excLnNum+1):(excLnNum+384),16)}; % added 12_1005 to specify replicates Orfs in MP
MP(numb).orfRep{1}=table2cell(MP(numb).orfRep{1});
MP(numb).specifics={MPtbl((excLnNum+1):(excLnNum+384),17)}; % added 12_1008 to specify replicates Specific details in MP
MP(numb).specifics{1}=table2cell(MP(numb).specifics{1});
else
MP(numb).orfRep= ' ';
MP(numb).specifics= ' ';
end
excLnNum=excLnNum+385;
msg=strcat('NumberOfMP = ',num2str(numb), 'lastLineNo. = ',num2str(excLnNum));
end
end
catch ME
% Malformed sheet (missing '###' sentinel, short section, ...): tell the user
h=msgbox(ME.message, 'Error', 'error');
uiwait(h);
end
%DMupload
%Drug and Media Plate setup Upload from Excel
excLnNum=1;
numOfDrugs=0;
numOfMedias=0;
% Grab the bare experiment name from the MasterPlate filename so we can
% automatically look for a matching DrugMedia_<name>.xlsx next to it.
% (The original called fullfile() with two outputs, which errors; fileparts
% is the splitting function.)
[mpPath, mpName]=fileparts(masterPlateFile);
mpFileParts=strsplit(mpName, '_');
mpBareFileName=strjoin(mpFileParts(2:end-1), '_'); % drop 'MasterPlate' prefix and trailing date chunk
if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile)
    if exist(fullfile(matDir), 'dir')
        try
            % Build the candidate name with strcat; fullfile would insert
            % path separators between 'DrugMedia_' and the name.
            dmFileToTest=fullfile(mpPath, strcat('DrugMedia_', mpBareFileName, '.xlsx'));
            if exist(dmFileToTest, 'file') % Try to find a matching drug media file
                drugMediaFile=dmFileToTest;
                fprintf('Using matching DrugMedia file: %s, skipping directory selection\n', drugMediaFile);
            else
                % Try to find the DrugMedia file automatically (newest created first)
                files=dir(matDir);
                isDM=strncmp({files.name}, 'DrugMedia_', 10); % cell of names, not comma-list
                dmFiles={files(isDM).name};
                if isempty(dmFiles)
                    throw(MException('MATLAB:dir', 'No DrugMedia_ files in directory'));
                end
                % this sorts by date (newest first)
                [~, sortedIndices]=sort(datenum({files(isDM).date}), 'descend');
                sortedFiles=dmFiles(sortedIndices);
                drugMediaFile=fullfile(matDir, sortedFiles{1});
                fprintf('Using newest DrugMedia file: %s, skipping directory selection\n', drugMediaFile);
            end
        catch ME % was 'catch Me', leaving ME unset
            % This can be silent, not really an error
        end
    else
        for i=1:5 % give users 5 chances to get it right
            try
                % For standalone mode:
                % GUI input for selecting a DrugMedia directory, then pick the
                % newest DrugMedia_ file inside it
                questdlg('Select DrugMedia directory','Directory Selection','OK',...
                    struct('Default','OK','Interpreter','tex'))
                dirToScan=uigetdir();
                files=dir(dirToScan);
                isDM=strncmp({files.name}, 'DrugMedia_', 10);
                dmFiles={files(isDM).name};
                if isempty(dmFiles)
                    throw(MException('MATLAB:dir', 'No DrugMedia_ files in directory'));
                end
                % this sorts by date (newest first)
                [~, sortedIndices]=sort(datenum({files(isDM).date}), 'descend');
                sortedFiles=dmFiles(sortedIndices);
                drugMediaFile=fullfile(dirToScan, sortedFiles{1});
                fprintf('Using newest DrugMedia file: %s\n', drugMediaFile);
                break % success; original lacked this and re-prompted after success
            catch ME
                h=msgbox(ME.message, 'Error', 'error');
                uiwait(h);
                disp('Rerunning directory selection');
                % I don't know what else we'll need to do here
            end
        end
    end
else
    fprintf('Using drugMediaFile: %s, skipping directory selection\n', drugMediaFile);
end
% Drug and Media Plate setup
% Read the DrugMedia sheet; the first numeric cell ("Linked") selects the
% parsing mode below: 1 = drugs/media paired 1:1 per row, 0 = combinatorial.
if ispc
[num, txt, raw]=xlsread(drugMediaFile); %'Yor1HitsMPsetFinal');
fields={txt(2,1:5)};
Linked=num(1,1);
else
opts=detectImportOptions(drugMediaFile);
DMtbl=readtable(drugMediaFile,opts);
fields={opts.VariableNames}; % was opts.VariableOptions; use names to match the MasterPlate branch
Linked=DMtbl{1,1};
DMcell=readcell(drugMediaFile);
end
% Linked (1:1) DrugMedia parsing: each sheet row pairs one drug/conc with one
% media (plus up to two optional modifier/conc pairs); rows are read until the
% '###' sentinel appears in column 2.
numb=0;
if isequal(Linked,1) % Drugs and Media are linked 1 to 1; else they are combinatorial
clear DM;
excLnNum=2;
if ispc
% xlsread path: pull straight from the raw cell array
while (~isequal(txt{excLnNum,2},'###'))
numb=numb+1;
DM.drug(numb)={raw(excLnNum,2)};
DM.conc(numb)={raw(excLnNum,3)};
DM.media(numb)={raw(excLnNum,4)};
DM.mod1(numb)={raw(excLnNum,5)};
DM.conc1(numb)={raw(excLnNum,6)};
DM.mod2(numb)={raw(excLnNum,7)};
DM.conc2(numb)={raw(excLnNum,8)};
excLnNum=excLnNum+1;
msg=strcat('NumberOf1:1DrugMediaPlates = ',num2str(numb), ' lastLineNo. = ',num2str(excLnNum));
end
else
% readtable path: DMcell supplies the sentinel test (offset +1 vs the
% table's data rows); each 1x1 table slice is flattened via table2cell
clear DM
excLnNum=1;
while (~isequal(DMcell{excLnNum+1,2},'###'))
numb=numb+1;
DM.drug(numb)={DMtbl(excLnNum,2)};
DM.drug(numb)=table2cell(DM.drug{numb});
DM.conc(numb)={DMtbl(excLnNum,3)};
DM.conc(numb)=table2cell(DM.conc{numb});
DM.media(numb)={DMtbl(excLnNum,4)};
DM.media(numb)=table2cell(DM.media{numb});
DM.mod1(numb)={DMtbl(excLnNum,5)};
DM.mod1(numb)=table2cell(DM.mod1{numb});
DM.conc1(numb)={DMtbl(excLnNum,6)};
DM.conc1(numb)=table2cell(DM.conc1{numb});
DM.mod2(numb)={DMtbl(excLnNum,7)};
DM.mod2(numb)=table2cell(DM.mod2{numb});
DM.conc2(numb)={DMtbl(excLnNum,8)};
DM.conc2(numb)=table2cell(DM.conc2{numb});
excLnNum=excLnNum+1;
msg=strcat('NumberOf1:1DrugMediaPlates = ',num2str(numb), ' lastLineNo. = ',num2str(excLnNum));
end
end
end
% Legacy contingency: not ever used
% Combinatorial DrugMedia parsing: drugs (cols 2/3) and media (col 4) are
% listed independently, each run terminated by its own '###' sentinel row.
if isequal(Linked,0) % 0 indicates Drugs and Media are combinatorial
clear DM;
excLnNum=2;
drgCnt=0;
medCnt=0;
if ispc
while (~isequal(txt{excLnNum,2},'###'))
drgCnt=drgCnt+1;
DM.drug(drgCnt)={raw(excLnNum,2)};
DM.conc(drgCnt)={raw(excLnNum,3)};
excLnNum=excLnNum+1;
end
while (~isequal(txt{excLnNum,4},'###'))
medCnt=medCnt+1;
DM.media(medCnt)={raw(excLnNum,4)};
excLnNum=excLnNum+1;
end
else
excLnNum=1;
while (~isequal(DMcell{excLnNum+1,2},'###'))
drgCnt=drgCnt+1;
DM.drug(drgCnt)={DMtbl(excLnNum,2)};
DM.conc(drgCnt)={DMtbl(excLnNum,3)};
excLnNum=excLnNum+1;
end
while (~isequal(DMcell{excLnNum+1,4},'###')) % was 'DMcel' - undefined variable
medCnt=medCnt+1;
DM.media(medCnt)={DMtbl(excLnNum,4)};
excLnNum=excLnNum+1;
end
end
msg=strcat('NumberOfDrugs = ',num2str(drgCnt), ' NumberOfMedias = ',num2str(medCnt) );
end
% Persist the parsed annotation (column headers, MasterPlate structs, DrugMedia
% structs, and the Linked flag) as the single .mat file consumed downstream
save(mpdmFile, 'fields','MP','DM','Linked');
msgbox(sprintf('Drug-Media-MasterPlate Annotation File %s Generation Complete', mpdmFile))

View File

@@ -0,0 +1,668 @@
%% CALLED BY EASYconsole.m %%
% Updated 240724 Bryan C Roessler to improve file operations and portability
%
% Globals shared with EASYconsole.m and the other EASY scripts
global scansDir
global matFile
global defImParMat
global printResultsDir
global fotosResultsDir
global pointMapsFile
global pointMapsResultsDir
global mpdmFile % was missing: load(mpdmFile) below reads this global
% Running line counter for the numbered rows written to the Results file
ln=1;
% Version compatability fixes: build the 4th derivative of the logistic growth
% model K/(1+exp(-r*(t-l))) symbolically; its roots (sols) are used later to
% compute rise time for each well's fit
if verLessThan('matlab','8.3')
fd4=diff(sym('K / (1 + exp(-r* (t - l )))'),4);
sols=solve(fd4);
else % accomodate new matlab changes after 2014a
syms t K r l;
fd4=diff(K / (1 + exp(-r* (t - l ))),t,4);
sols=solve(fd4);
% Newer solve() returns the roots in a different order; swap 1<->3 so the
% indexing used in the rise-time calculation below stays valid
tmpswap=sols(1);
sols(1)=sols(3);
sols(3)=tmpswap;
end
% MPnum=ImParMat(1);
% opt=questdlg('Print Results Only (RES), DB Only (DB), or Both','Results Printout Options','Res','DB','Both','Both');
% Output mode: 'Res' (results text file), 'DB' (database upload file), 'Both'.
% Hardwired to 'Res' here; the questdlg above is the interactive alternative.
opt='Res';
if ~exist('defImParMat','var') || isempty(defImParMat) % TODO needs better explanation
load(pointMapsFile);
end
destPerMP=ImParMat(2); % TODO this is weird, needs explanation
load(matFile);
load(mpdmFile)
numOfDrgs=length(DM.drug);
numOfMeds=length(DM.media);
% destPerMP (plates per MasterPlate) is immediately overridden by the number
% of drug perturbations from the DrugMedia annotation
destPerMP=numOfDrgs;
% TODO this should be rewritten or is uncessary, what is this for
% Determine the longest set of intensity(tPts) for the Experiment Data Set
maxNumIntens=0;
for n=1:size(scan,2)
for m=1:size(scan(n).plate,2)
maxNumIntens=max(maxNumIntens,size(scan(n).plate(m).intens,2));
end
end
% if length(scansDir) == max(strfind(scansDir,'\'))
% localscansDir=scansDir(1:end-1);
% else
% localscansDir=scansDir;
% end
% TODO this seems weird
% NOTE(review): assumes scansDir uses '/' separators with no trailing slash;
% the last path component becomes the experiment name - confirm on Windows
expNm=scansDir(max(strfind(scansDir,'/'))+1:end);
drivePos=min(strfind(scansDir,'/'));
drive=scansDir(1:(drivePos-1));
DBupload=fullfile(drive,'EZdbFiles','DBupLOADfiles');
% Added to allow backward compatability
% Test for CFoutStd as indication of 2018version with r_refined fit code;If
% earlier version with only a standard composite fite, Print results to !!ResultsStd_...txt only
try
scan(1).plate(1).CFoutStd(1,1);
resultsFilename=fullfile(printResultsDir, strcat('!!ResultsELr_',expNm,'.txt'));
DBfilename=fullfile(printResultsDir, strcat('!!DbaseELr_',expNm,'.txt'));
catch
resultsFilename=fullfile(printResultsDir, strcat('!!ResultsStd_',expNm,'.txt'));
DBfilename=fullfile(printResultsDir, strcat('!!DbaseStd_',expNm,'.txt'));
end
% Open the output file(s) requested by opt and write the experiment header
if isequal(opt,'Res')||isequal(opt,'Both'),fid=fopen(resultsFilename,'w');end
if isequal(opt,'DB')||isequal(opt,'Both'),fid2=fopen(DBfilename,'w');end
if isequal(opt,'Res')||isequal(opt,'Both')
fprintf(fid,'%d\t',ln); % Results header
fprintf(fid,'%s\t\n',scansDir);
ln=ln+1;
fprintf(fid,'%d\t',ln);
end
mpCnt=0;
totPlCnt=0;
drgCnt=0;
medCnt=0;
% sbdg diagnostics live in Nbdg.mat; location differs between pipeline versions
try
load(fullfile(fotosResultsDir,'Nbdg')) % Convolute scan array data into plates
catch
load(fullfile(pointMapsResultsDir,'Nbdg')) % Convolute scan array data into plates
end
% Main r_refined (CFout) print pass: iterate scans -> plates -> 384 wells,
% writing per-well curve-fit statistics followed by the normalized intensity
% time series. Output is identical to the original; the only changes are the
% bare `s`/`p` console echoes replaced with an explicit progress line and a
% missing semicolon added on RawIntensSize.
for s=1:size(scan,2)
    % Convolute scan array data into plates DconB for DBcombo
    clear Diag
    try
        % Diagnostics cube: flip rows 16:-1:1 to match plate orientation
        Diag(:,:,:,1)=sbdg{s}(1:1:24,16:-1:1,:);
    catch
        sbdg{s}; % no diagnostics available for this scan; keep legacy no-op
    end
    for p=1:size((scan(s).plate),2)
        totPlCnt=totPlCnt+1;
        % Advance to the next MasterPlate once per destPerMP plates
        if destPerMP>1 && rem(totPlCnt,destPerMP)==1
            mpCnt=mpCnt+1;
        end
        if destPerMP==1
            mpCnt=mpCnt+1;
        end
        % pertCnt indexes the drug/media perturbation within the MasterPlate
        pertCnt=rem(totPlCnt,destPerMP);
        if pertCnt==0
            pertCnt=destPerMP;
        end
        pert=strcat('Perturb_',num2str(pertCnt));
        fprintf('Processing scan %d plate %d\n', s, p); % progress (was bare s/p echoes)
        clear outCmat
        outCmat=scan(s).plate(p).CFout;
        % Print Time Point HEADER for each plate for newly added intensity data
        if isequal(opt,'Res')||isequal(opt,'Both')
            fprintf(fid, '\n');
            ln=ln+1;
            fprintf(fid,'%d\t',ln);
            fprintf(fid,'Scan\tPlate\tRow\tCol\t');
            % AUC column label embeds the fit endpoint (CFparameters slot 9);
            % the two layouts below cover old and new CFparameters shapes
            try
                asd=cell2mat(scan(s).plate(1).CFparameters(1));
                aucEndPt=strcat('AUC',num2str(asd(9)));
            catch
                asd=cell2mat(scan(s).plate(1).CFparameters{1,1}(1,384));
                aucEndPt=strcat('AUC',num2str(asd(9)));
            end
            fprintf(fid, 'Num.\tDiagnostics\tDrug\tConc\tMedia\tModifier1\tConc1\tModifier2\tConc2\tORF\tGene');
            fprintf(fid, '\t %s',aucEndPt);
            fprintf(fid, '\triseTm\tK\tr\tl\tR-squared\tK-lower\tK-upper\tr-lower\tr-upper\tl-lower\tl-upper\tArea\tLastInten\tSplineMaxRateTm\tLastFitTm\t1stFitTm\tMedianBase\tFitBase\tMinTm\tThreshTm');
            if size(outCmat,2)==27
                fprintf(fid, '\ttc11Cut\ttc12Cut\ttc21Cut\ttc22Cut'); %'\tEarly1\tEarly2\tLate1\tLate2'); 17_0629 MinBaseIntens update for MedianBase label
            end
            fprintf(fid, '\tTotFitPts\tPostThreshFitPts\t1stBackgrd\tLstBackgrd\t1stMeanTotBackgrd\tLstMeanTotBackgrd');
        end
        clear outTseries
        outTseries=[];
        outTseries=scan(s).plate(p).tSeries;
        TseriesSize=size(outTseries,1);
        clear outIntens
        outIntens=[];
        RawIntens=[];
        RawIntens=scan(s).plate(p).intens;
        RawIntensSize=size(RawIntens,2); % semicolon added: suppress console echo
        clear Ag; %Ag is Growth Area
        Ag=scan(s).plate(p).Ag;
        AgSize=size(Ag);
        % Guard against time/intensity arrays of different lengths
        dataLength=min(TseriesSize,RawIntensSize);
        if isequal(opt,'Res')||isequal(opt,'Both')
            for j=1:dataLength
                fprintf(fid, '\t%.5f', outTseries(j));
            end
        end
        numBlkCol=(maxNumIntens - dataLength); %size(outTseries,1));
        if isequal(opt,'Res')||isequal(opt,'Both')
            for nn=1:numBlkCol %extend to col beyond longest rawDataSet
                fprintf(fid, '\t');
            end
            fprintf(fid,'\tOrfRep');
            fprintf(fid,'\tSpecifics');
            fprintf(fid,'\tStrainBkGrd');
            fprintf(fid, '\n');
            ln=ln+1;
            fprintf(fid,'%d\t',ln);
        end
        % Data: one row per well, 16x24 = 384 wells
        n=0;
        for r=1:16
            for c=1:24
                n=n+1;
                clear selcode;
                % selcode collects quality flags for this well's fit
                Kval=outCmat(n,3);
                rSq=outCmat(n,6);
                lval=outCmat(n,5);
                if Kval>160, selcode='K_Hi'; else selcode=' ';end
                if Kval<40, selcode=strcat(selcode,' K_Lo');end
                if rSq<.97 && rSq>0, selcode=strcat(selcode,' rSqLo');end
                if lval>(0.85*(max(outTseries))), selcode=strcat(selcode,' late');end
                if isnan(outCmat(n,7))||isnan(outCmat(n,8))||isnan(outCmat(n,9))...
                        ||isnan(outCmat(n,10))||isnan(outCmat(n,11))...
                        ||isnan(outCmat(n,12)), selcode=strcat(selcode,' NaN');
                end
                % RiseTime Calculation: evaluate the symbolic roots (sols) of
                % the logistic 4th derivative at this well's fitted r and l
                K=(outCmat(n,3));
                R=(outCmat(n,4));
                L=(outCmat(n,5));
                if R>0 && L>0 && K>0
                    rr=R; ll=L; % names referenced inside the symbolic solutions
                    tc1=eval(sols(2));
                    tc2=eval(sols(3));
                    LL=eval(sols(1));
                    riseTm=LL-tc1;
                else
                    riseTm=0;
                end
                if Ag(n)< .30*(scan(s).Awindow),selcode=strcat(selcode,' smArea'); end
                if outCmat(n,3)==0,selcode=strcat('0 ',selcode);end
                % Annotation for this well from the MasterPlate/DrugMedia structs
                orf=cell2mat(MP(mpCnt).orf{1}(n));
                gene=cell2mat(MP(mpCnt).genename{1}(n));
                orfRep=cell2mat(MP(mpCnt).orfRep{1}(n));
                specifics=cell2mat(MP(mpCnt).specifics{1}(n));
                strain=cell2mat(MP(mpCnt).strain{1}(n));
                drug=char(DM.drug{pertCnt});
                conc=char(DM.conc{pertCnt});
                media=char(DM.media{pertCnt});
                try
                    mod1=char(DM.mod1{pertCnt});
                    conc1=char(DM.conc1{pertCnt});
                catch
                    mod1=' ';
                    conc1=' ';
                end
                try
                    mod2=char(DM.mod2{pertCnt});
                    conc2=char(DM.conc2{pertCnt});
                catch
                    mod2=' ';
                    conc2=' ';
                end
                if ~isempty(outCmat)
                    if isequal(opt,'Res')||isequal(opt,'Both')
                        fprintf(fid,'%d\t %d\t %d\t %d\t %d\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t',s,p,r,c,n,selcode,drug,conc,media,mod1,conc1,mod2,conc2,orf,gene);
                        fprintf(fid, '%.5f\t %.5f\t %.5f\t %.5f\t %.5f\t %.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f',...
                            outCmat(n,1),riseTm,outCmat(n,3),outCmat(n,4),...
                            outCmat(n,5),outCmat(n,6),outCmat(n,7),outCmat(n,8),...
                            outCmat(n,9),outCmat(n,10),outCmat(n,11),outCmat(n,12),...
                            outCmat(n,13),outCmat(n,14),outCmat(n,15),outCmat(n,16));
                        fprintf(fid, '\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f',...
                            outCmat(n,17),outCmat(n,18),outCmat(n,19),...
                            outCmat(n,20),outCmat(n,21));
                        % Added for data cut times used in 'r'optomized method 06/14/2018
                        if (size(outCmat,2) == 27)
                            fprintf(fid, '\t%.5f\t%.5f\t%.5f\t%.5f',...
                                outCmat(n,24),outCmat(n,25),outCmat(n,26),outCmat(n,27));
                        end
                        fprintf(fid, '\t%d\t%d\t%d\t%d\t%d\t%d',...
                            outCmat(n,22),outCmat(n,23),Diag(c,r,1,p),Diag(c,r,2,p),Diag(c,r,3,p),Diag(c,r,4,p)); %,Diag(r,c,3,p),Diag(r,c,4,p));
                    end
                    % DBfile: sanitize confidence-interval columns before printing
                    if isequal(opt,'DB')||isequal(opt,'Both')
                        dbRsq=0;dbKup=0; dbKlo=0; dbrup=0; dbrlo=0; dbLlo=0; dbLup=0;
                        if isnumeric(outCmat(n,6)), dbRsq=outCmat(n,6);end
                        if isnumeric(outCmat(n,7)), dbKup=outCmat(n,7);end
                        if isnumeric(outCmat(n,8)), dbKlo=outCmat(n,8);end
                        if isnumeric(outCmat(n,9)), dbrup=outCmat(n,9);end
                        if isnumeric(outCmat(n,10)), dbrlo=outCmat(n,10);end
                        if isnumeric(outCmat(n,11)), dbLlo=outCmat(n,11);end
                        if isnumeric(outCmat(n,12)), dbLup=outCmat(n,12);end
                    end
                    if isequal(opt,'DB')||isequal(opt,'Both')
                        fprintf(fid2,'%s\t %d\t %d\t %d\t %d\t %d\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t',expNm,s,p,r,c,n,selcode,drug,conc,media,mod1,conc1,mod2,conc2,orf,gene);
                        fprintf(fid2, '%.5f\t %.5f\t %.5f\t %.5f\t %.5f\t %.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f',...
                            outCmat(n,1),riseTm,outCmat(n,3),outCmat(n,4),...
                            outCmat(n,5),dbRsq,dbKup,dbKlo,dbrup,dbrlo,dbLlo,dbLup); %\t%.5f\t%.5f\t%.5f\t%.5f
                    end
                    % Add Intensities series to end of curve fit data,
                    % normalized by growth area Ag
                    outIntens=[];
                    outIntens=zeros(384,dataLength);
                    intensBlob='';
                    tmBlob ='';
                    for j=1:dataLength %size(RawIntens,2) %size(outTseries,1)
                        if Ag(n)==0,Ag(n)=scan(s).Awindow;end
                        outIntens(n,j)=RawIntens(n,j)/Ag(n);
                        if isequal(opt,'Res')||isequal(opt,'Both')
                            fprintf(fid, '\t%.5f', outIntens(n,j)); % Results print Intens
                        end
                        if isequal(opt,'DB')||isequal(opt,'Both')
                            % Semicolon-delimited blobs for the DB upload file
                            if j<dataLength
                                intensBlob=strcat(intensBlob,num2str(outIntens(n,j)),';');
                            else
                                intensBlob=strcat(intensBlob,num2str(outIntens(n,j)));
                            end
                            if outTseries(j)<.0001,outTseries(j)=0;end
                            if j<dataLength
                                tmBlob=strcat(tmBlob,num2str(outTseries(j)),';');
                            else
                                tmBlob=strcat(tmBlob,num2str(outTseries(j)));
                            end
                        end
                    end
                    % Results: pad to the longest series, then MasterPlate extras
                    if isequal(opt,'Res')||isequal(opt,'Both')
                        for nn=1:numBlkCol %extend to col beyond longest rawDataSet
                            fprintf(fid, '\t');
                        end
                        % Masterplate sheet
                        if ~isnan(orfRep)
                            fprintf(fid, '\t%s', orfRep); %print OrfRep
                        else
                            fprintf(fid, '\t%s', ' ');
                        end
                        if ~isnan(specifics)
                            fprintf(fid, '\t%s', specifics);
                        else
                            fprintf(fid, '\t%s', ' ');
                        end
                        if ~isnan(strain)
                            fprintf(fid, '\t%s', strain);
                        else
                            fprintf(fid, '\t%s', ' ');
                        end
                        fprintf(fid, '\n');
                        ln=ln+1;
                        fprintf(fid,'%d\t',ln);
                    end
                    %DB Raw Intensities and Timepoints
                    if isequal(opt,'DB')||isequal(opt,'Both')
                        fprintf(fid2, '\t%s\t%s',intensBlob,tmBlob );
                        % Masterplate sheet
                        if ~isnan(orfRep)
                            fprintf(fid2, '\t%s',orfRep );
                        else
                            fprintf(fid2, '\t%s', ' ');
                        end
                        if ~isnan(specifics)
                            fprintf(fid2, '\t%s',specifics );
                        else
                            fprintf(fid2, '\t%s', ' ');
                        end
                        if ~isnan(strain)
                            fprintf(fid2, '\t%s',strain );
                        else
                            fprintf(fid2, '\t%s', ' ');
                        end
                        fprintf(fid2, '\n');
                    end
                end %if ~isempty(outCmat)
            end %c
        end %r
    end %p
end %s
% Close the r_refined output file(s) and stage the DB file for upload
if isequal(opt,'Res')||isequal(opt,'Both')
    fclose(fid);
end
if isequal(opt,'DB')||isequal(opt,'Both')
    fclose(fid2);
    try
        copyfile(DBfilename,DBupload)
    catch ME
        fprintf('DB upload failed with error: %s\n', getReport(ME, 'basic'));
        % Build the dialog text from the caught error; the original referenced
        % the not-yet-defined variable 'rep' here
        rep=sprintf('Failed copyfile to %s - %s', DBupload, getReport(ME, 'basic'));
        errordlg(rep);
    end
end
% Print results using the standard method (CFoutStd). Only runs for 2018+
% r_refined data: the probe on the first line throws for older .mat files and
% the surrounding try/catch then skips this whole pass.
try
    scan(1).plate(1).CFoutStd(1,1); %Test for 2018 r_refined version (semicolon added to suppress echo)
    ln=1;
    resultsFilename=fullfile(printResultsDir, strcat('!!ResultsStd_',expNm,'.txt'));
    DBfilename=fullfile(printResultsDir, strcat('!!DbaseStd_',expNm,'.txt'));
    if isequal(opt,'Res')||isequal(opt,'Both'),fid=fopen(resultsFilename,'w');end
    if isequal(opt,'DB')||isequal(opt,'Both'),fid2=fopen(DBfilename,'w');end %121012 Combo
    if isequal(opt,'Res')||isequal(opt,'Both') %print Results
        fprintf(fid,'%d\t',ln); %Results header
        fprintf(fid,'%s\t\n',scansDir);
        ln=ln+1;
        fprintf(fid,'%d\t',ln);
    end
    mpCnt=0;
    totPlCnt=0;
    drgCnt=0;
    medCnt=0;
    for s=1:size(scan,2)
        %Convolute scan array data into plates DconB for DBcombo
        clear Diag
        try
            Diag(:,:,:,1)= sbdg{s}(1:1:24,16:-1:1,:);
        catch
            sbdg{s}; % no diagnostics for this scan; keep legacy no-op
        end
        for p=1:size((scan(s).plate),2)
            totPlCnt=totPlCnt+1;
            % Same MasterPlate/perturbation bookkeeping as the r_refined pass
            if destPerMP>1 &&rem(totPlCnt,destPerMP)==1, mpCnt=mpCnt+1; end
            if destPerMP==1,mpCnt=mpCnt+1; end
            pertCnt=rem(totPlCnt,destPerMP);
            if pertCnt==0, pertCnt= destPerMP;end
            pert=strcat('Perturb_',num2str(pertCnt));
            fprintf('Processing scan %d plate %d (Std pass)\n', s, p); % progress (was a bare s echo)
            %Print Time Point HEADER for each plate for newly added intensity data
            if isequal(opt,'Res')||isequal(opt,'Both')
                fprintf(fid, '\n');
                ln=ln+1;
                fprintf(fid,'%d\t',ln);
                fprintf(fid,'Scan\tPlate\tRow\tCol\t');
                try
                    asd=cell2mat(scan(s).plate(1).CFparameters(1));
                    aucEndPt=strcat('AUC',num2str(asd(9)));
                catch
                    asd=cell2mat(scan(s).plate(1).CFparameters{1,1}(1,384));
                    aucEndPt=strcat('AUC',num2str(asd(9)));
                end
                fprintf(fid, 'Num.\tDiagnostics\tDrug\tConc\tMedia\tModifier1\tConc1\tModifier2\tConc2\tORF\tGene');
                fprintf(fid, '\t %s',aucEndPt);
                fprintf(fid, '\triseTm\tK\tr\tl\tR-squared\tK-lower\tK-upper\tr-lower\tr-upper\tl-lower\tl-upper\tArea\tLastInten\tSplineMaxRateTm\tLastFitTm\t1stFitTm\tMedianBase\tFitBase\tMinTm\tThreshTm\tTotFitPts\tPostThreshFitPts\t1stBackgrd\tLstBackgrd\t1stMeanTotBackgrd\tLstMeanTotBackgrd'); %17_0629 MinBaseIntens update for MedianBase label
            end
            clear outTseries
            outTseries=[];
            outTseries=scan(s).plate(p).tSeries;
            TseriesSize= size(outTseries,1);
            clear outCmat
            outCmat=scan(s).plate(p).CFoutStd;
            clear outIntens
            outIntens=[];
            RawIntens=[];
            RawIntens=scan(s).plate(p).intens;
            RawIntensSize=size(RawIntens,2);
            clear Ag; %Ag is Growth Area
            Ag=scan(s).plate(p).Ag;
            AgSize= size(Ag);
            dataLength= min(TseriesSize,RawIntensSize);
            if isequal(opt,'Res')||isequal(opt,'Both')
                for j=1:dataLength
                    fprintf(fid, '\t%.5f', outTseries(j));
                end
            end
            numBlkCol=(maxNumIntens - dataLength); %size(outTseries,1));
            if isequal(opt,'Res')||isequal(opt,'Both')
                for nn=1:numBlkCol %extend to col beyond longest rawDataSet
                    fprintf(fid, '\t');
                end
                fprintf(fid,'\tOrfRep');
                fprintf(fid,'\tSpecifics');
                fprintf(fid,'\tStrainBkGrd');
                fprintf(fid, '\n');
                ln=ln+1;
                fprintf(fid,'%d\t',ln);
            end
            % Data: one row per well, 16x24 = 384 wells
            n=0;
            for r=1:16
                for c=1:24
                    n=n+1;
                    clear selcode;
                    Kval=outCmat(n,3);
                    rSq=outCmat(n,6);
                    lval=outCmat(n,5);
                    if Kval>160, selcode='K_Hi'; else selcode=' ';end
                    if Kval<40, selcode=strcat(selcode,' K_Lo');end % TODO sprintf if you want a space
                    if rSq<.97 && rSq>0, selcode=strcat(selcode,' rSqLo');end
                    if lval>(0.85*(max(outTseries))), selcode=strcat(selcode,' late');end
                    if isnan(outCmat(n,7))||isnan(outCmat(n,8))||isnan(outCmat(n,9))...
                            ||isnan(outCmat(n,10))||isnan(outCmat(n,11))...
                            ||isnan(outCmat(n,12)), selcode=strcat(selcode,' NaN');
                    end
                    % RiseTime Calculation from the symbolic logistic roots
                    K=(outCmat(n,3));
                    R=(outCmat(n,4));
                    L=(outCmat(n,5));
                    if R>0 && L>0 && K>0
                        rr=R; ll=L; % names referenced inside the symbolic solutions
                        tc1=eval(sols(2));
                        tc2=eval(sols(3));
                        LL=eval(sols(1));
                        riseTm= LL-tc1;
                    else
                        riseTm=0;
                    end
                    if Ag(n)< .30*(scan(s).Awindow),selcode=strcat(selcode,' smArea'); end % same, need sprintf for space
                    if outCmat(n,3)==0,selcode=strcat('0 ',selcode); end
                    orf=cell2mat(MP(mpCnt).orf{1}(n));
                    gene=cell2mat(MP(mpCnt).genename{1}(n));
                    orfRep=cell2mat(MP(mpCnt).orfRep{1}(n));
                    specifics=cell2mat(MP(mpCnt).specifics{1}(n));
                    strain=cell2mat(MP(mpCnt).strain{1}(n));
                    drug=char(DM.drug{pertCnt});
                    conc=char(DM.conc{pertCnt});
                    media=char(DM.media{pertCnt});
                    try
                        mod1=char(DM.mod1{pertCnt});
                        conc1=char(DM.conc1{pertCnt});
                    catch
                        mod1=' ';
                        conc1=' ';
                    end
                    try
                        mod2=char(DM.mod2{pertCnt});
                        conc2=char(DM.conc2{pertCnt});
                    catch
                        mod2=' ';
                        conc2=' ';
                    end
                    if ~isempty(outCmat)
                        if isequal(opt,'Res')||isequal(opt,'Both')
                            fprintf(fid,'%d\t %d\t %d\t %d\t %d\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t',s,p,r,c,n,selcode,drug,conc,media,mod1,conc1,mod2,conc2,orf,gene);
                            fprintf(fid, '%.5f\t %.5f\t %.5f\t %.5f\t %.5f\t %.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f',...
                                outCmat(n,1),riseTm,outCmat(n,3),outCmat(n,4),...
                                outCmat(n,5),outCmat(n,6),outCmat(n,7),outCmat(n,8),...
                                outCmat(n,9),outCmat(n,10),outCmat(n,11),outCmat(n,12),...
                                outCmat(n,13),outCmat(n,14),outCmat(n,15),outCmat(n,16));
                            fprintf(fid, '\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%d\t%d',...
                                outCmat(n,17),outCmat(n,18),outCmat(n,19),...
                                outCmat(n,20),outCmat(n,21),outCmat(n,22),outCmat(n,23));
                            fprintf(fid, '\t%d\t%d\t%d\t%d',...
                                Diag(c,r,1,p),Diag(c,r,2,p),Diag(c,r,3,p),Diag(c,r,4,p)); %,Diag(r,c,3,p),Diag(r,c,4,p));
                        end
                        % DBFile: sanitize confidence-interval columns before printing
                        if isequal(opt,'DB')||isequal(opt,'Both')
                            dbRsq= 0;dbKup= 0; dbKlo= 0; dbrup= 0; dbrlo= 0; dbLlo= 0; dbLup= 0;
                            if isnumeric(outCmat(n,6)), dbRsq= outCmat(n,6);end
                            if isnumeric(outCmat(n,7)), dbKup= outCmat(n,7);end
                            if isnumeric(outCmat(n,8)), dbKlo= outCmat(n,8);end
                            if isnumeric(outCmat(n,9)), dbrup= outCmat(n,9);end
                            if isnumeric(outCmat(n,10)), dbrlo= outCmat(n,10);end
                            if isnumeric(outCmat(n,11)), dbLlo= outCmat(n,11);end
                            if isnumeric(outCmat(n,12)), dbLup= outCmat(n,12);end
                        end
                        if isequal(opt,'DB')||isequal(opt,'Both')
                            fprintf(fid2,'%s\t %d\t %d\t %d\t %d\t %d\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t',expNm,s,p,r,c,n,selcode,drug,conc,media,mod1,conc1,mod2,conc2,orf,gene);
                            fprintf(fid2, '%.5f\t %.5f\t %.5f\t %.5f\t %.5f\t %.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f',...
                                outCmat(n,1),riseTm,outCmat(n,3),outCmat(n,4),...
                                outCmat(n,5),dbRsq,dbKup,dbKlo,...
                                dbrup,dbrlo,dbLlo,dbLup);
                        end
                        % DB Analysis
                        % Add Intensities series to end of curve fit data,
                        % normalized by growth area Ag
                        outIntens=[];
                        outIntens=zeros(384,dataLength);
                        intensBlob='';
                        tmBlob ='';
                        for j=1:dataLength %size(RawIntens,2) %size(outTseries,1)
                            if Ag(n)==0,Ag(n)=scan(s).Awindow;end
                            outIntens(n,j)= RawIntens(n,j)/Ag(n);
                            if isequal(opt,'Res')||isequal(opt,'Both')
                                fprintf(fid, '\t%.5f', outIntens(n,j)); % Results print Intens
                            end
                            if isequal(opt,'DB')||isequal(opt,'Both')
                                if j<dataLength
                                    intensBlob=strcat(intensBlob,num2str(outIntens(n,j)),';');
                                else
                                    intensBlob=strcat(intensBlob,num2str(outIntens(n,j)));
                                end
                                if outTseries(j)<.0001,outTseries(j)=0;end
                                if j<dataLength
                                    tmBlob=strcat(tmBlob,num2str(outTseries(j)),';');
                                else
                                    tmBlob=strcat(tmBlob,num2str(outTseries(j)));
                                end
                            end
                        end
                        % Results fprint: pad to longest series, then MasterPlate extras
                        if isequal(opt,'Res')||isequal(opt,'Both')
                            for nn=1:numBlkCol %extend to col beyond longest rawDataSet
                                fprintf(fid, '\t');
                            end
                            % Masterplate sheet
                            if ~isnan(orfRep)
                                fprintf(fid, '\t%s', orfRep); %print OrfRep
                            else
                                fprintf(fid, '\t%s', ' ');
                            end
                            if ~isnan(specifics)
                                fprintf(fid, '\t%s', specifics);
                            else
                                fprintf(fid, '\t%s', ' ');
                            end
                            if ~isnan(strain)
                                fprintf(fid, '\t%s', strain);
                            else
                                fprintf(fid, '\t%s', ' ');
                            end
                            fprintf(fid, '\n');
                            ln=ln+1;
                            fprintf(fid,'%d\t',ln);
                        end
                        % Raw Intensities and Timepoints
                        if isequal(opt,'DB')||isequal(opt,'Both')
                            fprintf(fid2, '\t%s\t%s',intensBlob,tmBlob );
                            % Masterplate sheet
                            if ~isnan(orfRep)
                                fprintf(fid2, '\t%s',orfRep );
                            else
                                fprintf(fid2, '\t%s', ' ');
                            end
                            if ~isnan(specifics)
                                fprintf(fid2, '\t%s',specifics );
                            else
                                fprintf(fid2, '\t%s', ' ');
                            end
                            if ~isnan(strain)
                                fprintf(fid2, '\t%s',strain );
                            else
                                fprintf(fid2, '\t%s', ' ');
                            end
                            fprintf(fid2, '\n');
                        end %DB print
                    end %if ~isempty(outCmat)
                end %c
            end %r
        end %p
    end %s
    if isequal(opt,'Res')||isequal(opt,'Both')
        fclose(fid);
    end
    if isequal(opt,'DB')||isequal(opt,'Both')
        fclose(fid2);
        try
            copyfile(DBfilename,DBupload)
        catch ME
            % 'fsprintf' typo fixed, and the dialog text is now built from the
            % caught error instead of the undefined variable 'rep'
            fprintf('DB upload failed with error: %s\n', getReport(ME, 'basic'));
            rep=sprintf('Failed copyfile to %s - %s\n', DBupload, getReport(ME, 'basic'));
            errordlg(rep)
        end
    end
    msgbox([sprintf('Printing Script complete. Check !!Results sheets in %s for results.', printResultsDir)])
catch ME
    fprintf('Printing Script failed with error: %s\n', getReport(ME, 'basic'));
end

Binary file not shown.

View File

@@ -0,0 +1,527 @@
% Launch the MATLAB EASY console
% Updated 240727 Bryan C Roessler to improve file operations and portability
function varargout = EASYconsole(varargin)
% EASYconsole: GUI entry point for the EASY analysis pipeline.
% Resolves all shared path globals (scans, results, annotation files) from
% environment variables when present (module mode) or from well-known
% locations (standalone mode), then dispatches to the GUIDE-style GUI.
global easyDir
global easySuffix
global scansDir
global easyResultsDir
global easyResultsDirName
global fotosResultsDir
global figsResultsDir
global pointMapsResultsDir
global pointMapsFile
global printResultsDir
global matDir
global matFile
global drugMediaFile
global masterPlateFile
global mpdmFile
global userName
global srchRange
global searchRangeFile
% Initialize some variables from MATLAB
easyPath=which(mfilename);
[easyDir,easyFileName]=fileparts(easyPath);
easyDir=fullfile(easyDir);
[parentDir, ~]=fileparts(easyDir);
parentDir=fullfile(parentDir); % ../easy/apps
userName=getenv('USER');
dt=datetime;
todayStr=char(dt, 'yyyyMMdd'); % This should match the parent workflow script
demo=1;
if demo
    disp('Running in demo mode');
    disp('Initialized variables:');
    whos;
end
fprintf('This script name: %s\n', easyFileName);
% Set scansDir (project scans directory) intelligently.
% BUGFIX: exist('NAME','var') tests for a MATLAB *variable*, not an
% environment variable, so these branches could never fire; getenv()
% alone is the correct test.
if ~isempty(getenv('PROJECT'))
    scansDir=getenv('PROJECT');
    fprintf('Using project path: %s from environment variable PROJECT\n', scansDir);
    disp('This usually indicates that we are in standalone mode');
elseif ~isempty(getenv('SCANS_DIR'))
    scansDir=getenv('SCANS_DIR');
    fprintf('Using scans directory: %s from environment variable SCANS_DIR\n', scansDir);
    disp('This usually indicates that we are in module mode');
else
    % TODO Lots of this is hardcoded logic, this TODO is just a reminder to change this block
    % when changing EASY and other variables in the parent script
    fprintf('Beginning parent scans directory search\n');
    fprintf('This usually indicates that we are in stand-alone mode without PROJECT or SCANS_DIR environment variables\n');
    dirsToScan={
        fullfile(parentDir,'..', '..', 'scans'),
        fullfile(parentDir, '..', '..', 'ExpJobs'),
        fullfile('/mnt/data/scans'),
        fullfile('/mnt/data/ExpJobs'),
        fullfile(parentDir, '..', '..', 'templates', 'scans-demo')
    };
    for k=1:numel(dirsToScan) % BUGFIX: index the cell array, don't iterate raw cells
        d=dirsToScan{k};
        if exist(d, 'dir')
            subDirs=dir(d);
            % Keep only real subdirectories (drop '.', '..' and plain files)
            subDirs=subDirs([subDirs.isdir] & ~ismember({subDirs.name}, {'.','..'}));
            if ~isempty(subDirs)
                fprintf('Found a non-empty parent scans directory in our list: %s\n', d);
                fprintf('Scanning inside for a project scan directory\n');
                % BUGFIX: original referenced an undefined 'dirs' variable here
                [~, sortedIndices]=sort(datenum({subDirs.date}), 'descend'); % sort by newest first
                scansDir=fullfile(d, subDirs(sortedIndices(1)).name); % full path, not just the name
                fprintf('Selected newest project scans directory: %s\n', scansDir);
                break % first parent directory that yields a project wins
            end
        end
    end
end
% Sanity check and warning
if ~isempty(getenv('PROJECT_USER'))
    if ~strcmp(getenv('PROJECT_USER'), userName) % BUGFIX: 'equal' is not a MATLAB function
        disp("WARNING: PROJECT_USER does not match the current namespace");
    end
end
% Allow module to override hardcoded default EASY directory
if ~isempty(getenv('EASY_DIR'))
    EASY_DIR=fullfile(getenv('EASY_DIR'));
    if ~strcmp(easyDir, EASY_DIR) % sanity check
        disp("WARNING: EASY_DIR does not match this script's hardcoded EASY location");
        disp("This is probably OK but if strange behavior arises, we'll need to fix it in code");
        easyDir=EASY_DIR;
    end
    fprintf('Using EASY script directory: %s from environment variable EASY_DIR\n', easyDir);
else
    fprintf('Using EASY script directory: %s from hardcoded default\n', easyDir);
end
% If we don't have an EASY_SUFFIX from the module, generate it from scansDir
if ~isempty(getenv('EASY_SUFFIX'))
    easySuffix=getenv('EASY_SUFFIX');
else
    % Parse the project name from scansDir (expected form: date_user_suffix)
    [~, dirName]=fileparts(scansDir);
    parts=strsplit(dirName, '_');
    if numel(parts)>=3
        scansDate=parts{1};
        scansUserName=parts{2};
        easySuffix=strjoin(parts(3:end), '_');
        % Might as well check this too for fun
        if ~strcmp(userName, scansUserName)
            disp('WARNING: userName does not match scansUserName');
            disp("This usually means that you are attempting to run an EASY analysis on another user's project data scans");
        end
        % For happiness
        if strcmp(todayStr, scansDate)
            disp("Early bird gets the worm");
        end
    else
        % Fallback: directory name does not follow the date_user_suffix convention
        easySuffix=dirName;
        fprintf('WARNING: scans directory name %s does not look like date_user_suffix\n', dirName);
    end
end
if ~isempty(getenv('EASY_RESULTS_DIR'))
    easyResultsDir=fullfile(getenv('EASY_RESULTS_DIR'));
    % BUGFIX: easyResultsDirName was never set on this branch but is used below for matFile
    [~, easyResultsDirName]=fileparts(easyResultsDir);
    if exist(easyResultsDir, 'dir')
        fprintf('WARNING: EASY results dir %s already exists\n', easyResultsDir);
        disp('Files in this directory may be overwritten');
    end
    fprintf('Using output directory: %s from environment variable EASY_RESULTS_DIR\n', easyResultsDir);
else
    easyResultsDirName=strcat('Results_',todayStr,'_',userName,'_',easySuffix);
    easyResultsDir=fullfile(scansDir,easyResultsDirName);
    if exist(easyResultsDir, 'dir')
        fprintf('WARNING: EASY results dir %s already exists\n', easyResultsDir);
        disp('Files in this directory may be overwritten')
    end
    % BUGFIX: moved outside the exist() check so it always prints
    fprintf('Using output directory: %s\n', easyResultsDir);
end
if ~isempty(getenv('MASTER_PLATE_FILE'))
    masterPlateFile=fullfile(getenv('MASTER_PLATE_FILE'));
    % BUGFIX: message said "drug media file" for the master plate file
    fprintf('Using master plate file: %s from environment variable MASTER_PLATE_FILE\n', masterPlateFile);
else
    % Try to find MasterPlate_ file on our own
    mp=fullfile(scansDir,'MasterPlateFiles',strcat('MasterPlate_', easySuffix,'.xlsx'));
    if exist(mp, 'file')
        masterPlateFile=mp;
        fprintf('Using master plate file: %s from internal logic\n', masterPlateFile);
    else
        fprintf('WARNING: Have you created a MasterPlate_ file in %s/MasterPlateFiles/?\n', scansDir);
    end
end
if ~isempty(getenv('DRUG_MEDIA_FILE'))
    drugMediaFile=fullfile(getenv('DRUG_MEDIA_FILE'));
    fprintf('Using drug media file: %s from environment variable DRUG_MEDIA_FILE\n', drugMediaFile);
else
    % Try to find DrugMedia_ file on our own
    dm=fullfile(scansDir,'MasterPlateFiles',strcat('DrugMedia_', easySuffix,'.xlsx'));
    if exist(dm, 'file') % BUGFIX: originally tested mp (the master plate path)
        drugMediaFile=dm;
        fprintf('Using drug media file: %s from internal logic\n', drugMediaFile);
    else
        fprintf('WARNING: Have you created a DrugMedia_ file in %s/MasterPlateFiles/?\n', scansDir);
    end
end
matDir=fullfile(easyResultsDir,'matResults');
if ~exist(matDir, 'dir')
    mkdir(matDir);
end
matFile=fullfile(matDir,strcat(easyResultsDirName,'.mat'));
% Pulled these out of par4GblFnc8c
printResultsDir=fullfile(easyResultsDir,'PrintResults');
fotosResultsDir=fullfile(easyResultsDir,'Fotos');
figsResultsDir=fullfile(easyResultsDir,'figs');
pointMapsResultsDir=fullfile(easyResultsDir,'PTmats');
pointMapsFile=fullfile(pointMapsResultsDir,'NImParameters.mat');
oldPointMapsFile=fullfile(pointMapsResultsDir,'ImParameters.mat'); %#ok<NASGU> legacy path, kept for reference
searchRangeFile=fullfile(fotosResultsDir,'CSearchRange.mat');
mpdmFile=fullfile(matDir,'MPDM.mat');
% Decent time to print some helpful vars
if demo
    disp('Vars at end of main loop:')
    whos;
end
% Preload any previous search range (CSrchRange) if one was saved
% NOTE(review): loaded into a local that is not used further here — confirm intent
if exist(searchRangeFile, 'file')
    searchRangeNum=load(searchRangeFile); %#ok<NASGU>
end
% Add easyDir to the MATLAB path to reduce directory scoping issues
% when calling scripts without a path
addpath(easyDir);
% GUI interface design (standard GUIDE boilerplate)
gui_Singleton=1;
gui_State=struct( 'gui_Name', mfilename, ...
    'gui_Singleton', gui_Singleton, ...
    'gui_OpeningFcn', @EASYconsole_OpeningFcn, ...
    'gui_OutputFcn', @EASYconsole_OutputFcn, ...
    'gui_LayoutFcn', [] , ...
    'gui_Callback', []);
if nargin && ischar(varargin{1})
    gui_State.gui_Callback=str2func(varargin{1});
end
if nargout
    [varargout{1:nargout}]=gui_mainfcn(gui_State, varargin{:});
else
    gui_mainfcn(gui_State, varargin{:});
end
end
% GUI
% Easyconcole_OpeningFcn executes just before the EASYconsole GUI is made visible.
% This function has no output args, see OutputFcn.
% hObject--handle to figure
% eventdata reserved - to be defined in a future version of MATLAB
% handles--structure with handles and user data (see GUIDATA)
% varargin--input arguments to EASYconsole (see VARARGIN)
function EASYconsole_OpeningFcn(hObject, ~, handles, varargin)
% Executes just before the EASYconsole GUI is made visible.
% hObject  -- handle to figure
% handles  -- structure with handles and user data (see GUIDATA)
% varargin -- input arguments to EASYconsole (see VARARGIN)
global fhconsole
global scansDir
% Choose default command line output for EASYconsole
handles.output=hObject;
% Update handles structure
guidata(hObject, handles);
% Figure header, toolbar, etc. setup
fhconsole=gcf; % (was assigned twice in the original; once suffices)
set(fhconsole,'Toolbar','none');
% Show the active experiment in the window title.
% scansDir is a declared global, so exist('scansDir','var') was always true;
% only emptiness is meaningful here.
if ~isempty(scansDir)
    set(fhconsole,'Name', sprintf('EASYconsole - %s', scansDir));
else
    set(fhconsole,'Name','EASYconsole - No Active Experiment.')
end
end
% EASYconsole output
% Outputs from this function are returned to the command line.
% varargout--cell array for returning output args (see VARARGOUT);
% hObject--handle to figure
% eventdata reserved - to be defined in a future version of MATLAB
% handles--structure with handles and user data (see GUIDATA)
% Returns the figure handle stored by EASYconsole_OpeningFcn.
function varargout = EASYconsole_OutputFcn(~, ~, handles)
% Get default command line output from handles structure
varargout{1}=handles.output;
end
%% CONSOLE BUTTON INTERFACES %%
% File Button Interface (menu container; no action of its own)
function FileMenu_Callback(~, ~, ~)
%returnStartDir
end
% Load Experiment Button Interface (menu container; no action of its own)
function LoadExp_Callback(~, ~, ~)
%returnStartDir
end
% New Experiment Button Interface
function NewExpDat_Callback(~, ~, ~)
% New Experiment Button Interface: prompt for a .mat name and scans folder,
% preallocate the global 'scan' structure, and create supporting directories.
global easyDir
global scansDir % BUGFIX: was assigned as a local; other callbacks read the global
global matDir
global matFile
global easyResultsDir
global easyResultsDirName
global fhconsole
global scan
global userName
% BUGFIX: todayStr is not a global; derive it here the same way the console does
dt=datetime;
todayStr=char(dt, 'yyyyMMdd');
% Create a new experiment
try
    questdlg('\fontsize{20} NAME the file and NAVIGATE to the directory with the image folders.','File Creation','OK', struct('Default','OK','Interpreter','tex'));
    [inputFile,inputPath]=uiputfile('.mat');
    inputFileName=strrep(inputFile,'.mat','');
    easyResultsDirName=strcat('Results_',todayStr,'_',userName,'_',inputFileName);
    % Set paths
    scansDir=fullfile(inputPath);
    easyResultsDir=fullfile(scansDir,easyResultsDirName);
    matDir=fullfile(easyResultsDir,'matResults');
    matFile=fullfile(matDir,strcat(todayStr,'_',userName,'_',inputFile));
    % Preallocate the 'scan' structure for 'parfor global' (added 20-0123).
    % Numeric folder names under scansDir are the plate scan directories.
    nlist=dir(fullfile(scansDir,'*'));
    nnn=0;
    sl={};
    for n=1:size(nlist,1)
        if ~isempty(str2num(nlist(n).name)) %#ok<ST2NM>
            nnn=nnn+1;
            sl(nnn,1)={nlist(n).name};
        end
    end
    if isempty(sl) % guard: max(str2double({})) would error below
        error('No numeric scan folders found in %s', scansDir);
    end
    scanMax=max(str2double(sl));
    clear scan;
    scan(scanMax)=struct(); % changed for parfor global 20_0118
    if ~exist(matDir, 'dir') % BUGFIX: save() fails if matResults does not exist yet
        mkdir(matDir);
    end
    save(matFile,'scan')
    % Create supporting dirs
    % (also done by the workflow script; repeated here for standalone mode)
    dirs={'PrintResults', 'CFfigs', 'Fotos', 'Fotos/BkUp'};
    for i=1:length(dirs)
        d=dirs{i};
        if ~exist(fullfile(easyResultsDir, d), 'dir')
            mkdir(fullfile(easyResultsDir, d));
        end
    end
    % templateDirs are seeded from the EASY install directory
    templates={'figs', 'PTmats'};
    for i=1:length(templates)
        d=templates{i}; % BUGFIX: originally indexed dirs{i} (copy-paste)
        if ~exist(fullfile(easyResultsDir, d), 'dir')
            copyfile(fullfile(easyDir,d), fullfile(easyResultsDir,d));
        end
    end
    clear sbdg % reduce possible retention of a previous job sbdg
    sbdg=cell(1,scanMax);
    save(fullfile(easyResultsDir,'Fotos','Nbdg'),'sbdg');
catch ME
    fprintf('ERROR: %s\n', ME.message);
end
% Set the console title depending on whether a results dir was established
if exist('easyResultsDir','var')&&~isempty(easyResultsDir)
    set(fhconsole,'Name',sprintf('EASYconsole - %s', easyResultsDir));
else
    set(fhconsole,'Name','EASYconsole - Master Plate directory not selected.');
end
end
% Load a previous experiment
function LoadDatFile_Callback(~, ~, ~)
% Load a previous experiment: pick its matResults .mat, restore path globals,
% and load the matching image-parameter file for the detected scanner type.
global matDir
global matFile
global easyResultsDir
global scansDir % BUGFIX: was assigned as a local; other callbacks read the global
global easyPath
global pointMapsFile % BUGFIX: read below but was never declared global
global pointMapsResultsDir % BUGFIX: read below but was never declared global
global fhconsole
try
    questdlg('Load results .mat from ../ExpJobs/YourJob/Results/matResults/','File Creation','OK', struct('Default','OK','Interpreter','tex'));
    [inputFile,inputPath]=uigetfile('.mat','Open Experiment folder and data storage .mat file name','MultiSelect','off');
    matDir=fullfile(inputPath);
    matFile=fullfile(inputPath,inputFile);
    load(matFile);
    easyResultsDir=fullfile(matDir,'..');
    scansDir=fullfile(matDir,'..', '..');
    if ~exist(scansDir, 'dir')
        disp('WARNING: cannot find project scans');
    end
    % easyPath may be empty if the console never populated it; fall back to this file
    if isempty(easyPath)
        easyPath=fileparts(which(mfilename));
    end
    % TODO this is pretty hacky and needs something more explicit:
    % a scans subfolder named '1' marks an Inovation Vrobot job.
    % BUGFIX: original had an illegal if/else/else chain here (syntax error)
    % plus a stray undefined 'point' statement.
    if isfolder(fullfile(matDir, '..','..','1')) % Inovation Vrobot
        if exist(pointMapsFile, 'file')
            load(pointMapsFile);
        else
            try
                load(fullfile(easyPath,'NImParameters.mat')); % hardcoded default
            catch
                disp("Could not load the NImParameters.mat file")
            end
        end
    else % Epson 10Plate scans
        if exist(fullfile(pointMapsResultsDir,'ImParameters.mat'), 'file')
            load(fullfile(pointMapsResultsDir,'ImParameters.mat'));
        else
            try
                load(fullfile(easyPath,'ImParameters.mat')); % hardcoded default
            catch
                disp("Could not load the ImParameters.mat file");
            end
        end
    end
    bkupDir=fullfile(matDir,'BkUp');
    if ~exist(bkupDir, 'dir')
        mkdir(bkupDir); % BUGFIX: was 'mkkdir' (undefined function)
    end
    % Create supporting dirs
    dirs={'PrintResults', 'figs', 'CFfigs', 'PTmats', 'Fotos'};
    for i=1:length(dirs)
        d=dirs{i};
        if ~exist(fullfile(easyResultsDir, d), 'dir')
            mkdir(fullfile(easyResultsDir, d));
        end
    end
catch ME
    % Don't swallow failures silently; the original empty catch hid all errors
    fprintf('LoadDatFile failed: %s\n', ME.message);
end
clear scan % NOTE(review): discards the 'scan' just loaded into this workspace — confirm intent
if exist('easyResultsDir','var') && ~isempty(easyResultsDir)
    fhconsole=gcf;
    set(fhconsole,'Name',sprintf('EASYconsole - %s', easyResultsDir));
else
    set(fhconsole,'Name','EASYconsole - Exp. Analysis NOT selected.');
end
end
% Callbacks
% 'Run' in the dropdown menu (menu container only; the items below do the work)
function run_Callback(~, ~, ~)
end
% Launch the plate map pintool; reopen the console if it errors
function runPlateMapPintool_Callback(~, ~, ~)
try
NImapPT
catch
EASYconsole
end
end
% Run the combined image-analysis + curve-fit pipeline, then return to the console
function NImCFcombo_Callback(~, ~, ~)
try
par4Gbl_Main8c
EASYconsole
catch
EASYconsole
end
end
% Run plate image analysis only
function runPlateImAnal_Callback(~, ~, ~)
try
NImStartupOnly
catch
EASYconsole
end
end
% Run curve fitting on analyzed plate data
function PlateCFit_Callback(~, ~, ~)
% global matFile % TODO BCR not sure if needed
try
NCstart
catch
EASYconsole
end
end
% Printouts menu container (no action of its own)
function GenPrintouts_Callback(~, ~, ~)
end
% Placeholder: upload Excel MasterPlate to DB (not implemented)
function uploadExcelMP2DB_Callback(~, ~, ~)
end
% Convert the DrugMedia/MasterPlate Excel annotation to .mat
function runDMPexcel_Callback(~, ~, ~)
try
DMPexcel2mat
catch
EASYconsole
end
end
% Generate results sheets (and optional DB output); reopen console on error
function runResults_DBcombo_Callback(~, ~, ~)
try
DgenResults %similar but semicolons removed to restore so cmdLine display info.
%Dgen241010qhtcp %par4global -convert 1x1cell of 384cells to be like previous 1x384 cells CFparameter
catch ME
fprintf('Error in DgenResults: %s\n', ME.message);
EASYconsole
end
end
% Tools menu container (no action of its own)
function Tools_Callback(~, ~, ~)
end
% Overlay plots viewer; reopen the console afterwards or on error
function runOverlayPlots_Callback(~, ~, ~)
try
DoverlayPlots2
EASYconsole
catch
EASYconsole
end
end
% Build photo strips from plate images
function runFotoStrip_Callback(~, ~, ~)
try
F_NImStartup_CentCir
EASYconsole
catch
EASYconsole
end
end
% Display a saved figure
function runDisplayFig_Callback(~, ~, ~)
try
UfigDisplay
catch
EASYconsole
end
end
% TODO(review): try-block is empty — the parameter viewer appears unimplemented
function runViewParameters_Callback(~, ~, ~)
try
catch
EASYconsole
end
end
% Quick-view raw scan images
function QkviewN_Callback(~, ~, ~)
try
QkviewImages
catch
EASYconsole
end
end
% Single-spot curve-fit display; reopen the console afterwards or on error
function CFdisplay_Callback(~, ~, ~)
try
NCsingleDisplay
EASYconsole
catch
EASYconsole
end
end

View File

@@ -0,0 +1,458 @@
%% CALLED WHEN ACCESSING 'CurveFit Display' %%
% Small blocking GUI: pick one or more scan folders (listbox) plus a plate
% row and column. Returns:
%   scLst -- cellstr of selected scan folder names ({} if none selected)
%   row   -- plate row, 1-16
%   col   -- plate column, 1-24
function [scLst, row, col] = NCdisplayGui(scansDir)
scLst={}; % BUGFIX: ensure the output is defined even if the user never selects
xPos=0.05;
btnWid=0.10;
btnHt=0.05;
spacing=0.02; % Spacing between the button and the next command's label
% Row entry box
btnNumber=1;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
row=1;
uicontrol(...
    'Style', 'edit',...
    'String',row,...
    'Units','normalized',...
    'Position', btnPos,...
    'callback',{@editRowNum});
function editRowNum(source,~)
    user_entry=str2double(get(source,'string'));
    % BUGFIX: bounds now match the 1-16 message (originally allowed 0 and 17)
    if (isnan(user_entry)||(user_entry<1)||(user_entry>16))
        errordlg('Enter a Row between 1 and 16','Bad Input','modal')
        return
    end
    row=user_entry;
end
% Column entry box
btnNumber=2;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
col=1;
uicontrol(...
    'Style', 'edit',...
    'String',col,...
    'Units','normalized',...
    'Position', btnPos,...
    'callback',{@entryColNum});
function entryColNum(source,~)
    user_entry=str2double(get(source,'string'));
    % BUGFIX: bounds now match the 1-24 message (originally allowed 0 and 25)
    if (isnan(user_entry)||(user_entry<1)||(user_entry>24))
        errordlg('Enter a Column between 1 and 24','Bad Input','modal')
        return
    end
    col=user_entry;
end
% Read in numeric folder names (each is one scan)
nlist=dir(fullfile(scansDir,'*'));
nnn=0;
slst={};
for n=1:size(nlist,1)
    if ~isempty(str2num(nlist(n).name)) %#ok<ST2NM>
        nnn=nnn+1;
        slst(nnn,1)={nlist(n).name}; % (was assigned twice in the original)
    end
end
uicontrol(...
    'Style', 'listbox',...
    'String',sort(slst),...
    'value',[],...
    'max',1000,...
    'min',1,...
    'Units','normalized',...
    'Position', [.40 .40 .10 .60],...
    'callback',{@load_listbox});
function load_listbox(source,~)
    userIndx=get(source,'value');
    userStr=get(source,'string');
    scLst=userStr(userIndx); % cellstr of the selected folder names
end
% Continue button resumes the uiwait below
btnNumber=10;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
uicontrol(...
    'Style', 'pushbutton',...
    'String',{'Continue'},...
    'Units','normalized',...
    'Position', btnPos,...
    'callback','uiresume(gcbf)');
% Labels
xLPos=0.175;
btnWid=0.20;
lblNumber=1;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
uicontrol(...
    'Style', 'text',...
    'String','Row',...
    'Units','normalized',...
    'Position', btnPos);
lblNumber=2;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
uicontrol(...
    'Style', 'text',...
    'String','Column',...
    'Units','normalized',...
    'Position', btnPos);
% Block until the Continue button fires uiresume
uiwait(gcf);
end %function end
%}
%{
%-------------------333333-----------
lblNumber=3;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','BG Threshold (%above) Detection',...
'Units','normalized',...
'Position', btnPos);
%-------------------4-----------
lblNumber=4;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','SpotDetThres(1-60%)',...
'Units','normalized',...
'Position', btnPos);
%-------------------55555-----------
lblNumber=5;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','Radius',... %'String','Width',...
'Units','normalized',...
'Position', btnPos);
%-------------------66666-----------
lblNumber=6;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','Dither',...
'Units','normalized',...
'Position', btnPos);
%-------------------77777-----------
lblNumber=7;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','SearchRange',...
'Units','normalized',...
'Position', btnPos);
%-------------------88888-----------
%{
lblNumber=8;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','Blank2',...
'Units','normalized',...
'Position', btnPos);
%----------------------------------------
%}
%}
%{
%-------------------66666-----------
btnNumber=6;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcExtendFactor=ImParMat(7);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcLoIntensThres,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entryExtendFactor});
function entryExtendFactor(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<1.8)||(user_entry>4.0))
errordlg('You must enter a numeric value between 1.8 and 2.1','Bad Input','modal')
return
end
ExtendFactor=user_entry
ImParMat(7)= ExtendFactor
ExtendFactor
end
%}
%{
%-------------------333333-----------
btnNumber=3;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcBGthreshold=ImParMat(3);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcBGthreshold,...
'Units','normalized',...
'Position', btnPos,... % [.002 .70 .08 .10],...
'callback',{@entryBGthreshold}); % 'Position', [5 100 60 20])
function entryBGthreshold(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<1)||(user_entry>100))
errordlg('Enter a numeric value between 1 and 100 percent to produce a Background Threshold value as a percent above the time series average background for each spot.','Bad Input','modal')
return
end
BGthresInput=user_entry
ImParMat(3)= BGthresInput
BGthresInput
end
%-------------------444444-----------
btnNumber=4; %Enter spot detection threshold (lock-in Image frame)
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcSpotThres=ImParMat(4);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcSpotThres,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entrySpotThres});
function entrySpotThres(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<1)||(user_entry>60))
errordlg('You must enter a numeric value between 1 and 60','Bad Input','modal')
return
end
spotThres=user_entry
ImParMat(4)= spotThres
spotThres
end
%
%---------555555 Radius Entry After Sept.2014---------------------------------**
btnNumber=5;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcRadius=ImParMat(10);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcRadius,...
'Units','normalized',...
'Position', btnPos,... % [.002 .70 .08 .10],...
'callback',{@entryRadius}); % 'Position', [5 100 60 20])
function entryRadius(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<12)||(user_entry>17))
errordlg('You must enter a numeric value between 12 and 17','Bad Input','modal')
return
end
Radius=user_entry
ImParMat(10)= Radius
Radius
end
%---------555555 Width Entry prior the Sept.2014---------------------------------**
%{
btnNumber=5;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcWidth=ImParMat(5);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcWidth,...
'Units','normalized',...
'Position', btnPos,... % [.002 .70 .08 .10],...
'callback',{@entryWidth}); % 'Position', [5 100 60 20])
function entryWidth(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<5)||(user_entry>41))
errordlg('You must enter a numeric value between 5 and 40','Bad Input','modal')
return
end
Width=user_entry
ImParMat(5)= Width
Width
end
%}
%-------------------66666 Dither unnecessary after Sept.2014-----------
btnNumber=6;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcDither= ImParMat(6);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcDither,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entryDither});
function entryDither(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<0)||(user_entry>5))
errordlg('You must enter a numeric value between 1 and 4','Bad Input','modal')
return
end
Dither=user_entry
ImParMat(6)= Dither
Dither
end
%-------------------77777----------- Added July 7,2015 to allow Search Range constraint
btnNumber=7;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
try
srchRange=ImParMat(12);
searchRangeNum=ImParMat(12)
catch %Legacy default value was 18 before being made a user input variable (ImParMat(12)). A preferable value now might be 12 or 14.
srchRange=18;
ImParMat(12)=18
searchRangeNum=ImParMat(12)
end
%{
if size(scLst)>1
srchRange=ImParMat(12);
else
try
srchRange=CSearchRange(str2double(scLst))
catch
srchRange=ImParMat(12);
end
end
%}
hSearchRange=uicontrol(...
'Style', 'edit',...
'String',srchRange,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@CsearchRange});
function CsearchRange(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<1)||(user_entry>50)) %originally 18; 19_0729 increased
errordlg('You must enter a numeric value between 1 and 18 12->18 recommended. (ImParMat(12)))','Bad Input','modal')
return
end
searchRangeNum=user_entry
end
%-------------------77777-----------
%{
btnNumber=7;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcExtend=ImParMat(7);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcExtend,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entryExtend});
function entryExtend(source,eventdata)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<-0.10)||(user_entry>0.4))
errordlg('You must enter a numeric value between 0 and 0.4. 0.10 recommended','Bad Input','modal')
return
end
extend=user_entry
ImParMat(7)= extend
extend
end
%-------------------888888-----------
btnNumber=8;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
%ImParMat(8)=1;
srcpointExtend=ImParMat(8);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcpointExtend,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entrypointExtend});
function entrypointExtend(source,eventdata)
user_entry=str2double(get(source,'string'));
user_entry= floor(user_entry);
if (isnan(user_entry)||(user_entry<-3)||(user_entry>5))
errordlg('You must enter an integer value between 0 and 5. 1 recommended','Bad Input','modal')
return
end
pointExtend=user_entry
ImParMat(8)= pointExtend
pointExtend
end
%}
%-------------------999999-----------
btnNumber=9;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
hedit=uicontrol(...
'Style', 'popupmenu',...
'String',{'GrowthArea','FixedArea'},...
'Units','normalized',...
'Position', btnPos,...
'callback',{@grwArea});
function grwArea(source,eventdata)
str=get(source, 'String');
val=get(source,'Value');
% Set current data to the selected data set.
switch str{val};
case 'GrowthArea' ;% User selects Peaks.
SWgrowthArea=1
case 'FixedArea' % User selects Membrane.
SWgrowthArea=0
end
end
%}

View File

@@ -0,0 +1,232 @@
%% CALLED BY par4GblFnc8c.m %%
% Fits growth curves for all 384 culture spots of one plate.
% Inputs: parMat (filter/fit parameters; parMat(4)=low-intensity threshold,
% parMat(5)=std trim limit per usage below), times (raw time points),
% values (384xT intensities), timeOffsets (per-plate time offset),
% fileSuffix/printResultsDir (output file naming), AUCfinalTime, spotAreas,
% sols (passed through to NCscurImCF_3parfor).
% Outputs: per-spot cell arrays of filtered times/intensities plus 384x27
% result matrices (outC/outCstd) produced by NCscurImCF_3parfor.
function [par4scanselIntensStd,par4scanselTimesStd,par4scanTimesELr,par4scanIntensELr,par4scanCFparameters,par4scanCFdate,outC,outCstd]= ...
    NCfitImCFparforFailGbl2(parMat,times, values, timeOffsets, fileSuffix, AUCfinalTime, ~, spotAreas, printResultsDir, ~,~, sols, ~) %,scan)
% Preallocation for parfor loop: every per-spot cell starts defined
st(1,1:size(times,2))=1111; % sentinel placeholder series
outC=zeros(384,27);
outCstd=zeros(384,27);
pa=cell(1,384);
par4scanCFparameters=cell(1,384);
par4scanCFdate=cell(1,384);
for m=1:384
    pa{m}=st;
    par4scanCFparameters{m}=parMat;
    par4scanCFdate{m}=datestr(now,31);
end
par4scanselTimesStd=pa;
par4scanselIntensStd=pa;
par4scanTimesELr=pa;
par4scanIntensELr=pa;
% Spot (cultures) loop
for ii=1:384
    % Reset per-spot accumulators (parfor-safe)
    selTimesStd=[];
    selIntensStd=[];
    FiltTimesELr=[];
    NormIntensELr=[];
    % add offset...1 offset PER PLATE
    timepts=times + timeOffsets;
    currValues=values(ii,:);
    % get spot areas for this culture
    currSpotArea=spotAreas(:,ii);
    % Preallocate to accommodate parfor loop
    resMatStd=zeros(1,27);
    resMat=zeros(1,27);
    currNormIntens=currValues/currSpotArea;
    tmpx=find(currNormIntens>5);
    if(isempty(tmpx) || length(tmpx)<3)
        % Too few informative points: pass the data through unfitted
        selTimesStd=timepts;
        selIntensStd=currNormIntens;
        FiltTimesELr=timepts;
        NormIntensELr=currNormIntens;
    else
        % NCfilImCF.m logic (inlined)
        hold off;
        normIntens=currNormIntens;
        dataMatrix=[];
        loIntensThres=parMat(4);
        stdLoIntLim=parMat(5);
        bl=0;
        thresGT2=0;
        minTime=0;
        % Basic filtering:
        % low-intensity threshold, std-based trim before S-curve start, dropout detection
        if(max(normIntens) > 2.29)
            threshold=loIntensThres; % increase to cull more low points (flag=2)
        else
            threshold=0;
        end
        dropThreshold=-0.0001*max(normIntens);
        % dataMatrix rows: 1=time, 2=raw normalized intensity,
        % 3=cull flag (1=keep), 4=baseline-subtracted intensity
        dataMatrix(1,:)=timepts;
        dataMatrix(2,:)=normIntens;
        dataMatrix(3,:)=ones;
        dataMatrix(4,:)=normIntens;
        % Determine a mean intensity index point and associated time point
        a=min(normIntens(normIntens>=0));
        b=max(normIntens(normIntens>=0));
        c=0.5*(b-a);
        d=b-c;
        meanIntIndPt=find(normIntens>d,1);
        meanInt=normIntens(meanIntIndPt); %#ok<NASGU>
        % NOTE(review): uses raw 'times', not offset 'timepts' — confirm intended
        meanTime=times(meanIntIndPt); %#ok<NASGU>
        % NCLoIntstdTrim / NCLoSstdTrim: flag low-intensity points before the
        % S-curve start using a rolling 4-point standard deviation
        flg1=0;
        loScurvLim=stdLoIntLim;
        loStimeN=1;
        stdDev=[];
        nrmIntens0=normIntens;
        for n=1:meanIntIndPt
            if nrmIntens0(n)<=0
                nrmIntens0(n)=0;
            end
            if(nrmIntens0(n)<threshold)
                if (loStimeN-2)>0
                    dataMatrix(3,1:(n-2))=2; % flag pre-S-curve low-intensity points
                else
                    dataMatrix(3,1:n)=2;
                end
                dataMatrix(3,1:(n-2))=2;
            end
            if n<(length(nrmIntens0)-3)
                x=nrmIntens0(n:(n+3));
                stdDev(n)=std(x);
                if (stdDev(n)<loScurvLim && flg1~=1)
                    loStimeN=n;
                end
                if stdDev(n)>6
                    flg1=1;
                end
            end
        end
        % Flag everything before the detected S-curve start.
        % BUGFIX: else-branch now mirrors the analogous branch above
        % (was a no-op 1:(negative) range) and no longer echoes to console.
        if (loStimeN-2)>0
            dataMatrix(3,1:(loStimeN-2))=2;
        else
            dataMatrix(3,1:loStimeN)=2;
        end
        % Baseline: minimum of the early points (before intensity exceeds 2)
        qcutoff=2;
        qind=find(normIntens>2);
        if numel(qind)>=3 % BUGFIX: qind(3) errored when fewer than 3 points exceeded 2
            qcutoff=qind(3);
        end
        [minInt,I]=min(normIntens(2:qcutoff));
        I=I+1; % BUGFIX: min() indexed the slice starting at element 2; make absolute
        bl=minInt;
        minTime=dataMatrix(1,I);
        if (length(qind)>5)&&I>1
            dataMatrix(3,1:(I-1))=5;
        end
        % Baseline-subtract and locate the first point >= 2 after the minimum
        tGT2Flg=0;
        for n=1:length(normIntens)
            dataMatrix(4,n)=normIntens(n)-bl;
            if n>I && dataMatrix(4,n)>=2 && tGT2Flg==0
                thresGT2=n;
                tGT2Flg=1;
            end
        end
        resMat(18)=bl;
        resMatStd(18)=bl;
        resMatStd(20)=minTime;
        resMat(20)=minTime;
        % DropOut cull section (single drop points after the curve is established)
        for n=2:length(normIntens)
            if ((normIntens(n)-normIntens(n-1))<dropThreshold) && ...
                    (n > max(meanIntIndPt,thresGT2))
                dataMatrix(3,n)=6;
            end
        end
        % TODO should/could this be removed as it is recreated in NCscurImCF_3parfor.m
        selTimes=[]; % guard: previously undefined when no point had flag 1
        selIntens=[];
        tmpIndx=0;
        for n=1:length(normIntens)
            if (dataMatrix(3,n)==1)
                tmpIndx=tmpIndx+1;
                selTimes(tmpIndx)=dataMatrix(1,n);
                selIntens(tmpIndx)=dataMatrix(4,n);
            end
        end
        selTimes=selTimes'; %#ok<NASGU>
        selIntens=selIntens'; %#ok<NASGU>
        dataMatrix0=dataMatrix;
        % Curve fit on the filtered data
        [resMatStd, resMat, selTimesStd, selIntensStd, FiltTimesELr, NormIntensELr] =...
            NCscurImCF_3parfor(dataMatrix0, AUCfinalTime, currSpotArea, sols, bl, minTime);
    end
    par4scanselTimesStd{ii}=selTimesStd; % BUGFIX: missing semicolon spammed the console
    par4scanselIntensStd{ii}=selIntensStd;
    par4scanTimesELr{ii}=FiltTimesELr; % preserved for CurveDisplay and EZview
    par4scanIntensELr{ii}=NormIntensELr; % preserved for CurveDisplay and EZview
    outC(ii,:)=resMat;
    outCstd(ii,:)=resMatStd;
end
% Write the (header-only) per-plate results file
fileExt='.txt';
filePrefix='FitResultsComplete_';
fileNamePlate=[filePrefix fileSuffix fileExt];
fileName=fullfile(printResultsDir, fileNamePlate);
fid=fopen(fileName,'w');
if fid~=-1 % guard: fopen returns -1 on failure and fprintf/fclose would error
    fprintf(fid, 'Fit Results Complete\n');
    fclose(fid);
else
    fprintf('WARNING: could not open %s for writing\n', fileName);
end
end

View File

@@ -0,0 +1,353 @@
%% CALLED BY NCfitImCFparforFailGbl2.m %%
%
% NCscurImCF_3parfor: two-stage symmetric-logistic curve fit for one spot.
%
% Stage 1 ("Std"): fit K/(1+exp(-r*(t-l))) to all filter-selected points and
% record the result in resMatStd.
% Stage 2 ("ELr" - Experimental L+deltaS refit): locate the curve's critical
% points from 'sols' (symbolic roots of the logistic's 4th derivative,
% precomputed by the caller), cut early pre-rise data, keep the core rise plus
% the late plateau points that define K, refit, and keep the refit in resMat
% only when its rate 'r' improves; otherwise resMat echoes the Std results.
%
% Inputs:
%   dataMatrix   4xN spot matrix: row 1 = time (h), row 3 = filter selection
%                code (codes 0-3 are kept for fitting), row 4 = normalized
%                intensity
%   AUCfinalTime upper time limit for the area-under-curve integral
%   currSpotArea spot growth area, copied into column 13 of both result rows
%   sols         symbolic critical-point solutions in terms of K, r, l
%   bl           background value from the caller's filter section
%   minTime      minimum time value from the caller's filter section
% Outputs:
%   resMatStd, resMat            1x27 result rows (Std fit / ELr fit)
%   selTimesStd, selIntensStd    column vectors of points given to the Std fit
%   FiltTimesELr, NormIntensELr  column vectors of points used by the ELr fit
function [resMatStd, resMat, selTimesStd, selIntensStd, FiltTimesELr, NormIntensELr] =...
NCscurImCF_3parfor(dataMatrix, AUCfinalTime, currSpotArea, sols, bl, minTime)
% Preallocate the two 27-column result rows
resMatStd=zeros(1,27);
resMat=zeros(1,27);
% Set internal variables sent to matlab fit function
me=200; % MaxFunEvals passed to fitoptions
meL=750; % NOTE(review): meL and miL appear unused in this function -- confirm
mi=25; % MaxIter passed to fitoptions
miL=250;
rmsStg1=0;
rmsStg1I(1)=0;
slps=1; % row index into the sDat diagnostics matrix
filterTimes=[];
normIntens=[];
nn=1;
numFitTpts=0;
% Build filterTimes and normIntens from spot dataMatrix selection codes produced in filter section
% (selection codes 0,1,2,3 are included; points with higher codes were culled)
for n=1:size(dataMatrix,2)
if (((dataMatrix(3,n)==1))||(dataMatrix(3,n)==3)||(dataMatrix(3,n)==2)...
||(dataMatrix(3,n)==0))
filterTimes(nn)=dataMatrix(1,n);
normIntens(nn)=dataMatrix(4,n);
nn=nn+1;
end
end
filterTimes=filterTimes';
selTimesStd=filterTimes;
normIntens=normIntens';
selIntensStd=normIntens;
lastTptUsed=1;
lastIntensUsed=1;
thresGT2TmStd=0;
try
% --- Stage 1: Standard logistic fit over all selected points ---
lastTptUsed=max(filterTimes);
lastIntensUsed=normIntens(length(normIntens));
lastIntensUsedStd=lastIntensUsed;
lastTptUsedStd=lastTptUsed;
Tpt1Std=filterTimes(1);
numFitTptsStd=nnz((normIntens(:)>=0)==1);
thresGT2=find(((normIntens(:)>2)==1), 1); % first point whose intensity exceeds 2
if isempty(thresGT2)
thresGT2TmStd=0;
else
thresGT2TmStd=filterTimes(thresGT2);
end
numTptsGT2Std=nnz((normIntens(:)>=2)==1); % nnz(filterTimes(find(filterTimes>=thresGT2Tm)));
K_Guess=max(normIntens); % starting guess for the carrying capacity
numTimePts=length(filterTimes);
opts=fitoptions('Method','Nonlinear','Robust','On','DiffMinChange',1.0E-11,'DiffMaxChange',0.001,...
'MaxFunEvals',me, 'MaxIter', mi, 'TolFun', 1.0E-12, 'TolX', 1.0E-10, 'Lower', [K_Guess*0.5,0,0],...
'StartPoint', [K_Guess,filterTimes(floor(numTimePts/2)),0.30], 'Upper', [K_Guess*2.0,max(filterTimes),1.0],'Display','off');
% NOTE(review): ['K','r','l'] concatenates to the single char 'Krl'; fittype
% derives coefficient names from the expression, so this argument looks
% suspect -- confirm against the fittype documentation
ftype=fittype('K / (1 + exp(-r* (t - l )))','independent','t','dependent',['K','r','l'],'options',opts);
% Carry out the curve fitting process
[fitObject, errObj]=fit(filterTimes,normIntens,ftype);
% fit orders coefficients alphabetically: K, l, r
coeffsArray=coeffvalues(fitObject);
rmsStg1=errObj.rsquare;
rmsStg1I(slps)=errObj.rsquare;
sDat(slps,1)=errObj.rsquare;
K=coeffsArray(1); sDat(slps,2)=coeffsArray(1); % Carrying Capacity
l=coeffsArray(2); sDat(slps,3)=coeffsArray(2); % lag time
r=coeffsArray(3); sDat(slps,4)=coeffsArray(3); % rateS
% Integrate (from first to last time point)
numVals=size(filterTimes);
numVals=numVals(1);
t_begin=0;
t_end=AUCfinalTime;
% Closed-form integral of the fitted logistic from t_begin to t_end
AUC=(K/r*log(1+exp(-r*(t_end-l)))-K/r*log(exp(-r*(t_end-l)))) - (K/r*log(1+exp(-r*(t_begin-l)))-K/r*log(exp(-r*(t_begin-l))));
MSR=r;
rsquare=errObj.rsquare;
confObj=confint(fitObject,0.9); % get the 90% confidence
NANcond=0; stdNANcond=0; % stdNANcond added to relay not to attempt ELr as there is no curve to find critical point
confObj_filtered=confObj;
Klow=confObj(1,1); sDat(slps,5)=confObj(1,1);
Kup=confObj(2,1); sDat(slps,6)=confObj(2,1);
llow=confObj(1,2); sDat(slps,7)=confObj(1,2);
lup=confObj(2,2); sDat(slps,8)=confObj(2,2);
rlow=confObj(1,3); sDat(slps,9)=confObj(1,3);
rup=confObj(2,3); sDat(slps,10)=confObj(2,3);
% Any NaN confidence bound means the fit is unusable for the ELr stage
if(isnan(Klow)||isnan(Kup)||isnan(llow)||isnan(lup)||isnan(rlow)||isnan(rup))
NANcond=1; stdNANcond=1; % stdNANcond added to relay not to attempt ELr as there is no curve to find critical point
end
catch
% if no data is given (or the fit fails), return zeros
AUC=0;MSR=0;K=0;r=0;l=0;rsquare=0;Klow=0;Kup=0;
rlow=0;rup=0;lup=0;llow=0;
NANcond=1; stdNANcond=1; %stdNANcond added to relay not to attempt ELr as there is no curve to find critical point
end
% Evaluate the fitted curve on a fixed grid; its minimum is the fitted baseline
if (exist('K','var')&& exist('r','var') && exist('l','var'))
t=(0:1:200);
Growth=K ./ (1 + exp(-r.* (t - l )));
fitblStd=min(Growth);
end
cutTm(1:4)=1000; %sentinel: cuts not used or NA (value is 1000, not -1)
% Preserve for ResultsStd (column layout mirrors resMat below)
resMatStd(1)=AUC;
resMatStd(2)=MSR;
resMatStd(3)=K;
resMatStd(4)=r;
resMatStd(5)=l;
resMatStd(6)=rsquare;
resMatStd(7)=Klow;
resMatStd(8)=Kup;
resMatStd(9)=rlow;
resMatStd(10)=rup;
resMatStd(11)=llow;
resMatStd(12)=lup;
resMatStd(13)=currSpotArea;
resMatStd(14)=lastIntensUsedStd; % filtNormIntens(length(filtNormIntens));
maxRateTime=0; %[]; %Std shows []; ELr shows 0; %parfor
resMatStd(15)=0; %maxRateTimestdMeth;
resMatStd(16)=lastTptUsedStd;
if isempty(Tpt1Std)
Tpt1Std=777; % sentinel: no first time point available
end
resMatStd(17)=Tpt1Std;
resMatStd(18)=bl; % perform in the filter section of NCfitImCFparfor
resMatStd(19)=fitblStd; % taken from NCfil... and not affected by NCscur...changes
resMatStd(20)=minTime; % not affected by changes made in NCscur...for refined 'r'
resMatStd(21)=thresGT2TmStd;
resMatStd(22)=numFitTptsStd;
resMatStd(23)=numTptsGT2Std;
resMatStd(24)=999; % the Standard method has no cuts, so no cutTm
resMatStd(25)=999;
resMatStd(26)=999;
resMatStd(27)=999;
% ELr New Experimental data through L+deltaS Logistic fit for 'Improved r' Fitting
FiltTimesELr=[]; %{ii}=filterTimes;
NormIntensELr=[]; %{ii}=normIntens;
normIntens=selIntensStd;
filterTimes=selTimesStd;
stdIntens=selIntensStd;
tmpIntens=selIntensStd;
stdTimes=selTimesStd;
if stdNANcond==0
% Determine critical points and offsets for selecting Core Data based on
% Standard curve fit run. Put diff into NImStartupImCF02.m calling source
% to reduce repeated execution since it doesn't change.
% fd4=diff(sym('K / (1 + exp(-r* (t - l )))'),4);
% sols=solve(fd4);
tc1=eval(sols(2)); % first critical point (start of the rise); uses K, r, l in scope
tc2=eval(sols(3)); % second critical point (approach to plateau)
LL=l; %eval(sols(1)); %exactly the same as 'l' from std. fit method-Save time
rsTmStd=LL-tc1; %%riseTime (first critical point to L)
deltS=rsTmStd/2; % half the rise time; widens the core window on both sides
tc1Early=tc1-deltS; %AKA- tc1AdjTm %2*tc1 -LL
L_Late=LL+deltS;
tc1EdatPt=find(filterTimes>(tc1Early),1,'first');
cutTm(1)=filterTimes(2);
cutDatNum(1)=2;
cutTm(2)=tc1Early;
cutDatNum(2)=tc1EdatPt-1;
L_LDatPt=find(filterTimes< L_Late,1,'last');
tc2LdatPt=find(filterTimes< tc2+rsTmStd,1,'last');
cutTm(3)=L_Late;
cutDatNum(3)=L_LDatPt;
% Select Core Data Set (Remove Early data before critical point)
% The arrays are prefilled with the last core value, then overwritten:
% position 1 keeps the very first data point, positions 2:end hold the
% core window [tc1EdatPt..L_LDatPt]
ints=[];
ints(1:L_LDatPt-tc1EdatPt+2)=(tmpIntens(L_LDatPt));
ints(2:end)=tmpIntens(tc1EdatPt:L_LDatPt);
ints(1)=tmpIntens(1);
tms=[];
tms(1:L_LDatPt-tc1EdatPt+2)=(stdTimes(L_LDatPt));
tms(2:end)=stdTimes(tc1EdatPt:L_LDatPt);
tms(1)=stdTimes(1);
% Include/Keep late data that define K
if length(tmpIntens(tc2LdatPt:end))> 4
KlastInts=tmpIntens(tc2LdatPt:end);
KlastTms=stdTimes(tc2LdatPt:end);
lengthKlast=length(tmpIntens(tc2LdatPt:end));
ints(end:(end+ lengthKlast-1))=KlastInts;
tms(end:(end+ lengthKlast-1 ))=KlastTms;
cutTm(4)=tc2+rsTmStd;
cutDatNum(4)=tc2LdatPt-1;
else
% Too few plateau points past tc2: back up one point and keep what exists
lengthKlast=length(tmpIntens(tc2LdatPt-1:end));
if lengthKlast>1
KlastInts=tmpIntens(end-(lengthKlast-1):end);
KlastTms=stdTimes(end-(lengthKlast-1):end);
ints(end:(end+ lengthKlast-1 ))=KlastInts;
tms(end:(end+ lengthKlast-1 ))=KlastTms;
end
cutTm(4)=stdTimes(tc2LdatPt-1);
cutDatNum(4)=tc2LdatPt-2; %length(stdTimes(end-(lengthKlast-1):end));
end
Ints=[];
Tms=[];
Ints=ints';
Tms=tms';
try
% --- Stage 2: refit on the cut ("core") data set ---
filterTimes=Tms; filterTimes4=Tms;
normIntens=Ints; normIntens4=Ints;
% Classic symmetric logistic curve fit setup restated as COMMENTS for reference convenience
% opts=fitoptions is the same as for Std and so is redundant
% opts=fitoptions('Method','Nonlinear','Robust','On',...
% 'DiffMinChange',1.0E-11,'DiffMaxChange',0.001,...
% 'MaxFunEvals',me, 'MaxIter', mi, 'TolFun', 1.0E-12, 'TolX', 1.0E-10, 'Lower', [K_Guess*0.5,0,0], 'StartPoint', [K_Guess,filterTimes(floor(numTimePts/2)),0.30], 'Upper', [K_Guess*2.0,max(filterTimes),1.0]);
% NOTE(review): same suspect 'dependent' argument as the Std fit above
ftype=fittype('K / (1 + exp(-r* (t - l )))','independent','t','dependent',['K','l','r'],'options',opts);
fitObject=[]; errObj=[];
% carry out the curve fitting process
[fitObject, errObj]=fit(Tms,Ints,ftype);
coeffsArray=coeffvalues(fitObject);
r3=coeffsArray(3); % sDat(slps,4)=coeffsArray(3); % rateS
if (exist('K','var')&& exist('r','var') && exist('l','var'))
t=(0:1:200);
GrowthELr=K ./ (1 + exp(-r.* (t - l )));
fitblELr=min(GrowthELr); %jh diag
end
catch
% if no data is given (or the refit fails), return zeros
AUC=0;MSR=0;K=0;r=0;l=0;rsquare=0;Klow=0;Kup=0;
rlow=0;rup=0;lup=0;llow=0; %normIntens=[];
end
end
% Update values if r is better(higher) with removal of early data
try
if r3>r && stdNANcond==0
% ELr refit wins: adopt its coefficients and recompute the derived stats
r=r3; sDat(slps,4)=sDat(slps,4); % rateS
K=coeffsArray(1); sDat(slps,2)=coeffsArray(1); % Carrying Capacity
l=coeffsArray(2); sDat(slps,3)=coeffsArray(2); % lag time
coeffsArray=coeffvalues(fitObject);
rmsStg1=errObj.rsquare;
rmsStg1I(slps)=errObj.rsquare;
sDat(slps,1)=errObj.rsquare;
% JH diagnostics
numFitTpts=nnz((normIntens(:)>=0)==1);
thresGT2=find(((normIntens(:)>2)==1), 1);
thresGT2Tm=filterTimes(thresGT2);
numTptsGT2=nnz((normIntens(:)>=2)==1);
numTimePts=length(filterTimes);
AUC=(K/r*log(1+exp(-r*(t_end-l)))-K/r*log(exp(-r*(t_end-l)))) - (K/r*log(1+exp(-r*(t_begin-l)))-K/r*log(exp(-r*(t_begin-l))));
MSR=r3;
rsquare=errObj.rsquare;
confObj=confint(fitObject,0.9); % get the 90% confidence
NANcond=0;
confObj_filtered=confObj;
Klow=confObj(1,1); sDat(slps,5)=confObj(1,1);
Kup=confObj(2,1); sDat(slps,6)=confObj(2,1);
llow=confObj(1,2); sDat(slps,7)=confObj(1,2);
lup=confObj(2,2); sDat(slps,8)=confObj(2,2);
rlow=confObj(1,3); sDat(slps,9)=confObj(1,3);
rup=confObj(2,3); sDat(slps,10)=confObj(2,3);
if(isnan(Klow)||isnan(Kup)||isnan(llow)||isnan(lup)||isnan(rlow)||isnan(rup))
NANcond=1;
end
filterTimes=Tms;
normIntens=Ints;
resMat(17)=.00002; % marker: ELr data set was used
resMat(18)=bl;
resMat(19)=fitblELr;
resMat(20)=minTime;
else % r is better than r3 so use the Std data in the ELr result sheet
filterTimes=selTimesStd;
normIntens=selIntensStd;
lastTptUsed=lastTptUsedStd; % Reinstall Std values for jh diags
Tpt1=filterTimes(1);
try
if isempty(Tpt1)
Tpt1=0.00002; %777;
end
catch
Tpt1=0.00002; %777;
end
resMat(17)=Tpt1;
numFitTpts=numFitTptsStd;
numTptsGT2=numTptsGT2Std;
thresGT2Tm=thresGT2TmStd;
cutTm(1:4)=1000; % sentinel 1000: cuts not used or NA
resMat(18)=bl; % only applicable to Std curve Fit; ELr supersedes and makes meaningless
resMat(19)=fitblStd; % only applicable to Std curve Fit; ELr supersedes and makes meaningless
resMat(20)=minTime; % only applicable to Std curve Fit; ELr supersedes and makes meaningless
end % if r3>r1
catch
% if no data is given (e.g. r3 never assigned), return zeros
AUC=0;MSR=0;K=0;r=0;l=0;rsquare=0;Klow=0;Kup=0;
rlow=0;rup=0;lup=0;llow=0; % normIntens=[];
end
% Pack the ELr result row (same column layout as resMatStd above)
resMat(1)=AUC;
resMat(2)=MSR;
resMat(3)=K;
resMat(4)=r;
resMat(5)=l;
resMat(6)=rsquare;
resMat(7)=Klow;
resMat(8)=Kup;
resMat(9)=rlow;
resMat(10)=rup;
resMat(11)=llow;
resMat(12)=lup;
resMat(13)=currSpotArea;
resMat(14)=lastIntensUsed; %filtNormIntens(length(filtNormIntens));
% spline fit unnecessary and removed, therefore no max spline rate time->set 0
maxRateTime=0; % ELr will show 0; Std will show []
resMat(15)=maxRateTime;
resMat(16)=lastTptUsed; % filterTimes(length(filterTimes));
try % if Std fit used no cuts .:. no cutTm
resMat(24)=cutTm(1);
resMat(25)=cutTm(2);
resMat(26)=cutTm(3);
resMat(27)=cutTm(4);
catch
resMat(24)=999; % if Std fit used no cuts .:. no cutTm
resMat(25)=999;
resMat(26)=999;
resMat(27)=999;
end
FiltTimesELr=filterTimes;
NormIntensELr=normIntens;
lastTptUsed=max(filterTimes);
lastIntensUsed=normIntens(length(normIntens));
if (exist('K','var')&& exist('r','var') && exist('l','var'))
t=(0:1:200);
Growth=K ./ (1 + exp(-r.* (t - l )));
fitbl=min(Growth); % jh diag
end
try % jh diag
if isempty(thresGT2Tm)
thresGT2Tm=0 % NOTE(review): missing semicolon -- value echoes to console
end
catch
thresGT2Tm=0;
numTptsGT2=0;
end
resMat(21)=thresGT2Tm;
resMat(22)=numFitTpts;
resMat(23)=numTptsGT2;
end

View File

@@ -0,0 +1,89 @@
%% CALLED WHEN ACCESSING 'CurveFit Display' %%
%
% Script: plots the curve-fit result for one spot (selected by row/column)
% across one or more user-selected scan plates. For each selected plate it
% overlays the raw area-normalized intensities, the filter-selected
% normalized points, the fitted logistic curve, and a marker at the lag
% time 'l'. Reads fit coefficients from scan(..).plate(1).CFout columns
% 3 (K), 4 (r) and 5 (l). Expects 'scansDir' in the workspace.
global scLst
global scan
hf=figure;
% Parameter Entry: GUI returns the selected scan list and the spot row/column
[scLst, row, col]=NCdisplayGui(scansDir);
close(hf)
% Linear spot index for a 24-rows-per-column layout
selSpot=(row-1)*24 + col;
for iPlate=1:length(scLst)
scanPltNum=str2double(scLst(iPlate));
K=scan(scanPltNum).plate(1).CFout((selSpot),3);
r=scan(scanPltNum).plate(1).CFout((selSpot),4);
l=scan(scanPltNum).plate(1).CFout((selSpot),5);
suffix=strcat('Scan-Plate', scLst(iPlate)); % char(QspLst(n));
% fileSpotSuffix=strcat('-Spot#',num2str(selSpot),'-Row=',selSpotRC(1),'-Col=',selSpotRC(2),'-FitData','-L=',num2str(l),'-r=',num2str(r),'-K=',num2str(K));
fileSpotSuffix=strcat('-Spot#',num2str(selSpot),'-Row=',num2str(row),'-Col=',num2str(col),'-FitData','-L=',num2str(l),'-r=',num2str(r),'-K=',num2str(K));
filenameNoExt=[suffix fileSpotSuffix];
timeArr=scan(scanPltNum).plate(1).tSeries;
% Raw intensities normalized by the spot's growth area Ag
rawIntens=scan(scanPltNum).plate(1).intens((selSpot),:)/scan(scanPltNum).plate(1).Ag((selSpot));
try
% Filtered data may be absent for some spots; plotting is best-effort
filterTms=scan(scanPltNum).plate(1).filterTimes{(selSpot)};
normInts=scan(scanPltNum).plate(1).normIntens{(selSpot)};
catch
end
if (exist('K','var')&& exist('r','var') && exist('l','var'))
% Evaluate the fitted logistic on a 0-200 h grid
t=(0:1:200);
Growth=K ./ (1 + exp(-r.* (t - l )));
end
if length(scLst)>1
figure
else
cla
end
hold on
plot(timeArr,rawIntens,'g*');
try
plot(filterTms,normInts,'o');
catch
end
hold on;
title(filenameNoExt); % this didn't make sense so changed
xlabel('Hours');
ylabel('Intensities Normalized by Area');
grid on;
if (exist('K','var')&& exist('r','var') && exist('l','var'))
plot(t, Growth,'b-');
% Plot L on curvefit figure
% NOTE(review): t starts at 0, so Growth(round(l)) is the value at
% t=round(l)-1 (off by one), and errors if round(l)<1 -- confirm intent
grL=Growth(round(l)); % growthCurve timePT for l in hours
plot(l,0:grL,'-b') % to display position of l
plot(l,grL,'^b') % to display l position on curve as an arrowhead
% Plot Arbitrary User Entry AUC "finalTimePt"
% plot(finalTimePt,0,'+m')
% plot(0:finalTimePt,bl,'-m')
end
end
%Spot entry form------------------------------------------------------
% Legacy dialog-based spot selection, kept for reference
%{
prompt={'Enter spot to analyse:'};
dlg_title='Input spot to curve fit';
num_lines=1;
def={'1'};
selSpot=inputdlg(prompt,dlg_title,num_lines,def);
K=scan(scanPltNum).plate(1).CFout(str2double(selSpot),3);
r=scan(scanPltNum).plate(1).CFout(str2double(selSpot),4);
l=scan(scanPltNum).plate(1).CFout(str2double(selSpot),5);
suffix=strcat('Scan-Plate', scLst); %char(QspLst(n));
fileSpotSuffix=strcat('-Spot#',selSpot,'-FitData','-L=',num2str(l),'-r=',num2str(r),'-K=',num2str(K));
filenameNoExt=[suffix fileSpotSuffix];
timeArr=scan(scanPltNum).plate(1).tSeries;
rawIntens=scan(scanPltNum).plate(1).intens(str2double(selSpot),:)/scan(scanPltNum).plate(1).Ag(str2double(selSpot));
filterTms=scan(scanPltNum).plate(1).filterTimes{str2double(selSpot)};
normInts=scan(scanPltNum).plate(1).normIntens{str2double(selSpot)};
%}
%-----------------------------------------------------------------------
% Legacy row/column dialog, kept for reference
%{
prompt={'Enter Spot row:','Enter Spot column:'};
dlg_title='Input spot to curve fit';
num_lines=2;
def={'1','1'};
selSpotRC=inputdlg(prompt,dlg_title,num_lines,def);
row=str2double(selSpotRC(1)); col=str2double(selSpotRC(2));
%}
%row=cell2mat(row);

View File

@@ -0,0 +1,59 @@
%% CALLED BY par4Gbl_Main8c.m %%
%
% Script: builds circular analysis masks WITHOUT the Image Processing
% Toolbox. Expects 'radius' (spot radius in pixels) in the workspace.
% Produces (all left in the workspace for downstream use):
%   cirMask     logical disk of the given radius inside a (2*(radius+1))^2 box
%   area        analytic disk area, pi*radius^2
%   cirPixA     pixel count of cirMask (discrete disk area)
%   optCirMask  double mask: 1 inside the disk, 0.8 outside
%   FoptCirMask framed "Foto" mask: 0.8 ring at the disk boundary, 1 elsewhere
%{
%Imaging ToolBox method
r=14;
A=zeros(70,70); %(fIntsc(refPtR:(refPtRExt),refPtC:(refPtCExt)))
m={40,40};
A(m{:})=1;
B=imdilate(A,strel('disk',r,0) );
imshow(B)
area=pi*r^2
clear all
%}
%without Image Proc. Toolbox
%r=14;
%A=zeros(70,70);
%A=zeros(r,r);
%P=[40,40];
%center=[refPtR+ round(.5*width), refPtC+ round(.5*width)];
%A=zeros(70,70);
%---------------------------------------------------------------------
%radius=14;
% Bounding box with a 1-pixel margin on each side; disk centered in it
diaExt=2*(radius+1);
circBoxA=zeros(diaExt,diaExt);
center=[radius+2, radius+2];
[m,n]=size(circBoxA);
% X/Y hold the row/column index of every pixel in the box
X=bsxfun(@plus,(1:m)', zeros(1,n));
Y=bsxfun(@plus,(1:n), zeros(m,1));
% Pixel is inside the disk when its distance to the center is <= radius
cirMask=sqrt(sum(bsxfun(@minus,cat(3,X,Y),reshape(center,1,1,[])) .^2,3))<=radius;
area=pi*radius^2;
cirPixA=nnz(cirMask);
optCirMask=double(cirMask);
optCirMask(optCirMask==0)=0.8; % down-weight pixels outside the disk
% Foto Circle Fram(e): a slightly larger disk used to outline the spot
expansion=2;
radExpan=radius+expansion;
FdiaExt=2*(radExpan);
circBoxA=zeros(FdiaExt,FdiaExt);
center=[radExpan+1, radExpan+1];
[m, n ]=size(circBoxA);
X=bsxfun(@plus,(1:m)', zeros(1,n));
Y=bsxfun(@plus,(1:n), zeros(m,1));
FcirMask=sqrt(sum(bsxfun(@minus,cat(3,X,Y),reshape(center,1,1,[])) .^2,3))<=radExpan;
%FcirPixA=nnz(cirMask);
FoptCirMask=double(FcirMask);
FoptCirMask(FoptCirMask==1)=2;
%FoptCirMask(FoptCirMask==0)=1;
%********Combine Masks to create circular boundry************
% Pad the inner mask to the frame size, multiply, then remap values so only
% the ring between the two radii keeps the 0.8 boundary weight
padOptCirMask=padarray(optCirMask,[expansion-1 expansion-1],0.8);
FoptCirMask=FoptCirMask .* padOptCirMask;
FoptCirMask(FoptCirMask==1.6)=0.8;
FoptCirMask(FoptCirMask==0)=1;
FoptCirMask(FoptCirMask==2)=1;
%---------------------------------------------------
%imagesc(cirMask)

View File

@@ -0,0 +1,247 @@
%% CALLED BY par4Gbl_Main8c.m %%
% TODO Should some of these vars be pulled out higher so they are easier to change?
%
% NImParamRadiusGui: GUI for the per-run image-analysis parameters.
% Lets the user set spot Radius (ImParMat(10)), Dither (ImParMat(6)) and
% SearchRange (ImParMat(12)/searchRangeNum), and select scan folders from
% scansDir (numeric folder names only). On Continue it saves ImParMat to
% pointMapsFile and searchRangeNum to searchRangeFile. State is exchanged
% with the rest of EASY through the globals declared below.
function NImParamRadiusGui(scansDir)
global SWsingleSc
global SWgrowthArea
global scan
global scLst
global ImParMat
global searchRangeNum
global defImParMat
global fhImRun
global fhconsole
global easyResultsDir
global fotosResultsDir
global pointMapsResultsDir
global pointMapsFile
global matFile
global numRows
global numCols
global scanSize
global scanMax
global searchRangeFile
% Ncode ImRobot adaptation
% TODO this code block and variables needs explanation
% Fall back to the hardcoded defaults when key ImParMat slots are unset
defImParMat=[1,1,15,34,24,1,0,0,1,14,1,18];
if ImParMat(3)==0 || ImParMat(4)==0 ||ImParMat(5)==0 || ImParMat(10)==0 ||ImParMat(11)==0
ImParMat=defImParMat;
end
if size(ImParMat,2)<12
ImParMat(12)=18; % default before user input CsearchRange value
% NOTE(review): msg is assigned but never displayed or used
msg='Data made before SearchRange user entry added (ImParMat(12). 18 was the set value and the current default.)';
end
% ImParMat=defImParMat; %Activate for INITIAL USE only
MPnum=1;
destPerMP=1;
selScan=1;
SWgrowthArea=1;
if exist(pointMapsFile, 'file')
load(pointMapsFile);
else
% NOTE(review): easyDir is not declared global in this function -- confirm
% it is in scope when this branch runs
load(fullfile(easyDir, 'PTmats', 'NImParameters')) % hardcoded default
disp('WARNING: Using hardcoded NImParameters.mat')
end
ImParMat;
% if ~exist('searchRangeNum','var') || isempty(searchRangeNum)
if exist(searchRangeFile, 'file')
load(searchRangeFile);
CSearchRange; % TODO, might be an issue, figure out what this is doing
end
% Shared layout constants for the uicontrols below
% yInitPos=0.30;
xPos=0.05;
btnWid=0.10;
btnHt=0.05;
spacing=0.02;% Spacing between the button and the next command's label
% The ADD Groups button
btnNumber=1;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
fhImParm=gcf;
% Title the figure with the experiment directory when one is selected
if exist('easyResultsDir','var')&& ~isempty(easyResultsDir)
set(fhImParm,'NumberTitle','off')
set(fhImParm,'Name',strcat('ImageAnalysis- ',char(easyResultsDir)))
else
set(fhImParm,'NumberTitle','off')
set(fhImParm,'Name','EASYconsole - Exp. Analysis NOT selected.')
end
% Radius entry box (ImParMat(10))
btnNumber=5;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcRadius=ImParMat(10);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcRadius,...
'Units','normalized',...
'Position', btnPos,... % [.002 .70 .08 .10],...
'callback',{@entryRadius}); % 'Position', [5 100 60 20])
% Callback: validate and store the Radius (must be 12..17)
function entryRadius(source,~)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<12)||(user_entry>17))
errordlg('You must enter a numeric value between 12 and 17','Bad Input','modal')
return
end
Radius=user_entry;
ImParMat(10)=Radius;
Radius;
end
% Dither entry box (ImParMat(6))
btnNumber=6;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
srcDither=ImParMat(6);
hedit=uicontrol(...
'Style', 'edit',...
'String',srcDither,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@entryDither});
% Callback: validate and store the Dither (accepts 0..5 despite the message)
function entryDither(source,~)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<0)||(user_entry>5))
errordlg('You must enter a numeric value between 1 and 4','Bad Input','modal')
return
end
Dither=user_entry;
ImParMat(6)=Dither;
Dither;
end
% SearchRange entry box (ImParMat(12)/searchRangeNum)
btnNumber=7;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
% TODO, I don't think these are defined?
try
srchRange=ImParMat(12);
catch % Legacy default value was 18 before being made a user input variable (ImParMat(12)). A preferable value now might be 12 or 14.
srchRange=18;
ImParMat(12)=18;
end
% NOTE(review): an 'edit' uicontrol normally takes 'String', not 'Value' --
% confirm the box displays searchRangeNum as intended
hSearchRange=uicontrol(...
'Style', 'edit',...
'Value',searchRangeNum,...
'Units','normalized',...
'Position', btnPos,...
'callback',{@searchRangeCallback});
% Callback: validate and store the SearchRange (1..50)
function searchRangeCallback(source,~)
user_entry=str2double(get(source,'string'));
if (isnan(user_entry)||(user_entry<1)||(user_entry>50)) %originally 18; 19_0729 increased
errordlg('You must enter a numeric value between 1 and 18 12->18 recommended. (ImParMat(12)))','Bad Input','modal')
return
end
searchRangeNum=user_entry;
end
% Ncode 12_0120 for reading in numeric folder names
% Collect only scan folders whose names are numeric
nlist=dir(fullfile(scansDir,'*'));
nnn=0;
for n=1:size(nlist,1)
if (~isempty(str2num(nlist(n).name)))
nnn=nnn+1;
PnumLst(nnn)=(str2num(nlist(n).name));
sl(nnn,1)={(nlist(n).name)};
end
end
scanSize=size(sl,1);
scanMax=max(str2double(sl));
% Multi-select listbox of scan folders
hListbox=uicontrol(...
'Style', 'listbox',...
'String',sort(sl),...
'value',[],...
'max',1000,...
'min',1,...
'Units','normalized',...
'Position', [.70 .40 .10 .60],...
'callback',{@load_listbox}); %'uiresume(gcbf)'); 'Position', [5 100 60 20])
% Callback: record the selected scans; for a single selection show its
% stored per-scan CSearchRange in the SearchRange box
function load_listbox(source,~)
userIndx=(get(source,'value'));
userStr=(get(source,'string'));
%scLstIndx=str2num(char(strrep(userStr(userIndx), 'Scan', '')))
user_entry=userStr(userIndx);
scLst=user_entry;
if size(scLst,1)>1
% searchRangeNum=num2str(ImParMat(12))
set(hSearchRange,'string',num2str(ImParMat(12)))
else
try
searchRangeNum=CSearchRange(str2double(scLst));
set(hSearchRange,'string',CSearchRange(str2double(scLst)))
catch
% CSrchRng=num2str(ImParMat(12))
% set(hSearchRange,'string',num2str(ImParMat(12)))
end
end
end
scLst;
% Continue button: releases the uiwait below
btnNumber=10;
yPos=0.85-(btnNumber-1)*(btnHt+spacing);
btnPos=[xPos yPos-spacing btnWid btnHt];
hedit8=uicontrol(...
'Style', 'pushbutton',...
'String',{'Continue'},...
'Units','normalized',...
'Position', btnPos,...
'callback','uiresume(gcbf)');
% Labels
xLPos=0.175;
yPos=0;
btnWid=0.20;
lblNumber=5;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','Radius',... %'String','Width',...
'Units','normalized',...
'Position', btnPos);
lblNumber=6;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','Dither',...
'Units','normalized',...
'Position', btnPos);
lblNumber=7;
yPos=0.85-(lblNumber-1)*(btnHt+spacing);
btnPos=[xLPos yPos-spacing btnWid btnHt];
htxt=uicontrol(...
'Style', 'text',...
'String','SearchRange',...
'Units','normalized',...
'Position', btnPos);
% Block until the user presses Continue
uiwait(gcf);
% NOTE(review): CSrchRng is never assigned in the visible code (only in a
% commented-out line above) -- this loop will error unless it is set
% elsewhere; confirm
for i=1:length(scLst)
CSearchRange(str2double(scLst(i)))=CSrchRng;
ImParMat(12)=CSrchRng;
end
ImParMat;
searchRangeNum;
save(pointMapsFile, 'ImParMat');
save(searchRangeFile,'searchRangeNum');
close
return
end

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,15 @@
%% PART OF GUI FUNCTIONALITY %%
% Display the position of the data cursor
function output_txt = datatipp(~,event_obj)
% obj Currently not used (empty)
% event_obj Handle to event object
% output_txt Data cursor text string (string or cell array of strings).
pos=get(event_obj,'Position');
output_txt={['X: ',num2str(pos(1),4)],['Y: ',num2str(pos(2),4)]};
% If there is a Z-coordinate in the position, display it as well
if length(pos) > 2
output_txt{end+1}=['Z: ',num2str(pos(3),4)];
end
end

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,36 @@
%% CALLED BY par4Gbl_Main8c.m %%
%
% p4loop8c: runs the per-scan image/curve-fit worker par4GblFnc8c over all
% selected scans inside a parfor loop, collecting each scan's outputs into
% sliced cell arrays. When SWsingleSc==1 the parfor is forced to run
% serially (0 workers) for debugging; otherwise it uses the default pool.
% All Tmp* arguments/outputs are pass-through state carried around the
% parfor boundary (parfor cannot use globals directly).
function[p4L4,...
TmpexpScanIntens4,TmpFexpScanSpots4,TmpFexpScanBMtp4,TmpanlZoneRefs4,Tmpsbdg4]= ...
p4loop8c(parMat,tptLength,numScans,selScanNumLst,SWsingleSc,Fflg,PTmapPos,optCirMask,diaExt,doCircle,cirPixA,cirMask,width, ...
TmpexpScanIntens00,TmpFexpScanSpots00,TmpFexpScanBMtp00,TmpanlZoneRefs00,~,tifFileLstP4,pathname,ImParMat, ...
numRows,numCols,scLst,easyResultsDir,scansDir, p4L00,TmpexpScanIntens4,TmpFexpScanSpots4,TmpFexpScanBMtp4,TmpanlZoneRefs4, Tmpsbdg00, Tmpsbdg4)
% Copy the ..00 inputs into loop-local variables (parfor broadcast variables)
p4L0=p4L00;
TmpexpScanIntens0=TmpexpScanIntens00;
TmpFexpScanSpots0=TmpFexpScanSpots00;
TmpFexpScanBMtp0=TmpFexpScanBMtp00;
TmpanlZoneRefs0=TmpanlZoneRefs00;
Tmpsbdg0= Tmpsbdg00;
% for scCount=1:numScans
% 0 workers = serial execution (debuggable); inf = use the available pool
if SWsingleSc==1
parforArg=0;
else
parforArg=inf;
end
% for (scCount=1:numScans)
parfor (scCount=1:numScans,parforArg)
scCount % for debugging
% Fresh 18-slot result cell for each scan (overwrites the p4L00 copy)
p4L0=cell(18,1);
[p4L3,TmpexpScanIntens3,TmpFexpScanSpots3,TmpFexpScanBMtp3,TmpanlZoneRefs3,Tmpsbdg3]= ...
par4GblFnc8c(parMat,tptLength,numScans,selScanNumLst,SWsingleSc,Fflg,PTmapPos,optCirMask,diaExt,doCircle,cirPixA,cirMask,width, ...
TmpexpScanIntens0,TmpFexpScanSpots0,TmpFexpScanBMtp0,TmpanlZoneRefs0,scCount,tifFileLstP4,pathname,ImParMat, ...
numRows,numCols, scLst,easyResultsDir,scansDir, p4L0,Tmpsbdg0);
% Sliced outputs: one column/cell per scan
p4L4(:,scCount)=p4L3; % (:,scCount);
TmpexpScanIntens4(scCount)=TmpexpScanIntens3;
TmpFexpScanSpots4(scCount)=TmpFexpScanSpots3;
TmpFexpScanBMtp4(scCount)=TmpFexpScanBMtp3;
TmpanlZoneRefs4(scCount)=TmpanlZoneRefs3;
Tmpsbdg4(scCount)=Tmpsbdg3;
end

View File

@@ -0,0 +1,300 @@
%% CALLED By p4loop8c.m %%
%
% par4GblFnc8c: per-scan worker executed (possibly) inside a parfor loop.
% For the scan selected by scCount it:
%   1. reads the scan's tif files, stamps each with its file-modification
%      time via imfinfo, and sorts them chronologically;
%   2. builds the imaging time series (hours) for the scan;
%   3. when at least 3 valid time points exist, converts the images into
%      per-spot intensity and growth-area data (NIscanIntensBGpar4GblFnc)
%      and runs the curve fitter (NCfitImCFparforFailGbl2); otherwise fills
%      the outputs with default/sentinel values;
%   4. packs everything into the 18-element cell p4L2 for the caller.
function [p4L2, TmpexpScanIntens3,TmpFexpScanSpots3,TmpFexpScanBMtp3,TmpanlZoneRefs3,Tmpsbdg3]= ...
par4GblFnc8c(parMat,tptLength,~,selScanNumLst,~,Fflg,PTmapPos,optCirMask,diaExt,doCircle,cirPixA,cirMask,~,...
TmpexpScanIntens,TmpFexpScanSpots,TmpFexpScanBMtp,TmpanlZoneRefs,scCount,tifFileLstP4,~,ImParMat, ...
numRows,numCols,scLst,easyResultsDir,scansDir,~, Tmpsbdg)
global printResultsDir
selScan=selScanNumLst(scCount);
tptLength0=length(tifFileLstP4{scCount});
% tptLength=numFiles;
tifFileLst={tifFileLstP4(scCount)};
% Extract the Imaging time stamps from selected tif files
% clear('e','f'); %can't use clear inside a parfor loop. Preallocation
% can be larger than useable .bmp files! Therefore must be small and
% increased during for loop to maintain cell size integrity between e & f
e=cell(1,2); f=cell(1,2); %(tptLength,2); f=cell(tptLength,2);
nndx=0;
for tPt=1:tptLength0 %size(tifFileLst,1) %length(tifFileLst)
% Bare expressions below echo progress to the console (parfor debugging)
tifFileLstP4{scCount}(tPt)
scLst;
scLst(scCount)
char(scLst(scCount))
char(fullfile(scansDir,char(scLst(scCount))))
scansDir;
swCatch=0;
nndx=nndx+1;
tifFile=char(fullfile(scansDir,char(scLst(scCount)), tifFileLstP4{scCount}(tPt)));
try
info=imfinfo(tifFile); % D:\jwrDevel\DevelCurveFittingJWR\ImageScans\Scan2\020hr002.tif
catch ME
% Note: During parallel operation Matlab will not open file (fid)
% etc. Therefore error message text will not be written.The only way
% to get it out would be to pass variable through called function to
% the ..Main8c script outside the parfor loop and print to file from
% there. Consequently for now it only prints error to file when one
% edits p4loop8c.m from 'parfor' to ordinary 'for' loop
fFail=((fullfile(printResultsDir,'ImageFileReadFailure.txt')));
fid=fopen(fFail,'a'); %create,open and append
% fprintf(fid,'%s \n',char(tifFile))
fclose(fid);
% Unreadable file: back out this slot and mark the failure
nndx=nndx-1;
swCatch=1;
rep=getReport(ME, 'basic');
rep=strcat('Read info failure for-',tifFile,' -', rep);
% fprintf(fid,'%s \n',rep) %See Note:
end
tptLength=nndx; % running count of successfully read files
scTmNumeric=1; %initialize for parfor
if swCatch==0
% Time stamp = the tif's file-modification date
scTmNumeric(nndx)=datenum(info.FileModDate);
e(nndx,:)={tifFile, scTmNumeric(nndx)};
% newtifFileLst(nndx)=tifFileLst(tPt);
end
end
% Clear tifFileLst;
% Sort the readable files chronologically by their timestamps
tifFileLst=cell(nndx,1);
f={sortrows(e,2)};
tifFileLst=f{1,1}(:,1);
areaOfIntensAboveBG=zeros(24,16,length(tifFileLst));
% Calculate Time series for each Plate of Selected Scan
lastPlateOnScan=1; %Ncode
tSeriesv=[]; t0Seriesv=[];
if tptLength>0 % added to jump over and fill data for invalid Sscan(plate runs
scTmNumv=cell2mat(f{1,1}(:,2)); % (:,2))120613 fix for variant length data
% tSeriesv: hours since just before the first scan; t0Seriesv: hours since it
% NOTE(review): prtTmNumv is only assigned here but is used unconditionally
% near the end of this function -- a scan with zero readable images would
% error there; confirm
prtTmNumv=min(scTmNumv)-.000001;
tSeriesv=((scTmNumv-prtTmNumv)*24);
t0Seriesv=((scTmNumv-scTmNumv(1))*24);
end
% Added to jump over and fill data for invalid Sscan(plate runs
if tptLength>=3
% Create blank scan
ImHeigth=2075;
ImWidth=1400;
Empsc=zeros(ImHeigth,ImWidth); %Ncode
% Start scan loop
lastPlateOnScan=1; %Ncode
disp('Before call to NIscanIntens.....')
% Execute Image conversion into Intensity Data
[Tmpsbdg2, scanIntens, ~, ~, ~, TmpexpScanIntens2, TmpFexpScanSpots2, TmpFexpScanBMtp2, TmpanlZoneRefs2,areaOfIntensAboveBG]= ...
NIscanIntensBGpar4GblFnc(Fflg,tifFileLst, ImParMat, PTmapPos,optCirMask,diaExt,doCircle,cirPixA,numRows,numCols,ImHeigth,ImWidth,cirMask, ...
tptLength,selScan,Empsc,TmpexpScanIntens,TmpFexpScanSpots,TmpFexpScanBMtp,TmpanlZoneRefs,easyResultsDir, Tmpsbdg);
TmpexpScanIntens3=TmpexpScanIntens2;
TmpFexpScanSpots3=TmpFexpScanSpots2;
TmpFexpScanBMtp3=TmpFexpScanBMtp2;
TmpanlZoneRefs3=TmpanlZoneRefs2;
Tmpsbdg3=Tmpsbdg2;
% clear plate
CFscanIntens=zeros(384,1); % zeros(16,24);
plate=[];
plate=zeros(24,16,tptLength);
% Column flip (16:-1:1) reorients the image data to plate coordinates
plate(:,:,:,1)=scanIntens(1:1:24,16:-1:1,:); % TmpexpScanIntens2(1:1:24,16:-1:1,:);%Ncode Dev3Vertical Similar to below
% NIcheck: Check for bad scans at time points (swapped plates etc.)
% Set up cell arrays for storing each plate in each Scan ..(scan,plate)
SWprint=0; % legacy text-file output is disabled
% Construct Legacy ...Intens.txt file
if SWprint==1
filename=fullfile(easyResultsDir,'PrintResults',(strcat('Plate',num2str(selScan),'_Intens.txt')));
fid=fopen(filename,'w');
end
if length(size(plate))==2 % only two dims if only one image
numOfImages=1;
else
numOfImages=size(plate,3);
end
% Flatten the 24x16 plate into a 384-row spot-by-timepoint matrix
CFscanIntens=zeros(384,numOfImages);
locIndx=0; pl=1;
for n=1:numCols % Ncode changed to 16 for Vert
for m=1:numRows % Ncode change to 24 for Vert
locIndx=locIndx+1;
for k=1:numOfImages
if SWprint==1
if k==1 && numOfImages==1, fprintf(fid,'%.2f\n',plate(m,n,k,pl));end
if k==1 && numOfImages~=1, fprintf(fid,'%.2f',plate(m,n,k,pl));end
if k>1 && k<numOfImages, fprintf(fid,'\t%.2f',plate(m,n,k,pl));end %
if k~=1 && k==numOfImages,fprintf(fid,'\t%.2f\n',plate(m,n,k,pl));end
end
CFscanIntens(locIndx,k)=plate(m,n,k,pl);
end
end
end
if SWprint==1
fclose(fid);
end
% printGrwAreas
% Area as the max number of pixels with intensities greater than the Background
% grArea(row,col,tPt,Plate) Note that images r and c are 90 degrees shifted
% and then mirror imaged. Thus data must be inverted and rotated to get
% for the rows a-p and col 1-24 (and the EMS expected 1-384) orientation
% clear grArea
grArea=ones(24,16,tptLength);
grArea(:,:,:,1)=areaOfIntensAboveBG(1:1:24,16:-1:1,:); %for Ncode dev3vert
% for p=1:lastPlateOnScan
if SWprint==1
% NOTE(review): SWgrowthArea is read here without a 'global' declaration in
% this function; this branch is unreachable while SWprint==0 -- confirm
if SWgrowthArea==1
filename=fullfile(printResultsDir,(strcat('Plate',num2str(selScan),'_Areas.txt')));
else
filename=fullfile(printResultsDir,(strcat('ALTgrowthA_Scan',num2str(selScan),'_Plate',num2str(1),'_Areas.txt')));
filename=fullfile(printResultsDir,(strcat('ALTgrowthA_Plate',num2str(selScan),'_Plate','_Areas.txt')));
end
fid=fopen(filename,'w');
end
% Per-spot growth area = max over time of the above-background pixel count
locIndx=0;
Ag=zeros(1,384);
for m=1:16 % Ncode change from 16 %Vert back to 16
for n=1:24 % Ncode change from 24 %Vert back to 24
growthArea=max(grArea(n,m,:,1));
if SWprint==1
fprintf(fid,'%3.f\n',growthArea);
end
locIndx=locIndx+1;
Ag(locIndx)=growthArea;
end
end
if SWprint==1
fclose(fid);
% fclose('all'); % TODO do we need this?
end
% AutoImCF curveFit kickoff setup
autoImCFlag=1;
if autoImCFlag==1, SWgrowthArea=1;end
times=tSeriesv;
scanAreas=Ag; % growthArea;
offsets=0;
suffix=strcat('Scan', num2str(selScan),'_Plate', num2str(1)); % char(QspLst(n));
AUCfinalTime=96;
arrayFormat=384;
% Version compatability fixes
%{
v11a='7.12.0.635 (R2011a)';
v14a='8.3.0.532 (R2014a)';
%v14b='8.4.0.150421 (R2014b)';
v18a='9.4.0.813654 (R2018a)';
v18b='9.5.0.944444 (R2018b)';
% Fourth order derivative of logistic equation
if isequal(v18a,version)||isequal(v18b,version)
syms t K r l;
fd4=diff(K / (1 + exp(-r* (t - l ))),t,4);
else
fd4=diff(sym('K / (1 + exp(-r* (t - l )))'),4);
end
sols=solve(fd4);
if isequal(v11a,version) || isequal(v14a,version)
else
tmpswap=sols(1);
sols(1)=sols(3);
sols(3)=tmpswap;
end
%}
% Recompute the logistic's 4th-derivative critical-point solutions here,
% handling the symbolic-toolbox API change after R2014a (newer versions
% also return the roots in a different order, hence the swap)
if verLessThan('matlab','8.3') % original work 23_0227 updated 23_0525 (8.4 changed to 8.3)
fd4=diff(sym('K / (1 + exp(-r* (t - l )))'),4);
sols=solve(fd4);
else % accomodate new matlab changes after 2014a fix 2nd update 23_0227
syms t K r l;
fd4=diff(K / (1 + exp(-r* (t - l ))),t,4);
sols=solve(fd4);
tmpswap=sols(1);
sols(1)=sols(3);
sols(3)=tmpswap;
end
% CFscanIntens
% Call CurveFit routine
% NCfitImCF(times, CFscanIntens, offsets, suffix, AUCfinalTime, arrayFormat, scanAreas, printResultsDir, autoImCFlag, selScan,sols) %,scan)%, scPltList) outputDirectory;
% [scanTmp2]=NCfitImCFparforFailGbl(times, CFscanIntens, offsets, suffix, AUCfinalTime, arrayFormat, scanAreas, printResultsDir, autoImCFlag, selScan, sols, scanTmp); %,scan)%, scPltList) outputDirectory;
[par4scanselIntensStd,par4scanselTimesStd,par4scanTimesELr,par4scanIntensELr,par4scanCFparameters,par4scanCFdate,outC2,outCstd2]= ...
NCfitImCFparforFailGbl2(parMat,times, CFscanIntens, offsets, suffix, AUCfinalTime, arrayFormat, scanAreas, printResultsDir, autoImCFlag, selScan, sols); %,scan)%, scPltList) outputDirectory;
else %fill with default values when an invalid plate scan occurs
CFscanIntens=zeros(16,24);
Ag=zeros(1,384);
% Preallocation for parfor loop
times=tSeriesv;
% NOTE(review): tSeriesv is a column vector, so size(times,2) is 1 here --
% confirm the intended length of the 1111 sentinel vector
st(1,1:size(times,2))=1111;
resMat(1,1:27)=0;
resMatStd=resMat;
outC2=zeros(384,27);
outCstd2=zeros(384,27);
for m=1:384
pa{m}=st;
par4scanCFparameters{m}=parMat;
par4scanCFdate{m}=datestr((now),31);
end
par4scanselTimesStd=pa;
par4scanselIntensStd=pa;
par4scanTimesELr=pa;
par4scanIntensELr=pa;
par4resMat=zeros(384,27);
par4resMatStd=zeros(384,27);
%{
TmpexpScanIntens00=cell(1); %cell(1,scanMax);
TmpFexpScanSpots00=cell(1); %cell(1,scanMax);
TmpFexpScanBMtp00=cell(1); %cell(1,scanMax);
TmpanlZoneRefs00=cell(1); %cell(1,scanMax);
Tmpsbdg00=cell(1);
%}
TmpexpScanIntens3=cell(1); %TmpexpScanIntens2;
TmpFexpScanSpots3=cell(1); %TmpFexpScanSpots2;
TmpFexpScanBMtp3=cell(1); %TmpFexpScanBMtp2;
TmpanlZoneRefs3=cell(1); %TmpanlZoneRefs2;
Tmpsbdg3=cell(1); %Tmpsbdg2;
end %if tptLenth>=3 line19 20_0928
tSeriesv; %debuggin parfor
% Pack the 18-slot result cell returned to p4loop8c:
%  1 time series  2 zero-based time series  3 first-scan print time
%  4 intensities  5 row/col refs  6 spot indices  7 growth areas
%  8-11 curve-fit data vectors  12 parameters  13 fit dates
%  14-15 result matrices  16 scan number  17 circle pixel area  18 run date
p4L1{1}=tSeriesv;
p4L1{2}=t0Seriesv;
p4L1{3}=datestr(prtTmNumv,31);
p4L1{4}=CFscanIntens;
locIndx=0;
for n=1:numCols % Ncode changed to 16 for Vert
for m=1:numRows % Ncode change to 24 for Vert
locIndx=locIndx+1;
rc=[n,m];
p4rcTmp(locIndx)={rc};
p4pIndxTmp(locIndx)=locIndx;
end
end
p4L1{5}=p4rcTmp;
p4L1{6}=p4pIndxTmp;
p4L1{7}=Ag;
p4L1{8}=par4scanselIntensStd;
p4L1{9}=par4scanselTimesStd;
p4L1{10}=par4scanTimesELr;
p4L1{11}=par4scanIntensELr;
p4L1{12}=par4scanCFparameters;
p4L1{13}=par4scanCFdate;
p4L1{14}=outC2;
p4L1{15}=outCstd2;
p4L1{16}=selScan;
p4L1{17}=cirPixA;
p4L1{18}=datestr((now),31); % TODO this seems bad
p4L2=p4L1;
end

View File

@@ -0,0 +1,246 @@
%% CALLED BY EASYconsole.m %%
global SWgrowthArea
global scLst
global ImParMat
global scansDir
global matFile
global fhconsole
global easyResultsDir
global printResultsDir
global pointMapsResultsDir
global fotosResultsDir
global matDir
global ImWidth
global ImHeigth
global numRows
global numCols
global scan
global scanMax
global tptLength
global easyDir
numRows=24; % for Single Vertical
numCols=16; % for Single Vertical
% CIRCLE related
doCircle=1; % use Circle area analysis 14_0807
radius=14;
ImParMat(10)=radius;
ImParMat(11)=doCircle;
% TODO what is this for?
Fflg=1;
% Don't think we need this
% fclose('all'); % close all open files
% Unset some vars just in case they are already set
clear('scanIntens','Scanfiles','pathname','tifFileLstP4');
try
clf(fhconsole,'reset');
catch
end
% No idea why we're doing this so commenting out
% close
% EASYconsole
if exist(matFile, 'file')
bkUpMatFile=fullfile(matDir,'BkUp',matFile);
copyfile(matFile,bkUpMatFile);
end
% Reloacated from 'PTmats' to prevent potential overwrite when PTmats is
% copied into new job when the PT template is about the same. We also
% now have a default template if one is not made. i.e., when the images
% from the new experiment are too sketchy to make a good pintool
% template. By moving it to 'Fotos' we avoid possible issues due to
% copying the Nbdg.mat file along with the default template '.mat' files.
% A copy of Ndbg.mat is placed also saved to the 'PTmats' directory
% after each run to allow previous version of EASY to access data made
% by EASY versions after 20_0819.
try
load(fullfile(fotosResultsDir,'Nbdg')); %Modified to load from 'Fotos' 20_0819
catch
load(fullfile(pointMapsResultsDir,'Nbdg')); %Left in to accomodate loads of work before 20_0819
end
% Load Fotos stored data
fotosToLoad = {'Coordinates', 'BGatTpts', 'anlZones', 'NCFparms'};
for i=1:length(fotosToLoad)
try
load(fullfile(fotosResultsDir, fotosToLoad{i}));
catch
load(fullfile(easyDir,'parameters'));
end
end
% Get Print Times
PrintTimes=[];
scLst={};
% Parameter Entry
NImParamRadiusGui(scansDir); % Ncode 122111replaced removed ,numOfPrtTimes)
width=24;
widthEx=width-1; % width extention from reference point
dither=ImParMat(6);
radius=ImParMat(10);
NIcircle;
% Load Stuff
lastPlateOnLastScan=1; % Ncode
if size(scLst,1)==1
SWsingleSc=1;
else
SWsingleSc=0;
end
dvec=datevec(datestr(floor(now))); %method to get current offset year '01-Jan-"currentyr"'
dvec(2)=1;
dvec(3)=1;
% yrOffset=datenum('01-Jan-2012');%(dvec); %('01-Jan-2009'); %time num default is currentyear jan1
numScans=size(scLst,1);
if(isequal(SWsingleSc,1))
selScan=str2double(char(scLst(1)));
else
% startScan=1;
end
SWgrowthArea=ImParMat(9);
load(fullfile(pointMapsResultsDir,'NPTmapSearch'));
PTmapPos=detPos;
selScanNumLst=[];
Scanfiles=[];
pathname=[];
for ii=1:length(scLst)
if (SWsingleSc == 1)
% TODO proably need to make this more explicit with paths
[Scanfiles, pathname]=uigetfile('*.bmp', 'Select files','MultiSelect','on'); % change '*hr*.bmp' 12/20/2011
if ischar(Scanfiles)
scd=imread(char(Scanfiles));
tptLength=1;
else
scd=imread(char(Scanfiles(1,1)));
tptLength=length(Scanfiles);
end
ImHeigth=size(scd,1);
ImWidth=size(scd,2);
sc=scd(1:ImHeigth,1:ImWidth);
end
numFiles=size(Scanfiles,2);
% Initialize tifFileLstP4 for the parfor loop: list this scan's .bmp images.
% BUG FIX: the dir() result was previously discarded, leaving
% tifFileLst4MultiT undefined (or stale) when used on the lines below.
tifFileLst4MultiT=dir(fullfile(scansDir, char(scLst(ii)), '*.bmp'));
numFiles=length(tifFileLst4MultiT);
tptLength=numFiles;
tifFileLstP4{ii}={tifFileLst4MultiT.name};
end
for jj=1:numScans % startScan:numScans
selScan=str2double(char(scLst(jj)));
selScanNumLst(jj)=selScan;
end
selScanNumLst2=selScanNumLst; % function passthrough, passback to par4gbl_Main 20_0205
% Preallocation
scCount=[];
TmpexpScanIntens00=cell(1); % cell(1,scanMax);
TmpFexpScanSpots00=cell(1); % cell(1,scanMax);
TmpFexpScanBMtp00=cell(1); % cell(1,scanMax);
TmpanlZoneRefs00=cell(1); % cell(1,scanMax);
Tmpsbdg00=cell(1);
TmpexpScanIntens4=cell(1,numScans);
TmpFexpScanSpots4=cell(1,numScans);
TmpFexpScanBMtp4=cell(1,numScans);
TmpanlZoneRefs4=cell(1,numScans);
Tmpsbdg4=cell(1,numScans);
TmpexpScanIntens5=cell(1,numScans);
TmpFexpScanSpots5=cell(1,numScans);
TmpFexpScanBMtp5=cell(1,numScans);
TmpanlZoneRefs5=cell(1,numScans);
Tmpsbdg5=cell(1,numScans);
p4L00=cell(18,1);
p4L0=p4L00;
p4L4=cell(18,numScans);
p4L5=p4L4;
Ag=ones(384);
CFscanIntens=zeros(16,24);
[p4L4,TmpexpScanIntens5,TmpFexpScanSpots5,TmpFexpScanBMtp5,TmpanlZoneRefs5,Tmpsbdg5]= ...
p4loop8c(parMat,tptLength,numScans,selScanNumLst,SWsingleSc,Fflg,PTmapPos,optCirMask,diaExt,doCircle,cirPixA,cirMask,width, ...
TmpexpScanIntens00,TmpFexpScanSpots00,TmpFexpScanBMtp00,TmpanlZoneRefs00,scCount,tifFileLstP4,pathname,ImParMat, ...
numRows,numCols,scLst,easyResultsDir,scansDir, p4L00,TmpexpScanIntens4,TmpFexpScanSpots4,TmpFexpScanBMtp4,TmpanlZoneRefs4, ...
Tmpsbdg00,Tmpsbdg4);
for scanCnt=1:numScans
selScan=p4L4{16,scanCnt}; % determine the actual scan in the scanCnt parfor distributed "id"
scan(selScan).plate(1).tSeries=cell2mat(p4L4(1,scanCnt));
scan(selScan).plate(1).t0Series=cell2mat(p4L4(2,scanCnt));
scan(selScan).plate(1).printTm=cell2mat(p4L4(3,scanCnt));
scan(selScan).plate(1).intens=cell2mat(p4L4(4,scanCnt));
scan(selScan).plate(1).rc=p4L4(5,scanCnt);
scan(selScan).plate(1).pIndx=cell2mat(p4L4(6,scanCnt));
scan(selScan).plate(1).Ag=cell2mat(p4L4(7,scanCnt));
scan(selScan).plate(1).selIntens=p4L4(8,scanCnt);
scan(selScan).plate(1).selTimes=p4L4(9,scanCnt);
scan(selScan).plate(1).filterTimes=p4L4(10,scanCnt);
scan(selScan).plate(1).normIntens=p4L4(11,scanCnt);
% scan(selScan).plate(1).CFparameters=p4L4(12,scanCnt); %Need to convert to a matrix form like Old versions
CFparm(1:384)=p4L4{12,scanCnt}(1:384);
scan(selScan).plate(1).CFparameters=CFparm;
scan(selScan).plate(1).CFdate=p4L4(13,scanCnt);
scan(selScan).plate(1).CFout=cell2mat(p4L4(14,scanCnt));
scan(selScan).plate(1).CFoutStd=cell2mat(p4L4(15,scanCnt));
scan(selScan).Awindow=cell2mat(p4L4(17,scanCnt));
scan(selScan).Idate=cell2mat(p4L4(18,scanCnt));
expScanIntens(selScan)=TmpexpScanIntens5(scanCnt);
FexpScanSpots(selScan)=TmpFexpScanSpots5(scanCnt);
FexpScanBMtp(selScan)=TmpFexpScanBMtp5(scanCnt);
anlZoneRefs(selScan)=TmpanlZoneRefs5(scanCnt);
if ~isempty(Tmpsbdg5{scanCnt})
sbdg(selScan)=Tmpsbdg5(scanCnt);
else
sbdg{selScan}=uint8(zeros(24,16,4));
end
end
% Save data in .mat files
save(matFile,'scan');
% save((fullfile(easyResultsDir,'PTmats','Nbdg')), 'sbdg'); %legacy location can probably get rid of in time
save((fullfile(fotosResultsDir,'Nbdg')), 'sbdg');
save((fullfile(fotosResultsDir,'Coordinates')),'FexpScanSpots') %Saves frames at each tPt
save((fullfile(fotosResultsDir,'BGatTpts')),'FexpScanBMtp')
save((fullfile(fotosResultsDir,'anlZones')),'anlZoneRefs')%Saves anl Positions at each tPt
% Print FitResults
fileExt='.txt';
filePrefix='FitResults_';
for scanCnt=1:numScans
selScan=p4L4{16,scanCnt}; % determine the actual scan in the scanCnt parfor distributed "id"
fileSuffix=sprintf('Scan%d_Plate%d', selScan, 1);
fileNamePlate=[filePrefix fileSuffix fileExt];
fileName=fullfile(printResultsDir, fileNamePlate); % [outputDirectory fileNamePlate];
% This,fprint for loop,is an very old legacy feature which slows processing. Could be
% removed but allows easy observation of how a run is progressing and can be
% used as a diagnostic tool.
outCprint=p4L4;
fid=fopen(fileName,'w');
fprintf(fid, 'Num.\tAUC\tMSR\tK\tr\tl\tR-squared\tK-lower\tK-upper\tr-lower\tr-upper\tl-upper\tl-lower\tArea\tLastInten\tSpineMaxRateTimePt\tLastFitTimePt\n');
for n=1:384 % startCount:numCultures
fprintf(fid,'%d\t',n);
fprintf(fid, '%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\n',...
outCprint{14,scanCnt}(n,1),outCprint{14,scanCnt}(n,2),outCprint{14,scanCnt}(n,3),outCprint{14,scanCnt}(n,4),...
outCprint{14,scanCnt}(n,5),outCprint{14,scanCnt}(n,6),outCprint{14,scanCnt}(n,7),outCprint{14,scanCnt}(n,8),...
outCprint{14,scanCnt}(n,9),outCprint{14,scanCnt}(n,10),outCprint{14,scanCnt}(n,11),outCprint{14,scanCnt}(n,12),...
outCprint{14,scanCnt}(n,13),outCprint{14,scanCnt}(n,14),outCprint{14,scanCnt}(n,15),outCprint{14,scanCnt}(n,16));
end
fclose(fid);
end

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,390 @@
#!/usr/bin/perl
# $Id: analyze.pl,v 1.9 2008/05/14 20:45:37 sherlock Exp $
# Date : 16th October 2003
# Author : Gavin Sherlock
# License information (the MIT license)
# Copyright (c) 2003 Gavin Sherlock; Stanford University
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
use strict;
use warnings;
use diagnostics;
use Data::Dumper;
use Getopt::Long;
use IO::File;
use GO::TermFinder;
use GO::AnnotationProvider::AnnotationParser;
use GO::OntologyProvider::OboParser;
use GO::TermFinderReport::Text;
use GO::Utils::File qw (GenesFromFile);
use GO::Utils::General qw (CategorizeGenes);
$|=1;
###################################################################################
sub Usage{
###################################################################################
    # Print an optional caller-supplied message, then the usage text, and exit.
    my $message = shift;
    if (defined $message){
        print $message, "\n";
    }
    print <<USAGE;
This program takes a list of files, each of which contain a list of
genes, with one gene per line. It will findTerms for the lists of
genes in each of the GO aspects, outputting the results to a file
named for the original file, but with a .terms extension. It will only
output terms with a corrected P-value of <= 0.05.
It will use the first supplied argument as the annotation file, the
second argument as the expected number of genes within the organism,
the third argument is the name of the obo file, and all subsequent
files as ones containing lists of genes.
Usage:
analyze.pl <annotation_file> <numGenes> <obofile> <file1> <file2> <file3> ... <fileN>
e.g.
analyze.pl -a ../t/gene_association.sgd -n 7200 -o ../t/gene_ontology_edit.obo genes.txt genes2.txt
USAGE
    # Never returns: exits the whole program after printing usage.
    exit;
}
# we need at least 3 arguments, an annotation file, the number of
# genes in the genome, and a file of input genes to test
&Usage if (@ARGV < 3);
# now get our annotation file and number of genes
my $annotationFile = '';
my $totalNum = '';
my $oboFile = '';
my $background = '';
my $aspect = '';
GetOptions( "annotations=s" => \$annotationFile,
"obofile=s" => \$oboFile,
"background=s" => \$background,
"numGenes=i" => \$totalNum,
"aspect=s" => \$aspect
);
if ($oboFile !~ /\.obo$/){
# require the obo file to have a .obo extension
&Usage("Your obo file does not have a .obo extension.");
}
if ($annotationFile !~ /\.sgd$/){
&Usage("Perhaps we are missing an annotation file.");
}
my @population = ();
if ($background) {
@population = GenesFromFile($background)
}
# now set up the objects we need
my $process = GO::OntologyProvider::OboParser->new(ontologyFile => $oboFile,
aspect => 'P');
my $component = GO::OntologyProvider::OboParser->new(ontologyFile => $oboFile,
aspect => 'C');
my $function = GO::OntologyProvider::OboParser->new(ontologyFile => $oboFile,
aspect => 'F');
my $annotation = GO::AnnotationProvider::AnnotationParser->new(annotationFile=>$annotationFile);
my @termFinders = ();
if ($background) {
if ($aspect =~ /^P$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $process,
population => \@population,
aspect => 'P');
}
if ($aspect =~ /^C$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $component,
population => \@population,
aspect => 'C');
}
if ($aspect =~ /^F$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $function,
population => \@population,
aspect => 'F');
}
} else {
if ($aspect =~ /^P$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $process,
totalNumGenes => $totalNum,
aspect => 'P');
}
if ($aspect =~ /^C$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $component,
totalNumGenes => $totalNum,
aspect => 'C');
}
if ($aspect =~ /^F$|^$/) {
push @termFinders, GO::TermFinder->new(annotationProvider=> $annotation,
ontologyProvider => $function,
totalNumGenes => $totalNum,
aspect => 'F');
}
}
my $report = GO::TermFinderReport::Text->new();
my $cutoff = 0.1;
# now go through each file
foreach my $file (@ARGV){
print "Analyzing $file\n";
my @genes = GenesFromFile($file);
my (@list, @notFound, @ambiguous);
CategorizeGenes(annotation => $annotation,
genes => \@genes,
ambiguous => \@ambiguous,
unambiguous => \@list,
notFound => \@notFound);
my $outfile = $file.".terms";
my $fh = IO::File->new($outfile, q{>} )|| die "Cannot make $outfile : $!";
print "Results being put in $outfile\n";
if (@list){
print $fh "The following gene(s) will be considered:\n\n";
foreach my $gene (@list){
print $fh $gene, "\t", $annotation->standardNameByName($gene), "\n";
}
print $fh "\n";
}else{
print $fh "None of the gene names were recognized\n";
print $fh "They were:\n\n";
print $fh join("\n", @notFound), "\n";
$fh->close;
next;
}
if (@ambiguous){
# note, some of these ambiguous names would be perfectly fine
# if put into GO::TermFinder if they are also standard names.
# Currently the behavior of analyze.pl differs from the
# default behavior of GO::TermFinder
print $fh "The following gene(s) are ambiguously named, and so will not be used:\n";
print $fh join("\n", @ambiguous), "\n\n";
}
if (@notFound){
print $fh "The following gene(s) were not recognized, and will not be considered:\n\n";
print $fh join("\n", @notFound), "\n\n";
}
foreach my $termFinder (@termFinders){
# it's possible that the supplied number of genes on the
# command line was less than indicated by the annotation
# provider, and thus the TermFinder may have used a larger
# number than was entered on the command line.
my $totalNumGenesUsedInBackground = $termFinder->totalNumGenes;
print $fh "Finding terms for ", $termFinder->aspect, "\n\n";
my @pvalues = $termFinder->findTerms(genes => \@list, calculateFDR => 1);
# BUG FIX: an empty result list has $#pvalues == -1, not 0; the old test
# fired only when exactly ONE p-value structure was returned and never
# when there were none (the case the warning describes).
if (@pvalues == 0) {
    print "WARNING: NO p-value structures returned by findTerms(";
    print join ",", @list;
    print ")\n";
    print $fh "\n\n";
    $fh->close;
    exit();
}
my $numHypotheses = $report->print(pvalues => \@pvalues,
numGenes => scalar(@list),
totalNum => $totalNumGenesUsedInBackground,
cutoff => $cutoff,
fh => $fh);
my $numProcesses = $#pvalues + 1;
print "Number of GO processes found: $numProcesses\n";
print "Number of hypotheses passed cutoff: $numHypotheses\n";
# if they had no significant P-values
if ($numHypotheses == 0){
print $fh "No terms were found for this aspect with a corrected P-value <= $cutoff.\n";
}
print $fh "\n\n";
}
$fh->close;
}
=pod
=head1 NAME
analyze.pl - batch processor to find terms for lists of genes in various files
=head1 SYNOPSIS
This program takes a list of files, each of which contain a list of
genes, with one gene per line. It will findTerms for the lists of
genes in each of the GO aspects, outputting the results to a file
named for the original file, but with a .terms extension. It will
only output terms with a corrected P-value of <= 0.05.
It will use the first supplied argument as the annotation file, the
second argument as the expected number of genes within the organism,
the third argument is the name of the obo file, and all subsequent
files as ones containing lists of genes.
Usage:
analyze.pl <annotation_file> <numGenes> <obofile> <file1> <file2> <file3> ... <fileN>
e.g.
analyze.pl ../t/gene_association.sgd 7200 ../t/gene_ontology_edit.obo genes.txt genes2.txt
An example output file might look like this:
The following gene(s) will be considered:
YDL235C YPD1
YDL224C WHI4
YDL225W SHS1
YDL226C GCS1
YDL227C HO
YDL228C YDL228C
YDL229W SSB1
YDL230W PTP1
YDL231C BRE4
YDL232W OST4
YDL233W YDL233W
YDL234C GYP7
Finding terms for P
Finding terms for C
Finding terms for F
-- 1 of 15--
GOID GO:0005096
TERM GTPase activator activity
CORRECTED P-VALUE 0.0113038452336839
UNCORRECTED P-VALUE 0.00113038452336839
NUM_ANNOTATIONS 2 of 12 in the list, vs 31 of 7272 in the genome
The genes annotated to this node are:
YDL234C, YDL226C
-- 2 of 15--
GOID GO:0008047
TERM enzyme activator activity
CORRECTED P-VALUE 0.0316194107645226
UNCORRECTED P-VALUE 0.00316194107645226
NUM_ANNOTATIONS 2 of 12 in the list, vs 52 of 7272 in the genome
The genes annotated to this node are:
YDL234C, YDL226C
-- 3 of 15--
GOID GO:0005083
TERM small GTPase regulatory/interacting protein activity
CORRECTED P-VALUE 0.0340606972468798
UNCORRECTED P-VALUE 0.00340606972468798
NUM_ANNOTATIONS 2 of 12 in the list, vs 54 of 7272 in the genome
The genes annotated to this node are:
YDL234C, YDL226C
-- 4 of 15--
GOID GO:0030695
TERM GTPase regulator activity
CORRECTED P-VALUE 0.0475469908576535
UNCORRECTED P-VALUE 0.00475469908576535
NUM_ANNOTATIONS 2 of 12 in the list, vs 64 of 7272 in the genome
The genes annotated to this node are:
YDL234C, YDL226C
=head1 AUTHORS
Gavin Sherlock, sherlock@genome.stanford.edu
=cut

View File

@@ -0,0 +1,48 @@
gene_association.sgd.gz This file is TAB delimited and contains all GO annotations for yeast genes (protein and RNA)
The gene_association.sgd.gz file uses the standard file format for
gene_association files of the Gene Ontology (GO) Consortium. A more
complete description of the file format is found here:
http://www.geneontology.org/GO.format.annotation.shtml
Columns are: Contents:
1) DB - database contributing the file (always "SGD" for this file)
2) DB_Object_ID - SGDID
3) DB_Object_Symbol - see below
4) NOT (optional) - 'NOT', 'contributes_to', or 'colocalizes_with' qualifier for a GO annotation, when needed
5) GO ID - unique numeric identifier for the GO term
6) DB:Reference(|DB:Reference) - the reference associated with the GO annotation
7) Evidence - the evidence code for the GO annotation
8) With (or) From (optional) - any With or From qualifier for the GO annotation
9) Aspect - which ontology the GO term belongs in
10) DB_Object_Name(|Name) (optional) - a name for the gene product in words, e.g. 'acid phosphatase'
11) DB_Object_Synonym(|Synonym) (optional) - see below
12) DB_Object_Type - type of object annotated, e.g. gene, protein, etc.
13) taxon(|taxon) - taxonomic identifier of species encoding gene product
14) Date - date GO annotation was made
15) Assigned_by - source of the annotation (e.g. SGD, UniProtKB, YeastFunc, bioPIXIE_MEFIT)
Note on SGD nomenclature (pertaining to columns 3 and 11):
Column 3 - When a Standard Gene Name (e.g. CDC28, COX2) has been
conferred, it will be present in Column 3. When no Gene Name
has been conferred, the Systematic Name (e.g. YAL001C,
YGR116W, YAL034W-A) will be present in column 3.
Column 11 - The Systematic Name (e.g. YAL001C, YGR116W, YAL034W-A,
Q0010) will be the first name present in Column 11. Any other
names (except the Standard Name, which will be in Column 3 if
one exists), including Aliases used for the gene will also be
present in this column.
Please note that ORFs classified as 'Dubious' are not included in this file, as there is currently
no experimental evidence that a gene product is produced in S. cerevisiae.
This file is updated weekly.
For more information on the Gene Ontology (GO) project, see:
http://www.geneontology.org/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env perl
use strict;
use warnings;
use diagnostics;
use File::Map qw(map_file);
my $infile = shift;
my $input;
map_file $input, $infile;
{
local $_ = $input;
(my $f = $infile) =~ s/(.*\/)?(.*)(\.[^\.]*){2}/$2/;
my %orfgene = (/(Y\w+)\s+(\w+)\n/g);
my @indices = (/\Q-- \E(\d+) of \d+\Q --\E/g);
my @ids = (/GOID\s+GO:(\d+)/g);
my @terms = (/TERM\s+(.*?)\n/g);
my @pvalues = (/\nCORRECTED P-VALUE\s+(\d.*?)\n/g);
my @clusterf = (/NUM_ANNOTATIONS\s+(\d+ of \d+)/g);
my @bgfreq = (/, vs (\d+ of \d+) in the genome/g);
my @orfs = (/The genes annotated to this node are:\n(.*?)\n/g);
s/, /:/g for @orfs;
my @genes;
for my $orf (@orfs) {
my @otmp = split /:/, $orf;
my @gtmp = map { $orfgene{$_} } @otmp;
push @genes, (join ':', @gtmp);
}
&header();
for my $i (0 .. (@ids - 1)) {
&report($f, $ids[$i], $terms[$i], $pvalues[$i], $clusterf[$i], $bgfreq[$i], $orfs[$i], $genes[$i]);
}
}
# Print the tab-separated column-header row for the concatenated report.
sub header {
    print "REMc ID\tGO_term ID\tGO-term\tCluster frequency\tBackground frequency\tP-value\tORFs\tGenes\n";
}
# Print one tab-separated result row. The frequency arguments arrive as
# "N of M" strings and are reformatted as "N out of M genes, P%".
# BUG FIX: the old code used $1/$2 unconditionally after the match, so a
# failed match reused stale capture variables and M == 0 divided by zero;
# now the string is left unchanged when it cannot be reformatted safely.
sub report {
    my ($f, $id, $term, $p, $cfreq, $bgfreq, $orfs, $genes) = @_;
    if ($cfreq =~ /(\d+) of (\d+)/ && $2 > 0) {
        $cfreq = sprintf "%d out of %d genes, %.1f%%", $1, $2, (100*$1/$2);
    }
    if ($bgfreq =~ /(\d+) of (\d+)/ && $2 > 0) {
        $bgfreq = sprintf "%d out of %d genes, %.1f%%", $1, $2, (100*$1/$2);
    }
    print "$f\t$id\t$term\t$cfreq\t$bgfreq\t$p\t$orfs\t$genes\n";
}

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python
"""
Improved code to determine the origin column (OligCol) without user input of the argument
and removed the former sys.argv[2]. JWR 22_0816
"""
"""
this code can be used for the parse of the REMc "-finalTable.csv" output file
to make a series of subdatasets, which reflect the pedigree structure
of the way cluters breaking up.
"""
import sys, os, string, glob
try:
data_file_Path = sys.argv[1]
#print data_file_Path
#cluster_ori_col_num = sys.argv[2]
output_path = sys.argv[2]
except:
print ('Usage: python parse_clustering_result_to_Pedigree_Dataset_and_genelist.py /datasetPath/datasetfilename cluster_origin_column_num output_path_name')
print ('Data file not found')
sys.exit(1)
#define a function to reading files and generate the list
def read_file(file_path):
    """Return the gene names (second CSV column) from every data row.

    The first line of the file is treated as a header and skipped.
    """
    with open(file_path, 'r') as handle:
        handle.readline()  # discard the header row
        genes = []
        for row in handle:
            genes.append(row.strip().split(',')[1])
        return genes
# define a function to write the list into a file named in hierarchical series
def write_cluster_orf_list(orf_list, output_dir, cluster_name):
    """Write one stripped ORF name per line to <output_dir>/<cluster_name>.txt.

    BUG FIX: the path was built with the malformed f-string
    f"cluster_name, 'txt') (a syntax error); the intended file name,
    matching the legacy implementation, is '<cluster_name>.txt'.
    """
    cluster_file_path = os.path.join(output_dir, f"{cluster_name}.txt")
    with open(cluster_file_path, 'w') as outfile:
        for orf in orf_list:
            outfile.write(orf.strip() + '\n')
# define a function to write the clusters information into a series of files
def write_cluster_results(attributes, orf_list, data_dict, output_directory, cluster_name):
    """Write the header plus one stored data row per ORF.

    Output goes to <output_directory>/<cluster_name>-finaltable.csv; each
    row is looked up in data_dict by the stripped ORF name.
    """
    target = os.path.join(output_directory, f"{cluster_name}-finaltable.csv")
    rows = [attributes] + [data_dict[orf.strip()].strip() for orf in orf_list]
    with open(target, 'w') as out:
        out.write('\n'.join(rows) + '\n')
# define a function to write the cluster name origina extensive final table
def write_extended_final_table(attributes, data, ori_name_column_number, output_directory, output_file_name):
    """Write <output_file_name>-oriExtFinalTable.csv with origin columns.

    Each row from `data` is extended with its ';'-separated cluster-origin
    names (taken from 1-based column `ori_name_column_number`) appended as
    separate trailing columns.

    BUG FIX: the loop variable was misspelled `orii_name`, raising
    NameError on the first row.
    """
    output_file_path = os.path.join(output_directory, f"{output_file_name}-oriExtFinalTable.csv")
    with open(output_file_path, 'w') as output_file:
        output_file.write(attributes)
        output_file.write('\n')
        for orf in data:
            elements = data[orf].split(',')
            ori_name_list = elements[int(ori_name_column_number) - 1].strip().split(';')
            for ori_name in ori_name_list:
                elements.append(ori_name.strip())
            output_file.write(','.join(elements))
            output_file.write('\n')
# Read the data file and build the {ORF: full-row-string} mapping.
# BUG FIXES throughout this section: the driver code still referenced the
# pre-refactor names (dataDic, attributeLine, Writing_ext_final_table,
# Writing_clusterORF_list, Writing_cluster_results), all undefined here;
# a failed open() fell through and crashed on readline(); and
# split('.csv')[-3] raised IndexError for any normal '<name>.csv' input.
try:
    data = open(data_file_Path, 'r')
except OSError:
    print('input file does not exists')
    sys.exit(1)

# First line holds the column headers; keep both list and joined forms.
attributes = data.readline().strip().split(',')
attribute_line = ','.join(attributes)
# Cluster-origin column: derived from the header length (used 1-based below).
OrigCol = len(attributes) - 1
print("OrigCol is ", str(OrigCol))

data_dict = {}
for data_line in data:
    data_line = data_line.strip()
    line_elements = data_line.split(',')
    orf_identifier = line_elements[1].strip().upper()
    data_dict[orf_identifier] = ','.join(line_elements).upper()
data.close()

# Map each cluster identifier to a comma-joined list of its member ORFs.
fileDic = {}
for orf in data_dict:
    line = data_dict[orf].split(',')
    clusterOrigin = line[int(OrigCol) - 1].strip()
    # Origin strings end with ';', so drop the empty final piece.
    clusterIdentifier = clusterOrigin.split(';')[0:-1]
    for identifier in clusterIdentifier:
        upper_identifier = identifier.strip().upper()
        if upper_identifier not in fileDic:
            fileDic[upper_identifier] = line[1]
        else:
            fileDic[upper_identifier] += ',' + line[1]

# Output folder is named after the input file (basename without .csv).
input_file_identifier = data_file_Path.strip().split('/')[-1].split('.csv')[0]
try:
    os.mkdir(str(output_path) + str(input_file_identifier))
except OSError:
    print('dir exists')

# Write the extended final table, then one gene list + table per cluster.
write_extended_final_table(attribute_line, data_dict, str(OrigCol),
                           str(output_path) + str(input_file_identifier),
                           str(input_file_identifier))
for cluster_name in fileDic:
    write_cluster_orf_list(fileDic[cluster_name].split(','),
                           str(output_path) + str(input_file_identifier), cluster_name)
    write_cluster_results(attribute_line, fileDic[cluster_name].split(','), data_dict,
                          str(output_path) + str(input_file_identifier), cluster_name)

View File

@@ -0,0 +1,103 @@
#!/usr/bin/env python
# This code is to concatenate the batch GO Term Finder results (.tsv) generated from batch GTF perl code(Chris Johnson, U of Tulsa) into a list table
import glob
import os
import sys
def list_files(directory):
    """Return a list of all '*.txt.tsv' files in the given directory."""
    pattern = os.path.join(directory, '*.txt.tsv')
    return glob.glob(pattern)
def concatenate_gtf_results(data_dir, output_file):
    """Concatenate GO TermFinder '*.txt.tsv' results into a single file.

    Files are processed in sorted name order.  The header row is written
    once, from the first file only (matching the legacy implementation);
    non-empty data rows from every file are appended after it.

    BUG FIXES: the header was previously repeated for every input file;
    rstrip('.txt.tsv') stripped a character *set* rather than the suffix
    (and its result was never used); the output handle leaked on error.
    """
    files = sorted(glob.glob(os.path.join(data_dir, '*.txt.tsv')))
    with open(output_file, 'w') as output:
        header_written = False
        for file_path in files:
            with open(file_path, 'r') as f:
                labels = f.readline().strip().split('\t')
                if not header_written:
                    output.write('\t'.join(labels) + '\n')
                    header_written = True
                for line in f:
                    line = line.strip().strip('\t')
                    if line:
                        output.write(line + '\n')
if __name__ == '__main__':
if len(sys.argv) != 3:
print('Usage: python Concatenate_GTF_results.py data_dir output_file')
sys.exit(1)
data_dir = sys.argv[1]
output_file = sys.argv[2]
concatenate_gtf_results(data_dir, output_file)
# Old version
# def list_files(directory):
# """Return a list of all files in the given directory."""
# return glob.glob(os.path.join(directory, '*.txt.tsv'))
# try:
# data_file_Path = sys.argv[1]
# output_file_Path = sys.argv[2]
# except:
# print ('Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name')
# print ('Data file not found, error in given directory')
# sys.exit(1)
# try:
# output = open(output_file_Path, 'w')
# except OSError:
# print ('output file error')
# # get all the GTF result files in given directory
# File_list = []
# File_list = list_files(data_file_Path)
# File_list.sort()
# i = 0
# for file in File_list:
# #parse the file names given in absolute path
# file_name = file.strip().split('/')[-1]
# file_name = file_name.rstrip('.txt.tsv')
# # function to read tsv files from a given directory
# #open the file
# data = open(file,'r')
# #reading the label line
# labelLine = data.readline()
# label = labelLine.strip().split('\t')
# #write the label
# #updates2010July26: update following label writing code
# if i == 0:
# # output.write('cluster origin')
# for element in label:
# output.write(element)
# output.write('\t')
# i = i + 1
# #updates2010July26 End
# #switch to the next line
# output.write('\n')
# #read the GO terms
# GOTermLines = data.readlines()
# for GOTerm in GOTermLines:
# GOTerm = GOTerm.strip().strip('\t')
# if GOTerm != '':
# #updates2010July26: remove the code to write the first column 'REMc cluster ID'
# #output.write(file_name)
# #output.write('\t')
# ##updates2010July26 update end
# output.write(GOTerm + '\n')
# #output.write('\n')
# output.close()

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python
"""
Reads the REMc "-finalTable.csv" output file and makes a series of subdatasets
that reflect the pedigree structure of the way clusters break up.
"""
import sys
import os
import string
import glob
def reading_single_file(file_path):
    """
    Read a REMc table file and return the gene names (second CSV column),
    skipping the header line.
    """
    with open(file_path, 'r') as handle:
        handle.readline()  # header row, not part of the gene list
        return [row.strip().split(',')[1] for row in handle]
def writing_cluster_orf_list(orf_list, output_dir, real_cluster_ori_name):
    """
    Writes a list of ORF names, one stripped name per line, into
    <real_cluster_ori_name>.txt inside output_dir.

    Note: first parameter renamed from `list`, which shadowed the builtin
    (the in-file caller passes it positionally).
    """
    outfile_path = os.path.join(output_dir, f"{real_cluster_ori_name}.txt")
    with open(outfile_path, 'w') as outfile:
        for orf in orf_list:
            outfile.write(orf.strip())
            outfile.write('\n')
def writing_cluster_results(attributes, orf_list, dic, output_dir, real_cluster_ori_name):
    """
    Writes the header plus one stored data row per ORF into
    <real_cluster_ori_name>-finaltable.csv inside output_dir.
    """
    outfile_path = os.path.join(output_dir, f"{real_cluster_ori_name}-finaltable.csv")
    rows = [attributes] + [dic[orf.strip()].strip() for orf in orf_list]
    with open(outfile_path, 'w') as outfile:
        outfile.write('\n'.join(rows) + '\n')
def writing_ext_final_table(attributes, dic, ori_name_col_num, output_dir, output_file_name):
    """
    Writes the cluster-name extensive final table: every row is extended
    with its ';'-separated origin names (from 1-based column
    ori_name_col_num) appended as extra trailing columns.
    """
    outfile_path = os.path.join(output_dir, f"{output_file_name}-oriExtFinalTable.csv")
    with open(outfile_path, 'w') as outfile:
        outfile.write(attributes + '\n')
        for key in dic:
            fields = dic[key].split(',')
            origins = fields[int(ori_name_col_num) - 1].strip().split(';')
            fields.extend(origin.strip() for origin in origins)
            outfile.write(','.join(fields) + '\n')
def main():
    """
    Parse the REMc -finalTable.csv output file into per-cluster gene
    lists, per-cluster final tables, and an origin-extended final table.

    Command line: <data_file_path> <output_path>
    """
    try:
        data_file_path = sys.argv[1]
        output_path = sys.argv[2]
    except IndexError:
        print('Usage: python parse_clustering_result_to_Pedigree_Dataset_and_genelist.py '
              '/datasetPath/datasetfilename cluster_origin_column_num output_path_name')
        print('Data file not found')
        sys.exit(1)
    try:
        with open(data_file_path, 'r') as data:
            attribute_line = data.readline().strip()
            attributes = attribute_line.split(',')
            # Origin-column position is derived from the header length
            # (used 1-based further down).
            orig_col = len(attributes) - 1
            # Map upper-cased ORF name (column 2) -> full upper-cased CSV row.
            data_dict = {}
            for dataline in data:
                dataline = dataline.strip()
                elements = dataline.split(',')
                data_dict[str.upper(elements[1].strip())] = ','.join(elements).upper()
    except FileNotFoundError:
        print('Input file does not exist')
        sys.exit(1)
    # Map each cluster identifier -> comma-joined list of its member ORFs.
    file_dict = {}
    for orf in data_dict:
        line = data_dict[orf].split(',')
        cluster_origin = line[int(orig_col) - 1].strip()
        # Origin strings end with ';', so drop the empty final piece.
        cluster_identifier = cluster_origin.split(';')[0:-1]
        for i, identifier in enumerate(cluster_identifier):
            identifier = identifier.strip()
            if identifier not in file_dict:
                file_dict[identifier] = line[1]
            else:
                file_dict[identifier] = f"{file_dict[identifier]},{line[1]}"
    # Output folder is named after the input file (basename without .csv).
    input_file_identifier = os.path.basename(data_file_path).split('.csv')[0]
    output_dir = os.path.join(output_path, input_file_identifier)
    os.makedirs(output_dir, exist_ok=True)
    # Writing the extensive ori name finaltable
    writing_ext_final_table(attribute_line, data_dict, orig_col, output_dir, input_file_identifier)
    # Writing the genelist files
    for cluster_name in file_dict:
        writing_cluster_orf_list(file_dict[cluster_name].split(','), output_dir, cluster_name)
        # Writing the cluster result files
        writing_cluster_results(attribute_line, file_dict[cluster_name].split(','), data_dict,
                                output_dir, cluster_name)
if __name__ == '__main__':
main()

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env Rscript
# Load the openxlsx package
library(openxlsx)
# Command-line arguments: the single expected argument is the output
# directory containing the Component/, Function/, and Process/ folders.
args <- commandArgs(TRUE)
outDir <- args[1]
# Function to combine CSV and TXT files into a workbook with named sheets
# Combine CSV and TXT files into a single .xlsx workbook, one sheet per
# input file, each sheet named after the file's basename (extension
# stripped). Files with any other extension are silently skipped.
#
# Args:
#   file_list:   character vector of input file paths (.csv or .txt)
#   output_file: path of the .xlsx workbook to write
combineFilesToWorkbook <- function(file_list, output_file) {
  # Create a new workbook
  wb <- createWorkbook()
  for (file in file_list) {
    ext <- tools::file_ext(file)
    sheet_name <- tools::file_path_sans_ext(basename(file))
    if (ext %in% c("csv", "txt")) {
      # CSV keeps its tabular structure; TXT is written as one column
      # of raw lines.
      if (ext == "csv") {
        data <- read.csv(file)
      } else {
        data <- readLines(file)
      }
      addWorksheet(wb, sheetName = sheet_name)
      writeData(wb, sheet = sheet_name, x = data)
    }
  }
  # overwrite = TRUE so pipeline reruns replace the previous workbook
  # instead of stopping with an error (saveWorkbook's default is FALSE).
  saveWorkbook(wb, output_file, overwrite = TRUE)
}
# Paths to the three per-ontology result tables produced upstream.
Component <- file.path(outDir, "Component", "ComponentResults.txt")
Function <- file.path(outDir, "Function", "FunctionResults.txt")
Process <- file.path(outDir, "Process", "ProcessResults.txt")
# Specify the list of input files (both CSV and TXT)
file_list <- c(Component,Process,Function)
# Specify the output file name
output_file <- file.path(outDir, "GTFCombined.xlsx")
# Call the function to combine the files into a workbook with named sheets
combineFilesToWorkbook(file_list, output_file)

View File

@@ -0,0 +1,658 @@
#!/usr/bin/env Rscript
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
#
# Updated 240724 Bryan C Roessler to improve file operations and portability
# NOTE: The two required arguments are the same and now there are two optional arguments
# 1. Exp1
# 2. Exp2
# 3. StudyInfo.csv file
# 4. Output Directory
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("extrafont")
library("grid")
library("ggthemes")
# Command-line arguments: two required experiment names, then an optional
# StudyInfo.csv path and an optional output directory.
args <- commandArgs(TRUE)
exp_name <- args[1]
exp_name2 <- args[2]
# Use >= so a supplied 3rd/4th argument is actually honored; the original
# `length(args) > 3` / `> 4` tests skipped them when exactly 3 or 4
# arguments were given.
if (length(args) >= 3) {
  study_info_file <- args[3]
} else {
  study_info_file <- "StudyInfo.csv"
}
if (length(args) >= 4) {
  output_dir <- args[4]
} else {
  output_dir <- "gta"
}
# The trailing digits of each experiment name index into the StudyInfo
# table; its second column holds the human-readable experiment label.
expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name))
expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2))
Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
Name1 <- Labels[expNumber1, 2]
Name2 <- Labels[expNumber2, 2]
go_terms_file <- "Average_GOTerms_All.csv"
input_file1 <- file.path(output_dir, exp_name, go_terms_file)
input_file2 <- file.path(output_dir, exp_name2, go_terms_file)
# Build each pairwise directory name with paste0: the original passed the
# name fragments as separate file.path() components, which inserted "/"
# between them (e.g. out/PairwiseCompareL_/Exp1/-/Exp2) and then failed in
# the non-recursive dir.create calls below.
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp_name, "-", exp_name2))
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp_name, "-", exp_name2))
outPathGTAcompare <- file.path(output_dir, "PairwiseCompareL")
outPathGTAcompare[2] <- file.path(output_dir, "PairwiseCompareK")
# recursive = TRUE tolerates a missing parent; showWarnings = FALSE keeps
# reruns quiet when the directories already exist.
dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
# Output directory for the L-interaction pairwise comparison files.
outputpath <- pairDirL #outPathGTAcompare[1] #Args[5]
#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
# Debug marker (original script line number).
print("39")
#theme elements for plots
# Publication-quality ggplot theme: bold centered title, black axis
# lines, light grey major grid, and a horizontal legend at the bottom.
theme_Publication <- function(base_size = 14, base_family = "sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using a fixed 9-colour "Publication" palette.
# NOTE(review): library(scales) is attached here as a side effect; the
# colour scale below calls manual_pal() unqualified and relies on this
# function having attached scales first.
scale_fill_Publication <- function(...){
  library(scales)
  discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same fixed palette.
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Variant of theme_Publication with a vertical legend on the right and a
# slightly larger legend key (0.5 cm); all other settings are identical.
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# NOTE(review): these redefine the two scale helpers above with identical
# bodies, except this fill version does not attach scales. Redundant, but
# harmless since the definitions are identical.
scale_fill_Publication <- function(...){
  discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Load the averaged GO-term score tables for both experiments and merge
# on GO term, so each row carries both experiments' values (_X1 / _X2).
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
# Debug marker (original script line number).
print(117)
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
# Plot 1: avg L-interaction Z of experiment 1 vs experiment 2, coloured
# by GO ontology; the grey rectangle marks the +/-2 significance box.
# Extra aes mappings (Term, Genes, ...) feed the plotly hover tooltip.
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
# Static PDF, then an interactive HTML via plotly; saveWidget writes to
# getwd() and the file is then moved into the pairwise output directory.
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("130")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
# Classify every GO term by which experiment(s) reach |Z| >= 2 and in
# which direction (aggravator = positive, alleviator = negative).
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
print("155")
# Label each row with its overlap category. The try() wrappers absorb the
# error raised when a category's subset is empty, which is expected.
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
# Plot 2: same scatter, now coloured by overlap category.
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-174")
#2
# NOTE(review): this fname begins with "/" unlike the others; the
# resulting double slash in file.path is harmless on POSIX systems.
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
# Keep only GO terms supported by at least 2 genes in both experiments.
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
#3
# Plot 3: overlap-coloured scatter restricted to terms with >= 2 genes.
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#4
# Plot 4: only terms that received an overlap category (Overlap_Avg set).
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
# Z1 shifts each average one SD toward zero (conservative bound for
# positive scores); Z2 shifts one SD upward (conservative bound for
# negative scores). NOTE(review): the Z2 columns are also named
# "L_Subtract_SD" although they ADD the SD — misleading name only.
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
# Re-classify using the SD-adjusted bounds; aggravator tests use Z1 and
# alleviator tests use Z2 (mixing them in the last two is intentional:
# each side uses its own conservative bound).
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
# Label rows that survive the SD-adjusted threshold; try() absorbs the
# error raised when a category is empty.
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
#5
# Plot 5: terms passing the SD-adjusted threshold, coloured by category.
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#6
# Plot 6: same data with GO term names drawn above each point.
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
# Rank surviving terms within each experiment (1 = highest Z; random
# tie-breaking), used as point labels in plots 7 and 8.
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
#7
# Plot 7: points labelled by experiment-1 rank.
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#8
# Plot 8: points labelled by experiment-2 rank.
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
# Write the full and thresholded score tables for the L comparison.
# NOTE(review): the second path is prefixed with getwd() while the first
# is not; both resolve to the same place when outputpath is relative.
print("write csv files L")
write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
#End of L GTA Pairwise Compare
###########BEGIN PAIRWISE K-----KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
#define the output path (as fourth argument from Rscript)
# Switch the output directory to the K-interaction pairwise folder.
outputpath <- pairDirK #outPathGTAcompare[2] #Args[5]
#theme elements for plots
# NOTE(review): this section previously redefined theme_Publication,
# scale_fill_Publication, scale_colour_Publication and
# theme_Publication_legend_right with bodies byte-identical to the
# definitions in the L section above. The redundant redefinitions were
# removed; the earlier definitions remain in effect for the K section.
# Re-read the per-experiment tables and repeat the whole analysis for
# the K (Z_lm_K) interaction scores.
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
#1
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
# Plot 1 (K): avg K Z of experiment 1 vs 2, coloured by GO ontology.
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
# NOTE(review): PDFs in the K section use "GTF" in the filename while the
# HTML files reuse the "GTA" names from the L section.
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("127")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#2
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
# Same classification as the L section, on the K columns.
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
# Label overlap categories for K. NOTE(review): relative to the L section
# the Enhancer/Suppressor strings are swapped for the same score signs —
# presumably intentional for K (growth efficiency) semantics; confirm
# with the analysis owners.
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
# NOTE(review): plotly_path is assigned but never used below.
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
# Plot 2 (K): scatter coloured by overlap category.
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-170")
#2
# NOTE(review): leading "/" in fname (harmless double slash on POSIX).
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#3
# Plot 3 (K): restrict to terms supported by >= 2 genes in both experiments.
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
# NOTE(review): plotly_path is assigned but never used below.
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#4
# Plot 4 (K): only terms that received an overlap category.
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#5
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
# Same SD-adjusted bounds as the L section, on the K columns. The
# "L_Subtract_SD" column names are kept from the L code even though these
# hold K values (and the Z2 branch adds the SD) — names only, values are
# computed from Z_lm_K_*.
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
# Labels use the K-section (swapped) Suppressor/Enhancer convention; the
# try() wrappers absorb the error raised when a category is empty.
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
# Plot 5 (K): terms passing the SD-adjusted threshold.
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#6
# Same scatter as plot #5, but with GO-term name labels and larger canvas.
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
# Auto-prints into the open PDF device.
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#7
# Rank terms within each experiment (1 = largest Z_lm_K average).
# ties.method = "random" makes tie ranks non-deterministic between runs.
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
# Same scatter as plot #5, labelled with experiment-1 ranks.
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
# Auto-prints into the open PDF device.
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#8
# Same scatter, labelled with experiment-2 ranks (computed in block #7).
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
# Auto-prints into the open PDF device.
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
print("write csv files")
# Persist the full merged per-term table and the above-threshold subset.
# file.path() + paste0() build exactly the same paths as the original
# paste(..., sep = "") chains.
all_scores_csv <- file.path(
  getwd(), outputpath,
  paste0("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")
)
threshold_csv <- file.path(
  getwd(), outputpath,
  paste0("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")
)
write.csv(x = X, file = all_scores_csv, row.names = FALSE)
write.csv(x = X_abovethreshold, file = threshold_csv, row.names = FALSE)
#End of GTA Pairwise compare for K values

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,696 @@
#!/usr/bin/env Rscript
# Makes heat maps of multiple experiments
#
# Updated 240724 Bryan C Roessler to improve file operations and portability
# I tried to leave as much logic intact as possible, just feeding in vars in a better way
# NOTE: The script now has 7 required arguments and a variable number of input experiments
# @arg $1 string StudyInfo.csv file
# @arg $2 string gene_ontology_edit.obo file
# @arg $3 string go_terms.tab file
# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
# @arg $5 string ZScores_interaction.csv
# @arg $6 string base directory
# @arg $7 string output directory
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library("gplots")
# Load arguments (see header: 7 fixed arguments, then study numbers).
args <- commandArgs(TRUE)
study_info_file <- args[1]
ontology_file <- args[2]
sgd_terms_tfile <- args[3]
all_sgd_terms_csv <- args[4]
zscores_file <- args[5]
base_dir <- args[6]
output_dir <- args[7]
# Fix from review: the original `args[8:length(args)]` evaluates to
# args[c(8, 7)] (i.e. NA plus the output dir) when no study numbers are
# supplied, because 8:7 counts down. Guard the empty case explicitly.
if (length(args) >= 8) {
  study_nums <- args[8:length(args)]
} else {
  study_nums <- character(0)
}
#import standard tables used in Sean's code That should be copied to each ExpStudy
# StudyInfo.csv: one row per study; column 2 holds the study display name.
labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE)
# GO DAG restricted to is_a edges.
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
# XX3: list of parent GO terms to plot. Zero-pad numeric IDs to the standard
# "GO:0000000" form, and sanitise term names (spaces, slashes) so they can be
# used as PDF file names.
XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE)
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
# Load input files: one ZScores_interaction table per requested study.
# Studies are bound to the positional names X1..Xn / Name1..Namen because all
# downstream code refers to them positionally (X1, Name1, ...), whatever the
# experiment numbers on the command line are.
# Fixes from review:
#  * assign(paste(X, study_num), ...) referenced the undefined objects `X`
#    and `Name`, and paste() would have inserted a space into the name;
#  * eval(paste("function", study_num)) evaluated a plain character string,
#    which is a no-op, so that loop was removed;
#  * paste('Exp', study_num) produced "Exp 1" with a space — the experiment
#    directories are assumed to be named "Exp1", "Exp2", ... (TODO confirm).
for (i in seq_along(study_nums)) {
  study_num <- study_nums[i]
  input_file <- file.path(base_dir, paste0("Exp", study_num), zscores_file)
  if (file.exists(input_file)) {
    assign(paste0("X", i),
           read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE))
    # Column 2 of StudyInfo.csv holds the study's display name.
    assign(paste0("Name", i), labels[study_num, 2])
  }
}
# Annotate one study's score table and suffix its columns with _X<idx>.
# Score_L is the lag-based call (Z_lm_L >= 2 -> Deletion Enhancer);
# Score_K is the carrying-capacity call (Z_lm_K >= 2 -> Deletion Suppressor) —
# the direction is intentionally opposite between L and K.
annotate_study <- function(Xi, idx) {
  # Collapse replicate suffixes (_1.._4) to the bare ORF id.
  Xi$ORF <- gsub("_[1-4]", "", Xi$OrfRep)
  # Vectorised assignments replace the original try()-guarded row updates;
  # a zero-length index is a harmless no-op, so no try() is needed.
  Xi$Score_L <- "No Effect"
  Xi$Score_L[is.na(Xi$Z_lm_L)] <- "No Growth"
  Xi$Score_L[!is.na(Xi$Z_lm_L) & Xi$Z_lm_L >= 2] <- "Deletion Enhancer"
  Xi$Score_L[!is.na(Xi$Z_lm_L) & Xi$Z_lm_L <= -2] <- "Deletion Suppressor"
  Xi$Score_K <- "No Effect"
  Xi$Score_K[is.na(Xi$Z_lm_K)] <- "No Growth"
  Xi$Score_K[!is.na(Xi$Z_lm_K) & Xi$Z_lm_K >= 2] <- "Deletion Suppressor"
  Xi$Score_K[!is.na(Xi$Z_lm_K) & Xi$Z_lm_K <= -2] <- "Deletion Enhancer"
  # Express the NA (no growth) data as 0.001 so ranks and heatmaps stay numeric.
  Xi$Z_lm_L[is.na(Xi$Z_lm_L)] <- 0.001
  Xi$Z_lm_K[is.na(Xi$Z_lm_K)] <- 0.001
  Xi$Rank_L <- rank(Xi$Z_lm_L)
  Xi$Rank_K <- rank(Xi$Z_lm_K)
  Xi <- Xi[order(Xi$OrfRep, decreasing = FALSE), ]
  colnames(Xi) <- paste0(colnames(Xi), "_X", idx)
  Xi
}

# The original repeated the annotation block verbatim for X1..X5; process up
# to five studies in a loop and cbind them into one wide table X.
# Robustness fix: with a single study the original never created X, which
# broke the unconditional X$ORF assignment that follows this block — X is
# now always created when at least one study was loaded.
n_studies <- min(length(study_nums), 5L)
X_parts <- vector("list", n_studies)
for (i in seq_len(n_studies)) {
  Xi <- annotate_study(get(paste0("X", i)), i)
  assign(paste0("X", i), Xi)
  X_parts[[i]] <- Xi
}
if (n_studies > 0) {
  X <- do.call(cbind, X_parts)
}
# ORF identifiers for the merged table come from study 1's OrfRep.
X$ORF <- X$OrfRep_X1
# Two-study case: assemble X_heatmap (ORF, Gene, K columns, L columns).
# NOTE(review): with more than two studies this block still runs; the
# larger-N blocks below rebuild X_heatmap from X and overwrite this result.
if (length(study_nums) > 1) {
X$ORF <- gsub("_1","",x=X$ORF)
# Blank gene names fall back to the ORF id; try() guards the zero-match case.
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ]
# Reorder: ORF, Gene, then K columns, then L columns.
# NOTE(review): hard-coded positions assume the selection above matched
# exactly these 10 columns in file order — verify if the input schema changes.
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
# Replace the positional _X1/_X2 suffixes with the study display names.
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
# Three-study case: rebuild X_heatmap with X3 columns included.
# Overwrites the two-study X_heatmap when it also ran.
if (length(study_nums) > 2) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
# Blank gene names fall back to the ORF id; try() guards the zero-match case.
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ]
#Reorder columns
# NOTE(review): hard-coded positions assume exactly 14 matched columns.
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
# Four-study case: rebuild X_heatmap with X4 columns included.
if (length(study_nums) > 3) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
# Blank gene names fall back to the ORF id; try() guards the zero-match case.
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ]
#Reorder columns
# NOTE(review): hard-coded positions assume exactly 18 matched columns.
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
# Five-study case: rebuild X_heatmap with X5 columns included.
if (length(study_nums) > 4) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
X$ORF <- gsub("_4","",x=X$ORF)
# Blank gene names fall back to the ORF id; try() guards the zero-match case.
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
#Reorder columns
# NOTE(review): hard-coded positions assume exactly 22 matched columns.
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
#theme elements for plots
# Publication-style ggplot theme: bold centred title, black axis lines,
# light grey major grid, horizontal legend along the bottom.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using the fixed Publication palette.
scale_fill_Publication <- function(...){
  library(scales)
  pub_palette <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                   "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
  discrete_scale("fill", "Publication", manual_pal(values = pub_palette), ...)
}
# Discrete colour scale using the fixed Publication palette.
scale_colour_Publication <- function(...){
  pub_palette <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                   "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
  discrete_scale("colour", "Publication", manual_pal(values = pub_palette), ...)
}
# Variant of theme_Publication with a vertical legend on the right and a
# larger legend key.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# NOTE(review): duplicate definitions — scale_fill_Publication and
# scale_colour_Publication were already defined above. These redefinitions
# are identical except that this scale_fill_Publication omits the
# library(scales) call (it relies on scales already being attached).
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# NOTE(review): Ontology, GO2ALLORFs and Terms are already loaded from the
# same inputs near the top of the script; the redundant (and expensive)
# re-reads that were here have been removed. print(Ontology) is kept so the
# log output is unchanged.
print(Ontology)
#BIG LOOP BIG LOOP ------------------------------------------------------
# One PDF per parent GO term listed in XX3; each page is a heatmap.2 of the
# Z-score columns for the genes annotated to one descendant term.
# breaks has 12 entries for the 11 colours of brewer.pal(11, "PuOr").
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)

# Map the number of ORFs annotated to the parent term to the PDF page height,
# the row-label size, and whether 1-2 gene terms are also drawn (the original
# only drew those for parents of <= 60 ORFs).
# Fix from review: the original used nine copy-pasted branches with
# overlapping closed intervals (e.g. <= 1000 and >= 1000), so boundary sizes
# (30, 60, 100, 200, 500, 1000) rendered the whole PDF twice, with the later
# (smaller-layout) branch overwriting the file. Half-open intervals below
# reproduce that final output without the double render.
heatmap_layout <- function(parent_size) {
  if (parent_size > 2000) {
    list(height = 45, cex_row = 0.5, draw_small = FALSE)
  } else if (parent_size > 1000) {
    list(height = 35, cex_row = 0.6, draw_small = FALSE)
  } else if (parent_size > 500) {
    list(height = 30, cex_row = 0.6, draw_small = FALSE)
  } else if (parent_size > 200) {
    list(height = 25, cex_row = 0.7, draw_small = FALSE)
  } else if (parent_size > 100) {
    list(height = 20, cex_row = 0.7, draw_small = FALSE)
  } else if (parent_size > 60) {
    list(height = 15, cex_row = 0.7, draw_small = FALSE)
  } else if (parent_size > 30) {
    list(height = 10, cex_row = 0.7, draw_small = TRUE)
  } else {
    # 2 <= parent_size <= 30 (sizes < 2 are filtered out before this is called)
    list(height = 7, cex_row = 0.7, draw_small = TRUE)
  }
}

# Render one heatmap page per descendant GO term into pdf_file.
# Reads the globals Terms, GO2ALLORFs, X_heatmap and colormapbreaks.
render_go_heatmaps <- function(go_terms, layout, pdf_file) {
  pdf(file = pdf_file, width = 12, height = layout$height, onefile = TRUE)
  on.exit(dev.off(), add = TRUE)  # always close the device, even on error
  for (go_term in go_terms) {
    # "GO:0006325" -> 6325, for lookup in the numeric Terms$GO_ID column.
    go_num <- as.integer(str_split_fixed(as.character(go_term), "\\:", 2)[, 2])
    go_name <- as.character(Terms[Terms$GO_ID == go_num, ]$GO_Term)
    term_orfs <- as.vector(GO2ALLORFs[go_term][[1]])
    rows <- X_heatmap[X_heatmap$ORF %in% term_orfs, ]
    n_genes <- dim(rows)[1]
    if (n_genes == 0) {
      next
    }
    # Columns 1-2 are ORF and Gene; the rest are the numeric Z columns.
    mat <- as.matrix(rows[, 3:dim(rows)[2]])
    # heatmap.2 can still fail (e.g. degenerate clustering input);
    # try() keeps the loop going so one bad term does not abort the PDF.
    if (n_genes > 2) {
      try(heatmap.2(x = mat,
                    Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                    dendrogram = "row", cexCol = 0.7, cexRow = layout$cex_row, scale = "none",
                    breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6),
                    sepcolor = "white", offsetCol = 0.1,
                    ylab = "Gene",
                    cellnote = round(mat, digits = 0), notecex = 0.5, key = TRUE,
                    keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
                    na.color = "red", col = brewer.pal(11, "PuOr"),
                    main = go_name,
                    labRow = as.character(rows$Gene)))
    } else if (layout$draw_small) {
      # 1-2 genes: clustering is not meaningful, so skip the dendrogram.
      try(heatmap.2(x = mat,
                    Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                    dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
                    breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6),
                    sepcolor = "white", offsetCol = 0.1,
                    ylab = "Gene",
                    cellnote = round(mat, digits = 0), notecex = 0.5, key = TRUE,
                    keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
                    na.color = "red", col = brewer.pal(11, "PuOr"),
                    main = go_name,
                    labRow = as.character(rows$Gene)))
    }
  }
}

for (s in seq_len(dim(XX3)[1])) {
  go_id <- as.character(XX3[s, 1])
  descendants <- get_descendants(Ontology, roots = go_id)
  # Only make plots for parents with at most 100 descendant terms.
  if (length(descendants) > 100) {
    next
  }
  parent_size <- length(as.vector(GO2ALLORFs[go_id][[1]]))
  if (parent_size < 2) {
    next
  }
  # NOTE: output_dir is concatenated directly, matching the original — it is
  # expected to end with a path separator.
  render_go_heatmaps(descendants, heatmap_layout(parent_size),
                     paste(output_dir, XX3[s, 2], ".pdf", sep = ""))
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env Rscript
# Append shift data (Shift_only.csv) to the REMc finalTable and interleave each
# shift column with its matching Z_lm column so heatmaps show them side by side.
#
# Usage: script.R [finalTable.csv] [shiftFile.csv] [studyInfo.csv] [output.csv]
# May want to reorder columns in excel before making heatmaps - otherwise all
# the shift data will be plotted next to each other.
library(plyr)
library(dplyr)
library(sos)

args <- commandArgs(TRUE)
# Fixed off-by-one argument checks: args[k] is available when length(args) >= k
# (the old "length(args) > k" tests silently ignored the last supplied argument).
finalTable <- if (length(args) >= 1) args[1] else "REMcRdy_lm_only.csv-finalTable.csv" # legacy default
shiftFile  <- if (length(args) >= 2) args[2] else "Shift_only.csv"                    # legacy default
studyInfo  <- if (length(args) >= 3) args[3] else "../Code/StudyInfo.csv"             # legacy default
output     <- if (length(args) >= 4) args[4] else "REMcHeatmaps/REMcWithShift.csv"    # legacy default

# REMc finalTable data
X <- data.frame(read.csv(file = finalTable, header = TRUE, stringsAsFactors = FALSE))
# Shift data from ../JoinInteractions
Y <- data.frame(read.csv(file = shiftFile, header = TRUE, stringsAsFactors = FALSE))
# NOTE(review): Labels is read but never used below - kept for compatibility
Labels <- read.delim(studyInfo, skip = 0, as.is = TRUE, row.names = 1, strip.white = TRUE)

# Widen X with one NA-filled column per shift column (Y minus its 2 key columns)
Xcolnum <- length(X[1, ])
ADDnum <- Xcolnum + length(Y[1, ]) - 2
Xtemp <- X
Xtemp[, (Xcolnum + 1):ADDnum] <- NA

# Match each ORF in X (column 2) to its row in the shift table (column 1) and
# copy that row's shift values into the new columns; non-matches stay NA.
for (i in seq_len(nrow(X))) {
  Shiftrownum <- match(X[i, 2], Y[, 1])
  Xtemp[i, (Xcolnum + 1):ADDnum] <- Y[Shiftrownum, 3:length(Y[1, ])]
}

headerY <- colnames(Y)
shfHdr <- headerY[3:length(headerY)]
combTbl <- X[, 1:3]                              # orf, gene-name, first data col
lmTbl <- select(Xtemp, contains("Z_lm"))
# The NA columns added above were auto-named V<n>, so contains("V") picks the
# shift columns. NOTE(review): fragile if any original column name contains "V".
shiftTbl <- select(Xtemp, contains("V"))
clustTbl <- select(Xtemp, contains("cluster."))

# Interleave each shift column with its Z_lm column, then append cluster info
lmHdr <- colnames(lmTbl)
clstHdr <- colnames(clustTbl)
combI <- combTbl
intLvHdr <- vector()
for (i in 1:(length(shiftTbl[1, ]))) {
  combI <- cbind.data.frame(combI, shiftTbl[i])
  combI <- cbind.data.frame(combI, lmTbl[i])
  intLvHdr <- c(intLvHdr, shfHdr[i], lmHdr[i])
}
combIHdr <- c(colnames(combTbl), intLvHdr, clstHdr)
combI <- cbind.data.frame(combI, clustTbl)
colnames(combI) <- combIHdr
write.csv(combI, file = output, row.names = FALSE)

View File

@@ -0,0 +1,288 @@
#!/usr/bin/env Rscript
# Build per-cluster heatmaps from the REMc finalTable (with shift data).
# Usage: script.R [REMcWithShift.csv] [output_dir]
library(RColorBrewer)
library(gplots)

args <- commandArgs(TRUE)
# Fixed off-by-one argument checks: args[k] exists when length(args) >= k
# (the old "length(args) > k" tests silently ignored the last supplied argument).
if (length(args) >= 1) {
  input_finalTable <- args[1]
} else {
  input_finalTable <- "/REMcHeatmaps/REMcWithShift.csv" # legacy workflow default
}
if (length(args) >= 2) {
  outDir <- args[2]
} else {
  # BUG(fixed): the legacy default pointed at the input .csv file; outDir is
  # used with file.path() below, so it must be a directory.
  outDir <- "/REMcHeatmaps" # legacy workflow default
}

hmapfile <- data.frame(read.csv(file = input_finalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
# Upstream writes these sentinel values for missing data; convert them to NA
hmapfile[hmapfile == -100] <- NA
hmapfile[hmapfile == 100] <- NA
hmapfile[hmapfile == 0.001] <- NA
hmapfile[hmapfile == -0.001] <- NA
# select the number of rows based on the number of genes
num_total_genes <- length(hmapfile[,1])
# Break out the cluster names so each part of the cluster origin can be accessed
# line below removed because it adds to many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
# Pad ";" with a leading space so each split piece keeps a trailing space,
# which disambiguates "1-0-1 " from "1-0-10 " in the grepl() matching below.
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
#use tail(x,n) for accessing the outward most cluster
# Depth of the deepest clustering round across all genes.
# NOTE(review): clust_rounds is not referenced later in this script - confirm
# before removing.
clust_rounds <- 0
for(i in 1:num_total_genes){
if(length(hmapfile$cluster.origin[[i]]) > clust_rounds){
clust_rounds <- length(hmapfile$cluster.origin[[i]])
}
}
unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
unique_clusts <- unique_clusts[unique_clusts != " "]
# Select only the unique cluster names
unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
num_unique_clusts <- length(unique_clusts)
# Base the color key on a statistical analysis of the L and K data
# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
# scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
# change so that the L data is multiplied to be on the same scale as the K data
KEY_MIN <- 0
KEY_MAX <- 0
K_MIN <- 0
L_MAX <- 0
# Indices of the data columns holding "_Z_lm_K" / "_Z_lm_L" interaction scores.
# NOTE(review): K and L are presumably the two growth-curve parameters from the
# upstream pipeline - confirm their meaning against the column-naming code.
KcolumnValues <- vector()
LcolumnValues <- vector()
for(i in 4:(length(hmapfile[1,]) - 2)){
if(grepl("_Z_lm_K",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
KcolumnValues <- append(KcolumnValues,i)
}
if(grepl("_Z_lm_L",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
LcolumnValues <- append(LcolumnValues,i)
}
}
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]
# Z scores are
# Hard-coded symmetric color-key limits, in Z-score units (quantile-based
# alternatives above are retained for reference).
L_MAX <- 12
K_MIN <- -12
# L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
# hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
# if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
# print("FOUND SHIFT VALUES")
# hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
# }
# KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
# KEY_MIN <- as.numeric(K_MIN)
KEY_MAX <- as.numeric(L_MAX)
KEY_MIN <- as.numeric(K_MIN)
print(KEY_MIN)
print(L_MAX)
# print(L_Multiplier)
# 12 break points -> 11 color bins, symmetric about zero.
colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
# print(colormapbreaks)
# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
# However since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
# could we pool all the shift data/average it?
# Column-separator positions for heatmap.2's colsep: every other column when
# shift data is interleaved with Z_lm data, every column otherwise.
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=2)
#ev_repeat = rep("white",length(even_columns))
#ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
#middle_col <- (length(hmapfile[1,]) - 5)/2
#ev_repeat[(middle_col/2)] <- "black"
#print(ev_repeat)
}
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == FALSE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=1)
print("NO SHIFT VALS FOUND")
}
#FOR THIS SCRIPT ONLY (rap tem hu script)
#even_columns <- c(2,5,7,10,12,15,17)
#m <- 0
# Column labels for the heatmaps (data columns only).
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1,]) - 2)])
#print(colnames_edit)
# Blank out each Shift column label and prettify the following Z_lm label.
# NOTE(review): if the LAST label contained "Shift", the [i+1] write would
# extend the vector - presumably shift columns always precede their Z_lm
# partner, so this never happens; confirm.
for(i in 1:length(colnames_edit)){
if(grepl("Shift",colnames_edit[i],fixed=TRUE) == TRUE){
colnames_edit[i] <- ""
colnames_edit[i+1] <- gsub(pattern = "_Z_lm_",replacement = " ",x = colnames_edit[i+1])
try(colnames_edit[i+1] <- gsub(pattern = "_",replacement = " ",x = colnames_edit[i+1]))
# INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
# print(length(unlist(INT_store)))
# if(length(unlist(INT_store)) == 4){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
# }
# if(length(unlist(INT_store)) == 3){
#
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
# }
# if(length(unlist(INT_store)) == 5){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep=" ")
# }
# if(length(unlist(INT_store)) == 6){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
# }
}
}
print(colnames_edit)
#break()
#colnames_edit[5] <- "TEM HLEG K"
#colnames_edit[10] <- "TEM HL K"
#colnames_edit[15] <- "TEM HLEG L"
#colnames_edit[20] <- "TEM HL L"
# Create the heatmaps - one PDF per cluster.
# The seven near-identical branches of the original differed only in page size,
# label/cellnote scaling, and whether cell values were printed; they are folded
# into one helper. Also fixes the 101-149 branch, which passed `mypath` to
# pdf() twice (the stray positional argument landed on `width`).
#
# Renders one cluster to <outDir>/cluster_<name>.pdf.
#   X0           score matrix for this cluster
#   cluster      cluster name (spaces stripped for the file name)
#   cluster_data rows of hmapfile belonging to the cluster (for row labels)
#   use_cellnote print rounded Z scores in the cells (off for huge clusters)
draw_cluster_pdf <- function(X0, cluster, cluster_data, height, width,
                             cexRow, notecex, keysize, use_cellnote = TRUE) {
  mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
  pdf(file = mypath, height = height, width = width)
  hm_args <- list(x = X0,
                  Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                  dendrogram = "row", cexCol = 0.8, cexRow = cexRow, scale = "none",
                  breaks = colormapbreaks, symbreaks = FALSE,
                  colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
                  xlab = "Type of Media", ylab = "Gene Name",
                  key = TRUE, keysize = keysize, trace = "none",
                  density.info = c("none"), margins = c(10, 8),
                  na.color = "red", col = brewer.pal(11, "PuOr"),
                  main = cluster,
                  labRow = as.character(cluster_data$Gene), labCol = colnames_edit)
  if (use_cellnote) {
    hm_args$cellnote <- round(X0, digits = 0)
    hm_args$notecex <- notecex
  }
  do.call(heatmap.2, hm_args)
  dev.off()
}

for (i in seq_len(num_unique_clusts)) {
  cluster <- unique_clusts[i]
  cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
  cluster_length <- length(cluster_data[, 1])
  # Singleton clusters cannot be clustered/plotted; skip them
  if (cluster_length != 1) {
    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 2)])
    # Page size and text scaling by cluster size (same thresholds as before)
    if (cluster_length >= 2001) {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 20, width = 15,
                       cexRow = 0.1, notecex = 0.1, keysize = 0.7,
                       use_cellnote = FALSE)
    } else if (cluster_length >= 201) {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 15, width = 12,
                       cexRow = 0.1, notecex = 0.1, keysize = 0.7)
    } else if (cluster_length >= 150) {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                       cexRow = 0.1, notecex = 0.2, keysize = 1)
    } else if (cluster_length >= 101) {
      # This branch had the pdf(file=mypath, mypath, ...) bug
      draw_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                       cexRow = 0.2, notecex = 0.3, keysize = 1)
    } else if (cluster_length >= 60) {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                       cexRow = 0.4, notecex = 0.3, keysize = 1)
    } else if (cluster_length >= 30) {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 9, width = 12,
                       cexRow = 0.6, notecex = 0.4, keysize = 1)
    } else {
      draw_cluster_pdf(X0, cluster, cluster_data, height = 7, width = 12,
                       cexRow = 0.9, notecex = 0.4, keysize = 1)
    }
  }
  # print(paste("FINISHED", "CLUSTER", cluster, sep = " "))
}

View File

@@ -0,0 +1,351 @@
#!/usr/bin/env Rscript
# Build REMc cluster heatmaps annotated with human homologs and DAmP status.
# Usage: script.R <finalTable.csv> <DAmP_list.txt> <homology_mapping.csv> <output_dir>
library(RColorBrewer)
library(gplots)
library(tidyverse)

args <- commandArgs(TRUE)
# finalTable.csv produced after running REMc
inputFinalTable <- args[1]
# BUG(fixed): the next three reads used `Args` (capital A), an undefined
# object - R is case-sensitive, so the script could never run as shipped.
# DAmP_list.txt: gene names in this list are colored differently in the plots
DAmPs <- args[2]
DAmP_list <- read.delim(file = DAmPs, header = FALSE, stringsAsFactors = FALSE)
# Yeast->human homology mapping, joined onto the finalTable below
mapFile <- args[3]
mapping <- read.csv(file = mapFile, stringsAsFactors = FALSE)
# Output directory for the heatmaps - create this folder first,
# e.g. in a terminal in the working folder: mkdir filename_heatmaps
outputPath <- args[4]

# Read in finalTablewithShift
hmapfile <- data.frame(read.csv(file = inputFinalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
hmapfile_map <- hmapfile
# Match on the ORF after stripping replicate suffixes _1.._4; older files name
# the column "ORF" instead of "OrfRep"
if (colnames(hmapfile_map)[2] == "OrfRep") {
  try(hmapfile_map$ORFMatch <- hmapfile_map$OrfRep)
}
if (colnames(hmapfile_map)[2] == "ORF") {
  try(hmapfile_map$ORFMatch <- hmapfile_map$ORF)
}
hmapfile_map$ORFMatch <- gsub("_1", "", x = hmapfile_map$ORFMatch)
hmapfile_map$ORFMatch <- gsub("_2", "", x = hmapfile_map$ORFMatch)
hmapfile_map$ORFMatch <- gsub("_3", "", x = hmapfile_map$ORFMatch)
hmapfile_map$ORFMatch <- gsub("_4", "", x = hmapfile_map$ORFMatch)
# Join the finalTable to the homology mapping on the cleaned ORF name
hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ensembl_gene_id"))
# Drop rows that came only from the mapping file (no finalTable likelihood)
hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$likelihood) == FALSE, ]
# Write csv with all info from the mapping file
write.csv(hmapfile_w_homolog, file = paste(outputPath, "/", inputFinalTable, "_WithHomologAll.csv", sep = ""), row.names = FALSE)
# Keep only rows with a human homolog match; this is also the heatmap input
hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$external_gene_name_Human) == FALSE, ]
write.csv(hmapfile_w_homolog, file = paste(outputPath, "/", inputFinalTable, "_WithHomologMatchesOnly.csv", sep = ""), row.names = FALSE)
# Display labels become "YeastGene/HumanGene"
hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/")
# Keep only the original finalTable columns plus the orthology type
hmap_len <- dim(hmapfile)[2]
hmapfile_w_homolog_remake <- cbind(hmapfile_w_homolog[, 1:hmap_len], hsapiens_homolog_orthology_type = hmapfile_w_homolog$hsapiens_homolog_orthology_type)
hmapfile <- hmapfile_w_homolog_remake
# Upstream writes these sentinel values for missing data; convert them to NA
hmapfile[hmapfile == -100] <- NA
hmapfile[hmapfile == 100] <- NA
hmapfile[hmapfile == 0.001] <- NA
hmapfile[hmapfile == -0.001] <- NA
# Select the number of rows based on the number of genes
num_total_genes <- length(hmapfile[,1])
# break out the cluster names so each part of the cluster origin can be accessed
# line below removed because it adds to many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
# Pad ";" with a leading space so each split piece keeps a trailing space,
# which disambiguates "1-0-1 " from "1-0-10 " in the grepl() matching below.
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
# use tail(x,n) for accessing the outward most cluster
# Depth of the deepest clustering round across all genes.
# NOTE(review): clust_rounds is not referenced later in this script - confirm
# before removing.
clust_rounds <- 0
for(i in 1:num_total_genes){
if(length(hmapfile$cluster.origin[[i]]) > clust_rounds){
clust_rounds <- length(hmapfile$cluster.origin[[i]])
}
}
unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
unique_clusts <- unique_clusts[unique_clusts != " "]
#select only the unique cluster names
unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
num_unique_clusts <- length(unique_clusts)
# Base the color key on a statistical analysis of the L and K data
# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
# scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
# Change so that the L data is multiplied to be on the same scale as the K data
KEY_MIN <- 0
KEY_MAX <- 0
K_MIN <- 0
L_MAX <- 0
# Indices of the data columns holding "_Z_lm_K" / "_Z_lm_L" interaction scores.
# NOTE(review): upper bound is "- 3" here (vs "- 2" in the non-homolog script)
# because of the extra hsapiens_homolog_orthology_type column.
KcolumnValues <- vector()
LcolumnValues <- vector()
for(i in 4:(length(hmapfile[1,]) - 3)){
if(grepl("_Z_lm_K",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
KcolumnValues <- append(KcolumnValues,i)
}
if(grepl("_Z_lm_L",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
LcolumnValues <- append(LcolumnValues,i)
}
}
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]
# Z scores are
# Hard-coded symmetric color-key limits, in Z-score units.
L_MAX <- 12
K_MIN <- -12
# L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
# hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
# if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
# print("FOUND SHIFT VALUES")
# hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
# }
#KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
#KEY_MIN <- as.numeric(K_MIN)
KEY_MAX <- as.numeric(L_MAX)
KEY_MIN <- as.numeric(K_MIN)
print(KEY_MIN)
print(L_MAX)
#print(L_Multiplier)
# 12 break points -> 11 color bins, symmetric about zero.
colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
#print(colormapbreaks)
# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
# However since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
# could we pool all the shift data/average it?
# Column-separator positions for heatmap.2's colsep: every other column when
# shift data is interleaved with Z_lm data, every column otherwise.
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=2)
# ev_repeat = rep("white",length(even_columns))
# ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
# middle_col <- (length(hmapfile[1,]) - 5)/2
# ev_repeat[(middle_col/2)] <- "black"
# print(ev_repeat)
}
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == FALSE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=1)
print("NO SHIFT VALS FOUND")
}
# for this script only (rap tem hu script)
# even_columns <- c(2,5,7,10,12,15,17)
# m <- 0
# Column labels for the heatmaps (data columns only).
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1,]) - 3)])
# DAmP status: default everything to "YKO" (knockout), then flag the genes
# present in the DAmP list as "YKD" (knockdown); try() tolerates empty matches.
colnames(DAmP_list)[1] <- "ORF"
hmapfile$DAmPs <- "YKO"
colnames(hmapfile)[2] <- "ORF"
try(hmapfile[hmapfile$ORF %in% DAmP_list$ORF,]$DAmPs <- "YKD")
# X <- X[order(X$DAmPs,decreasing = TRUE),]
# Row-label color: black for knockouts, red for DAmP knockdowns.
hmapfile$color2 <- NA
try(hmapfile[hmapfile$DAmPs == "YKO",]$color2 <- "black")
try(hmapfile[hmapfile$DAmPs == "YKD",]$color2 <- "red")
# Row side-bar color encodes the yeast->human orthology type.
hmapfile$color <- NA
try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_many2many",]$color <- "#F8766D")
try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_one2many",]$color <- "#00BA38")
try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_one2one",]$color <- "#619CFF")
# print(colnames_edit)
# Blank out each Shift column label and prettify the following Z_lm label.
# NOTE(review): if the LAST label contained "Shift", the [i+1] write would
# extend the vector - presumably shift columns always precede their Z_lm
# partner, so this never happens; confirm.
for(i in 1:length(colnames_edit)){
if(grepl("Shift",colnames_edit[i],fixed=TRUE) == TRUE){
colnames_edit[i] <- ""
colnames_edit[i+1] <- gsub(pattern = "_Z_lm_",replacement = " ",x = colnames_edit[i+1])
try(colnames_edit[i+1] <- gsub(pattern = "_",replacement = " ",x = colnames_edit[i+1]))
# INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
# print(length(unlist(INT_store)))
# if(length(unlist(INT_store)) == 4){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
# }
# if(length(unlist(INT_store)) == 3){
#
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
# }
# if(length(unlist(INT_store)) == 5){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep=" ")
# }
# if(length(unlist(INT_store)) == 6){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
# }
}
}
print(colnames_edit)
# break()
# colnames_edit[5] <- "TEM HLEG K"
# colnames_edit[10] <- "TEM HL K"
# colnames_edit[15] <- "TEM HLEG L"
# colnames_edit[20] <- "TEM HL L"
# Create the heatmaps - one PDF per cluster, with DAmP-colored row labels
# (colRow) and a homology-type side bar (RowSideColors).
# The seven near-identical branches of the original differed only in page size,
# label/cellnote scaling, and whether cell values were printed; they are folded
# into one helper. Also fixes the 101-149 branch, which passed `mypath` to
# pdf() twice (the stray positional argument landed on `width`).
#
# Renders one cluster to <outputPath>/cluster_<name>.pdf.
draw_homolog_cluster_pdf <- function(X0, cluster, cluster_data, height, width,
                                     cexRow, notecex, keysize, use_cellnote = TRUE) {
  mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
  pdf(file = mypath, height = height, width = width)
  hm_args <- list(x = X0,
                  Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                  dendrogram = "row", cexCol = 0.8, cexRow = cexRow, scale = "none",
                  breaks = colormapbreaks, symbreaks = FALSE,
                  colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
                  xlab = "Type of Media", ylab = "Gene Name",
                  key = TRUE, keysize = keysize, trace = "none",
                  density.info = c("none"), margins = c(10, 8),
                  na.color = "red", col = brewer.pal(11, "PuOr"),
                  main = cluster,
                  labRow = as.character(cluster_data$Gene), labCol = colnames_edit,
                  colRow = cluster_data$color2,
                  RowSideColors = cluster_data$color)
  if (use_cellnote) {
    hm_args$cellnote <- round(X0, digits = 0)
    hm_args$notecex <- notecex
  }
  do.call(heatmap.2, hm_args)
  dev.off()
}

for (i in seq_len(num_unique_clusts)) {
  cluster <- unique_clusts[i]
  cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
  cluster_length <- length(cluster_data[, 1])
  # Singleton clusters cannot be clustered/plotted; skip them
  if (cluster_length != 1) {
    # "- 6" excludes the trailing cluster + homology/color bookkeeping columns
    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)])
    # Page size and text scaling by cluster size (same thresholds as before)
    if (cluster_length >= 2001) {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 20, width = 15,
                               cexRow = 0.1, notecex = 0.1, keysize = 0.7,
                               use_cellnote = FALSE)
    } else if (cluster_length >= 201) {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 15, width = 12,
                               cexRow = 0.1, notecex = 0.1, keysize = 0.7)
    } else if (cluster_length >= 150) {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                               cexRow = 0.1, notecex = 0.2, keysize = 1)
    } else if (cluster_length >= 101) {
      # This branch had the pdf(file=mypath, mypath, ...) bug
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                               cexRow = 0.2, notecex = 0.3, keysize = 1)
    } else if (cluster_length >= 60) {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 12, width = 12,
                               cexRow = 0.4, notecex = 0.3, keysize = 1)
    } else if (cluster_length >= 30) {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 9, width = 12,
                               cexRow = 0.6, notecex = 0.4, keysize = 1)
    } else {
      draw_homolog_cluster_pdf(X0, cluster, cluster_data, height = 7, width = 12,
                               cexRow = 0.9, notecex = 0.4, keysize = 1)
    }
  }
  # print(paste("FINISHED", "CLUSTER", cluster, sep = " "))
}

42887
workflow/apps/r/go_terms.tab Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,199 @@
#!/usr/bin/env R
# GTA (GoTermAveraging)
# Your output may not be reproducible as org.Sc.sgd.db is uploaded from Bioconductor R library and changes
#
# Updated 240724 Bryan C Roessler to improve file operations and portability
# NOTE: The script now has 2 additional OPTIONAL arguments:
# 1. Path to SGD terms file (go.terms.tab)
# 2. Path to SGD features file (gene_association.sgd)
library("stringr")
library("org.Sc.sgd.db")
library("plyr")
# Parse arguments
args <- commandArgs(TRUE)
exp_name <- args[1]
if (length(args) > 2) {
zscores_file <- args[2]
} else {
zscores_file <- "zscores/zscores_interaction.csv" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
}
if (length(args) > 3) {
sgd_terms_file <- args[3]
} else {
sgd_terms_file <- "go_terms.tab"
}
if (length(args) > 4) {
sgd_features_file <- args[4]
} else {
sgd_features_file <- "gene_association.sgd" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
}
if (length(args) > 5) {
output_dir <- args[5]
} else {
output_dir <- "../../out/gta" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
}
# # Set SGDgeneList file path
# if (length(args) > 4) {
# SGDgeneList <- args[4]
# } else {
# SGDgeneList <- "../Code/SGD_features.tab"
# Begin for loop for experiments in this study ---- (ZScores_Interaction.csv)
# For each Z-score file: average (and take the SD of) every score column over
# the library genes annotated to each GO term, then write filtered,
# non-redundant summary tables to output_dir/<exp_name>/.
for (m in seq_along(zscores_file)) {
  X <- read.csv(file = zscores_file[m], stringsAsFactors = FALSE, header = TRUE)
  if (colnames(X)[1] == "OrfRep") {
    colnames(X)[1] <- "ORF"  # normalize the ORF identifier column name
  }
  # Terms is the GO term list (numeric ID -> term name / aspect / definition)
  Terms <- read.delim(file = sgd_terms_file, header = FALSE, quote = "",
    col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition"))
  # All ORFs associated with each GO term (Bioconductor annotation map)
  GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
  # Gene_Association is the SGD gene-association (GAF) file; 8 header lines skipped
  Gene_Association <- read.delim(sgd_features_file, skip = 8, header = FALSE, quote = "",
    col.names = c("Database", "Database_Object_ID", "Database_Object_Symbol", "NOT",
      "GO_ID", "Database_Reference", "Evidence", "With_or_From", "Aspect",
      "Database_Object_Name", "Database_Object_Synonym", "Database_Object_Type",
      "taxon", "Date", "Assigned_By", "OtherInfo", "Empty"))
  # ORF name is the first "|"-separated token of the synonym column
  Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)[, 1]
  # Numeric GO ID (strip the "GO:" prefix) for matching against Terms
  Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)[, 2])
  # All unique GO terms present in the association file
  GO_Terms <- unique(Gene_Association$GO_ID)
  Col_Names_X <- colnames(X)
  # Preallocate one row per GO term, reusing the score columns of the input file
  GO_Term_Averages <- X[0, ]
  GO_Term_Averages[seq_along(GO_Terms), ] <- NA
  colnames(GO_Term_Averages)[1] <- "GO_ID"
  colnames(GO_Term_Averages)[2] <- "Term"
  # Annotation columns: ontology aspect, gene counts, and the genes/ORFs used
  GO_Term_Averages$Ontology <- NA
  GO_Term_Averages$NumGenes <- NA
  GO_Term_Averages$AllPossibleGenes <- NA
  GO_Term_Averages$Genes <- NA
  GO_Term_Averages$ORFs <- NA
  # Parallel table holding the standard deviations
  GO_Term_SD <- X[0, ]
  GO_Term_SD[seq_along(GO_Terms), ] <- NA
  colnames(GO_Term_SD)[1] <- "GO_ID"
  colnames(GO_Term_SD)[2] <- "Term"
  # Loop over GO terms, averaging the L and K Z scores per term
  for (i in seq_along(GO_Terms)) {
    ID <- GO_Terms[i]
    # All association rows and all annotated ORFs for this GO term
    ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID, ]
    ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
    # Skip uninformative umbrella terms annotated to more than 4000 genes
    if (length(unique(ID_AllGenes_vector)) > 4000) {
      next
    }
    # Human-readable term name where the numeric Terms ID matches this term
    GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric, ]$GO_Term[1])
    # Z-score rows for all library genes annotated to this term
    Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector, ]
    GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ")
    GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF), collapse = " | ")
    GO_Term_Averages$GO_ID[i] <- as.character(ID)
    GO_Term_Averages$Term[i] <- GO_Description_Term
    # Genes annotated to the term that exist in our library vs. all of SGD
    GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
    GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
    GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
    # Mean of every score column (columns 3..ncol are the Z scores)
    for (j in 3:length(X[1, ])) {
      GO_Term_Averages[i, j] <- mean(Zscores_For_ID[, j], na.rm = TRUE)
    }
    # Same bookkeeping for the SD table
    GO_Term_SD$GO_ID[i] <- as.character(ID)
    GO_Term_SD$Term[i] <- GO_Description_Term
    for (j in 3:length(X[1, ])) {
      GO_Term_SD[i, j] <- sd(Zscores_For_ID[, j], na.rm = TRUE)
    }
  }
  # Suffix column names with _Avg / _SD and combine into one wide table
  colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages), "Avg", sep = "_")
  colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD), "SD", sep = "_")
  X2 <- cbind(GO_Term_Averages, GO_Term_SD)
  X2 <- X2[, order(names(X2))]
  X2 <- X2[!is.na(X2$Z_lm_L_Avg), ]
  # FIX: was expName[m], an undefined variable (script defines exp_name),
  # which aborted the run at the first write.csv
  write.csv(X2, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_All.csv", sep = ""), row.names = FALSE)
  # Remove NAs, then collapse redundant GO terms (same K average and same gene set)
  X3 <- X2[!is.na(X2$Z_lm_L_Avg), ]
  for (i in seq_len(length(X3[, 1]))) {
    GO_term_ID <- as.character(X3$GO_ID_Avg[i])
    X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID, ]
    # FIX: was `%in% X3_Temp` (a whole data.frame); the intent per the
    # original comment is to match rows sharing the same K_Avg value
    X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg, ]
    if (length(X3_Temp2[, 1]) > 1) {
      # Terms backed by an identical gene list are duplicates; keep the first
      if (length(unique(X3_Temp2$Genes_Avg)) == 1) {
        X3_Temp2 <- X3_Temp2[1, ]
      }
    }
    if (i == 1) {
      Y <- X3_Temp2
    }
    if (i > 1) {
      Y <- rbind(Y, X3_Temp2)
    }
  }
  Y1 <- unique(Y)
  write.csv(Y1, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_All_NonRedundantTerms.csv", sep = ""), row.names = FALSE)
  # |Z_lm_L| >= 2 terms, then those supported by more than 2 library genes
  Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2, ]
  Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg), ]
  write.csv(Y2, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv", sep = ""), row.names = FALSE)
  Y3 <- Y2[Y2$NumGenes_Avg > 2, ]
  write.csv(Y3, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv", sep = ""), row.names = FALSE)
  # Same two filters for the K scores
  Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2, ]
  Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg), ]
  write.csv(Y4, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv", sep = ""), row.names = FALSE)
  Y5 <- Y4[Y4$NumGenes_Avg > 2, ]
  write.csv(Y5, file = paste(output_dir, "/", exp_name, "/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv", sep = ""), row.names = FALSE)
# End of 'for loop'
}

File diff suppressed because it is too large Load Diff