save/write and load/read stack image files in Matlab

Write:

imageData = ones([100 100 10]);
tifFile = ‘ImageStack.tif’;
for i = 1:10
imwrite( imageData (:,:,i), tifFile,’WriteMode’,’append’);
end

Load:

tifFile = ‘ImageStack.tif’;
infoImage = imfinfo(tifFile);
w = InfoImage(1).Width;
h = InfoImage(1).Height;
planeNo = length(InfoImage);
imageData = zeros(h,w,planeNo,’uint16′);
for i = 1:planeNo
imageData(:,:,i) = imread(tifFile,’Index’,i);
end

Matlab Code: analyze collagen structure in renal cell carcinoma (RCC) (Chromophore vs Oncocytoma)


% --------------------------------- analyzeCollagen.m ---------------------------------------%
%% initialization

ch_folder = {'S12-19756C8','S12-25601A6','S13-6151A4','S13-10481C8','S13-29859A9','S13-29883A8','S13-33982B4','S13-36836C5','S14-7163A3','S14-9425B4','S14-17795B2'};
on_folder = {'S12-20431B4','S12-28537A2','S12-38438B6','S13-10677A5','S13-12911A2','S13-19695B2','S13-19695B3','S13-37948A2','S14-12554B2','S14-19568A3','S14-20995C4'};
mix_folder = {'S13-3375B1'};
    
ch_fileNumber = [4 4 6 4 4 4 4 3 4 5 5];
on_fileNumber = [4 4 4 4 5 4 4 4 4 4 4];
mix_fileNumber = [4];

ch_fileCode = {
'Ta','Tb','Tc','Td', ...
'Ta','Tb','Tc','Td', ...
'T1a','T1b','T2a','T2b','T2c','T2d', ...
'Ta','Tb','Tc','Td', ...
'T1a','T1b','T1c','T1d', ...
'Ta','Tb','Tc','Td', ...
'T1b','T1c','T1d','T2d', ...
'Tb','Tc','Td', ...
'Ta','Tb','Tb2','Tc', ...
'T1a','T1b','T1c','T1d','T2a', ... %S14-9425B4
'T1a','T1b','T2a','T2b','T2d'
};

on_fileCode = {
'Ta','Ta2','Tb','Tc', ...
'Ta','Ta2','Ta3','Ta4', ...
'Ta','Ta2','Tb','Tc', ... 
'Ta','Tb','Tc','Td', ...
'T1a','T1b','T1c','T2a','T2b', ...
'T1a','T1b','T2a','T2b', ...
'T1a','T1b','T2a','T2b', ...
'T1a','T1b','T2a','T2b', ...
'T1a','T1b','T2a','T2b', ...
'Ta','Tb','Tc','Td', ...
'T1a','T1b','T2a','T2b'
};

mix_fileCode = {'T1a','T1b','T1c','T1d'};

for i = 1:length(ch_fileCode)    
    ch_fileName{i} = ['25x_',ch_fileCode{i},'_power2_stepsize1_PMT700_zoom2_kalman3_Ch1.stk'];
    ch_fileName1{i} = ['25x_',ch_fileCode{i},'_780nm_power2_stepsize1_zoom2_kalman3_Ch1.stk'];
end

for i = 1:length(on_fileCode)    
    on_fileName{i} = ['25x_',on_fileCode{i},'_power2_stepsize1_PMT700_zoom2_kalman3_Ch1.stk'];
    on_fileName1{i} = ['25x_',on_fileCode{i},'_780nm_power2_stepsize1_zoom2_kalman3_Ch1.stk'];
    if i == 22
        on_fileName1{i} = ['25x_',on_fileCode{i},'_780nm_power2_stepsize1_zoom2_Ch1.stk'];
    end
end

for i = 1:length(mix_fileCode)    
    mix_fileName{i} = ['25x_',mix_fileCode{i},'_power2_stepsize1_PMT700_zoom2_kalman3_Ch1.stk'];
    mix_fileName1{i} = ['25x_',mix_fileCode{i},'_780nm_power2_stepsize1_zoom2_kalman3_Ch1.stk'];
end

%% load Ch1 maxImages (the maximum plane)

I_ch_col = zeros(800,800,length(ch_fileCode));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_ch_col(:,:,i) = I(:,:,ind);
    figure(1);subplot(7,7,i);imagesc(I_ch_col(:,:,i),[130 2700]);axis off
end

I_on_col = zeros(800,800,length(on_fileCode));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_on_col(:,:,i) = I(:,:,ind);    
%     I_on_col(i) = mean(I(:));
    figure(2);subplot(7,7,i);imagesc(I_on_col(:,:,i),[130 2000]);axis off
end

I_mix_col = zeros(800,800,length(mix_fileCode));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_mix_col(:,:,i) = I(:,:,ind);    
%     I_mix_col(i) = mean(I(:));
    figure(3);subplot(7,7,i);imagesc(I_mix_col(:,:,i));axis off
end

%% load Ch1 max projection Images (the maximum plane)

I_ch_col = zeros(800,800,length(ch_fileCode));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_ch_col(:,:,i) = max(I,[],3);
    figure(1);subplot(7,7,i);imagesc(I_ch_col(:,:,i));axis off
end

I_on_col = zeros(800,800,length(on_fileCode));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end    
    I_on_col(:,:,i) = max(I,[],3);
%     I_on_col(i) = mean(I(:));
    figure(2);subplot(7,7,i);imagesc(I_on_col(:,:,i));axis off
end

I_mix_col = zeros(800,800,length(mix_fileCode));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_mix_col(:,:,i) = max(I,[],3);
%     I_mix_col(i) = mean(I(:));
    figure(3);subplot(7,7,i);imagesc(I_mix_col(:,:,i));axis off
end

%% quantify the Ch1 intensity

I_ch_col = zeros(length(ch_fileCode),1);
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_ch_col(i) = mean(I(:));
end

I_on_col = zeros(length(on_fileCode),1);
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_on_col(i) = mean(I(:));
end

I_mix_col = zeros(length(mix_fileCode),1);
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_mix_col(i) = mean(I(:));
end

%% load Ch1 maxImages (the maximum plane), calculate covariance image and intensity histogram

I_ch_col = zeros(800,800,length(ch_fileCode));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_ch_col(:,:,i) = I(:,:,ind);
%     I_ch_col(i) = mean(I(:));
end

I_on_col = zeros(800,800,length(on_fileCode));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_on_col(:,:,i) = I(:,:,ind);    
%     I_on_col(i) = mean(I(:));
end

I_mix_col = zeros(800,800,length(mix_fileCode));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_mix_col(:,:,i) = I(:,:,ind);    
%     I_mix_col(i) = mean(I(:));
end

% calculate covariance images
for i = 1:size(I_ch_col,3)
    ch_col_cov(:,:,i) = cov(double(I_ch_col(:,:,i)));
end
for i = 1:size(I_on_col,3)
    on_col_cov(:,:,i) = cov(double(I_on_col(:,:,i)));
end
for i = 1:size(I_mix_col,3)
    mix_col_cov(:,:,i) = cov(double(I_mix_col(:,:,i)));
end

save maxImage_cov ch_col_cov on_col_cov mix_col_cov
figure;
for i = 1:size(ch_col_cov,3);
    1:subplot(7,7,i);
    imagesc(ch_col_cov(:,:,i));
    axis off
end
figure;
for i = 1:size(on_col_cov,3);
    1:subplot(7,7,i);
    imagesc(on_col_cov(:,:,i));
    axis off
end

% calculate histgram in the max images
I_ch_col = reshape(I_ch_col,800*800,47);
I_on_col = reshape(I_on_col,800*800,45);
I_mix_col = reshape(I_mix_col,800*800,4);

X = 0:4095;
H_ch = zeros(length(X),size(I_ch_col,2));
for i = 1:size(I_ch_col,2)
    H_ch(:,i) = hist(I_ch_col(:,i),X);
end
H_on = zeros(length(X),size(I_on_col,2));
for i = 1:size(I_on_col,2)
    H_on(:,i) = hist(I_on_col(:,i),X); 
end
H_mix = zeros(length(X),size(I_mix_col,2));
for i = 1:size(I_mix_col,2)
    H_mix(:,i) = hist(I_mix_col(:,i),X);
end

save maxImage_hist H_ch H_on H_mix X

%% load Ch1 all image planes, find collagen volume per plane without smoothing

I_ch_col = uint16(zeros(800,800,length(ch_fileCode)));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));
    BW = zeros(size(I));    
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end
    ch_col_vol(i) = sum(BW(:))/size(BW,3);
end

I_on_col = uint16(zeros(800,800,length(on_fileCode)));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));    
    BW = zeros(size(I));
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end    
    on_col_vol(i) = sum(BW(:))/size(BW,3);
end

I_mix_col = uint16(zeros(800,800,length(mix_fileCode)));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));    
    BW = zeros(size(I));    
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end
    mix_col_vol(i) = sum(BW(:))/size(BW,3);
end
save totalImage_col_vol ch_col_vol on_col_vol mix_col_vol

%% load Ch1 all image planes, find collagen volume per plane after smoothing

I_ch_col = uint16(zeros(800,800,length(ch_fileCode)));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    H = fspecial('gaussian',[2,2],1);
    for i = 1:size(I,3)
        I(:,:,i) = filter2(H,I(:,:,i));
    end
    BW = zeros(size(I));    
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end
    ch_col_vol(i) = sum(BW(:))/size(BW,3);
end

I_on_col = uint16(zeros(800,800,length(on_fileCode)));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
    H = fspecial('gaussian',[2,2],1);
    for i = 1:size(I,3)
        I(:,:,i) = filter2(H,I(:,:,i));
    end
    BW = zeros(size(I));
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end    
    on_col_vol(i) = sum(BW(:))/size(BW,3);
end

I_mix_col = uint16(zeros(800,800,length(mix_fileCode)));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    H = fspecial('gaussian',[2,2],1);
    for i = 1:size(I,3)
        I(:,:,i) = filter2(H,I(:,:,i));
    end  
    BW = zeros(size(I));    
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end
    mix_col_vol(i) = sum(BW(:))/size(BW,3);
end
save totalImage_col_vol ch_col_vol on_col_vol mix_col_vol

%% load Ch1 all image planes, find collagen volume using global threshold without smoothing

I_ch_col = uint16(zeros(800,800,length(ch_fileCode)));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    planes = size(I,3);
    I = reshape(I,size(I,1),size(I,2)*size(I,3));
%     I = I - min(I(:));
    BW = zeros(size(I));
	level = graythresh(I);
	BW = im2bw(I,level);
    ch_col_vol(i) = sum(BW(:))/planes;
end

I_on_col = uint16(zeros(800,800,length(on_fileCode)));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
    planes = size(I,3);
    I = reshape(I,size(I,1),size(I,2)*size(I,3));
%     I = I - min(I(:));
    BW = zeros(size(I));
	level = graythresh(I);
	BW = im2bw(I,level);
    on_col_vol(i) = sum(BW(:))/planes;
end

I_mix_col = uint16(zeros(800,800,length(mix_fileCode)));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    planes = size(I,3);
    I = reshape(I,size(I,1),size(I,2)*size(I,3));
%     I = I - min(I(:));
    BW = zeros(size(I));
	level = graythresh(I);
	BW = im2bw(I,level);
    mix_col_vol(i) = sum(BW(:))/planes;
end
save totalImage_col_vol ch_col_vol on_col_vol mix_col_vol

%% load Ch1 maxImages (the maximum plane), find collagen volume without smoothing

I_ch_col = uint16(zeros(800,800,length(ch_fileCode)));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_ch_col(:,:,i) = I(:,:,ind);  
end

I_on_col = uint16(zeros(800,800,length(on_fileCode)));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_on_col(:,:,i) = I(:,:,ind);  
end

I_mix_col = uint16(zeros(800,800,length(mix_fileCode)));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    [MAX,ind] = max(sum(sum(I)));
    I_mix_col(:,:,i) = I(:,:,ind);
end

for i = 1:size(I_ch_col,3)
    level = graythresh(I_ch_col(:,:,i));
	BW = im2bw(I_ch_col(:,:,i),level);
    ch_col_vol(i) = sum(BW(:));
end
for i = 1:size(I_on_col,3)
    level = graythresh(I_on_col(:,:,i));
	BW = im2bw(I_on_col(:,:,i),level);
    on_col_vol(i) = sum(BW(:));
end
for i = 1:size(I_mix_col,3)
    level = graythresh(I_mix_col(:,:,i));
	BW = im2bw(I_mix_col(:,:,i),level);
    mix_col_vol(i) = sum(BW(:));
end
save maxImage_col_vol ch_col_vol on_col_vol mix_col_vol

%% load Ch1 max projection image, find collagen volume

I_ch_col = uint16(zeros(800,800,length(ch_fileCode)));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end    
    I_ch_col(:,:,i) = max(I,[],3);
end

I_on_col = uint16(zeros(800,800,length(on_fileCode)));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_on_col(:,:,i) = max(I,[],3);
end

I_mix_col = uint16(zeros(800,800,length(mix_fileCode)));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = [];    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    I_mix_col(:,:,i) = max(I,[],3);
end

for i = 1:size(I_ch_col,3)
    level = graythresh(I_ch_col(:,:,i));
	BW = im2bw(I_ch_col(:,:,i),level);
    ch_col_vol(i) = sum(BW(:));
end
for i = 1:size(I_on_col,3)
    level = graythresh(I_on_col(:,:,i));
	BW = im2bw(I_on_col(:,:,i),level);
    on_col_vol(i) = sum(BW(:));
end
for i = 1:size(I_mix_col,3)
    level = graythresh(I_mix_col(:,:,i));
	BW = im2bw(I_mix_col(:,:,i),level);
    mix_col_vol(i) = sum(BW(:));
end
save maxProjImage_col_vol ch_col_vol on_col_vol mix_col_vol

%% FFT
ch_col_fft = zeros(800,800,length(ch_fileCode));
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I0 = tiffread2(filePath);         
    I1 = []; 
    k1 = 1;
    for k = 1:length(I0)
        if mean(I0(k).data(:)) > 100
            I1(:,:,k1) = uint16(I0(k).data);
            k1 = k1+1;
        end            
    end 
    I = zeros(800,800,size(I1,3));    
    for k = 1:size(I1,3)
        I(:,:,k) = log(abs(fftshift(fft2(I1(:,:,k)))));
    end 
%     I2 = mean(I,3);
%     [i0,j0]=find(I2==max(I2(:)));    
    ch_col_fft(:,:,i) = mean(I,3);
end

on_col_fft = zeros(800,800,length(on_fileCode));
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I0 = tiffread2(filePath);
    k1 = 1;
    I1 = [];
    for k = 1:length(I0)
        if mean(I0(k).data(:)) > 100
            I1(:,:,k1) = uint16(I0(k).data);
            k1 = k1+1;
        end
    end
    I = zeros(800,800,size(I1,3));
    for k = 1:size(I1,3)
        I(:,:,k) = log(abs(fftshift(fft2(I1(:,:,k)))));
    end
    on_col_fft(:,:,i) = mean(I,3);
end

mix_col_fft = zeros(800,800,length(mix_fileCode));
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I0 = tiffread2(filePath);
    k1 = 1;
    I1 = [];    
    for k = 1:length(I0)
        if mean(I0(k).data(:)) > 100
            I1(:,:,k1) = uint16(I0(k).data);
            k1 = k1+1;
        end
    end
    I = zeros(800,800,size(I1,3));
    for k = 1:size(I1,3)
        I(:,:,k) = log(abs(fftshift(fft2(I1(:,:,k)))));
    end
    mix_col_fft(:,:,i) = mean(I,3);
end
save data_fft ch_col_fft on_col_fft mix_col_fft

% plot
figure; hold on;
for i = 1:size(ch_col_fft,3)
    I = ch_col_fft(:,:,i);
    [i0,j0] = find(I==max(I(:)));
    plot(I(i0,:),'b');
end

figure; hold on;
for i = 1:size(on_col_fft,3)
    I = on_col_fft(:,:,i);
    [i0,j0] = find(I==max(I(:)));
    plot(I(i0,:),'r');
end

%% fit FFT to Lorentz
a_ch_x = zeros(size(ch_col_fft,3),1);
x0_ch_x = a_ch_x;
gamma_ch_x = a_ch_x;
a_ch_y = a_ch_x;
x0_ch_y = a_ch_x;
gamma_ch_y = a_ch_x;
for i = 1:size(ch_col_fft,3)
    I = ch_col_fft(:,:,i);
    [i0,j0] = find(I==max(I(:)));
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,1),I(i0,:));
    a_ch_x(i) = a_tmp;
    x0_ch_x(i) = x0_tmp;
    gamma_ch_x(i) = gamma_tmp;
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,2),I(:,j0)');
    a_ch_y(i) = a_tmp;
    x0_ch_y(i) = x0_tmp;
    gamma_ch_y(i) = gamma_tmp;        
end

a_on_x = zeros(size(on_col_fft,3),1);
x0_on_x = a_on_x;
gamma_on_x = a_on_x;
a_on_y = a_on_x;
x0_on_y = a_on_x;
gamma_on_y = a_on_x;
for i = 1:size(on_col_fft,3)
    I = on_col_fft(:,:,i);
    [i0,j0] = find(I==max(I(:)));
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,1),I(i0,:));
    a_on_x(i) = a_tmp;
    x0_on_x(i) = x0_tmp;
    gamma_on_x(i) = gamma_tmp;    
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,2),I(:,j0)');    
    a_on_y(i) = a_tmp;
    x0_on_y(i) = x0_tmp;
    gamma_on_y(i) = gamma_tmp;        
end

a_mix_x = zeros(size(mix_col_fft,3),1);
x0_mix_x = a_mix_x;
gamma_mix_x = a_mix_x;
a_mix_y = a_mix_x;
x0_mix_y = a_mix_x;
gamma_mix_y = a_mix_x;
for i = 1:size(mix_col_fft,3)
    I = mix_col_fft(:,:,i);
    [i0,j0] = find(I==max(I(:)));
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,1),I(i0,:));
    a_mix_x(i) = a_tmp;
    x0_mix_x(i) = x0_tmp;
    gamma_mix_x(i) = gamma_tmp;    
    [a_tmp,x0_tmp,gamma_tmp]=fitlorentz(1:size(I,2),I(:,j0)');
    a_mix_y(i) = a_tmp;
    x0_mix_y(i) = x0_tmp;
    gamma_mix_y(i) = gamma_tmp;    
end
save fft_width a_ch_x x0_ch_x gamma_ch_x a_on_x x0_on_x gamma_on_x a_mix_x x0_mix_x gamma_mix_x a_ch_y x0_ch_y gamma_ch_y a_on_y x0_on_y gamma_on_y a_mix_y x0_mix_y gamma_mix_y



%% load Ch1 all image planes, find collagen area in each plane without smoothing 
%  and find perimeter using smoothed thresholded image, calculate average of area/perimeter of each plane

ch_col_shape = zeros(length(ch_fileCode),1);
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));
    BW = uint16(zeros(size(I)));
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    for k = 1:size(I,3)
        I(:,:,k) = uint16(filter2(H,I(:,:,k)));
        stats = regionprops(I(:,:,k)>0,'Area','Perimeter');
        for k1 = 1:length(stats)
            Area(k,k1) = stats(k1).Area;
            Perimeter(k,k1) = stats(k1).Perimeter;
        end        
    end

    Area_ch(i) = sum(Area(:));
    Perimeter_ch(i) = sum(Perimeter(:));
    
%     ch_col_shape(i) = Area_ch(i)./Perimeter_ch(i);    
    ch_col_shape(i) = squeeze(sum(sum(sum(BW))))./Perimeter_ch(i);
end

on_col_shape = zeros(length(on_fileCode),1);
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));    
    BW = zeros(size(I));
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end    

    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    Area = [];
    Perimeter = [];
    for k = 1:size(I,3)
        I(:,:,k) = uint16(filter2(H,I(:,:,k)));
        stats = regionprops(I(:,:,k)>0,'Area','Perimeter');
        for k1 = 1:length(stats)
            Area(k,k1) = stats(k1).Area;
            Perimeter(k,k1) = stats(k1).Perimeter;
        end        
    end

    Area_on(i) = sum(Area(:));
    Perimeter_on(i) = sum(Perimeter(:));
    
%     on_col_shape(i) = Area_on(i)./Perimeter_on(i);    
    on_col_shape(i) = squeeze(sum(sum(sum(BW))))./Perimeter_on(i);
end

mix_col_shape = zeros(length(mix_fileCode),1);
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
%     I = I - min(I(:));    
    BW = zeros(size(I));    
    for k = 1:size(I,3)
        level = graythresh(I(:,:,k));
        BW(:,:,k) = im2bw(I(:,:,k),level);
    end

    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    Area = [];
    Perimeter = [];
    for k = 1:size(I,3)
        I(:,:,k) = uint16(filter2(H,I(:,:,k)));
        stats = regionprops(I(:,:,k)>0,'Area','Perimeter');
        for k1 = 1:length(stats)
            Area(k,k1) = stats(k1).Area;
            Perimeter(k,k1) = stats(k1).Perimeter;
        end        
    end

    Area_mix(i) = sum(Area(:));
    Perimeter_mix(i) = sum(Perimeter(:));
    
%     mix_col_shape(i) = Area_mix(i)./Perimeter_mix(i);    
    mix_col_shape(i) = squeeze(sum(sum(sum(BW))))./Perimeter_mix(i);
end
save totalImage_col_shape ch_col_shape on_col_shape mix_col_shape

%% load Ch1 max image plane, find collagen area without smoothing 
%  and find perimeter using smoothed thresholded image, calculate area/perimeter

ch_col_shape = zeros(length(ch_fileCode),1);
for i = 1:length(ch_fileCode)
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    
    [MAX,ind] = max(sum(sum(I)));
    I = I(:,:,ind);    

    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');
    
    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_ch(i) = sum(Area);
    Perimeter_ch(i) = sum(Perimeter);
    
%     boundaries = bwboundaries(I);
%     B = [];
%     for k = 1:length(boundaries)
%         B(k) = length(boundaries{k});
%     end
%     Perimeter_ch(i) = sum(B);
    ch_col_shape(i) = mean(Area_ch(i)./Perimeter_ch(i));
%     ch_col_shape(i) = squeeze(sum(sum(BW)))./Perimeter_ch(i);
%     figure(3);subplot(7,7,i);imagesc(I);axis off
end

on_col_shape = zeros(length(on_fileCode),1);
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
    
    [MAX,ind] = max(sum(sum(I)));
    I = I(:,:,ind);    

    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');
    
    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_on(i) = sum(Area);
    Perimeter_on(i) = sum(Perimeter);
    
%     boundaries = bwboundaries(I);
%     B = [];
%     for k = 1:length(boundaries)
%         B(k) = length(boundaries{k});
%     end
%     Perimeter_on(i) = sum(B);    
	on_col_shape(i) = Area_on(i)./Perimeter_on(i);
% 	on_col_shape(i) = squeeze(sum(sum(BW)))./Perimeter_on(i);
%     figure(3); subplot(7,7,i); imagesc(BW); axis off
%     figure(4); subplot(7,7,i); imagesc(I>0); axis off    
%     figure(6);subplot(7,7,i);imagesc(I);axis off
end

mix_col_shape = zeros(length(mix_fileCode),1);
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end

    [MAX,ind] = max(sum(sum(I)));
    I = I(:,:,ind);    

    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');
    
    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_mix(i) = sum(Area);
    Perimeter_mix(i) = sum(Perimeter);
    
%     boundaries = bwboundaries(I);
%     B = [];
%     for k = 1:length(boundaries)
%         B(k) = length(boundaries{k});
%     end
%     Perimeter_mix(i) = sum(B);    
    mix_col_shape(i) = Area_mix(i)./Perimeter_mix(i);
%     mix_col_shape(i) = squeeze(sum(sum(BW)))./Perimeter_mix(i);
end
save totalImage_col_shape ch_col_shape on_col_shape mix_col_shape


%% load Ch1 max Projection image, find collagen area without smoothing 
%  and find perimeter using smoothed thresholded image, calculate area/perimeter

ch_col_shape = zeros(length(ch_fileCode),1);
for i = 1:length(ch_fileCode)        
    i
    for j = 1:length(ch_fileNumber)
        if sum(ch_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end    
    filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' ch_folder{folderNo} '\Processed images\tumor\' ch_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);    
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end
    
    I = max(I,[],3);
    
    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');

    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_ch(i) = sum(Area);
    Perimeter_ch(i) = sum(Perimeter);    
%     ch_col_shape(i) = mean(Area_ch(i)./Perimeter_ch(i));    
    ch_col_shape(i) = mean(squeeze(sum(sum(BW)))./Perimeter_ch(i));    
%     figure(3);subplot(7,7,i);imagesc(I);axis off
end

on_col_shape = zeros(length(on_fileCode),1);
for i = 1:length(on_fileCode)
    i
    for j = 1:length(on_fileNumber)
        if sum(on_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end
    end
    filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' on_folder{folderNo} '\Processed images\tumor\' on_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);      
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);        
            k1 = k1+1;
        end
    end
    
    I = max(I,[],3);

    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');

    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_on(i) = sum(Area);
    Perimeter_on(i) = sum(Perimeter);    
% 	on_col_shape(i) = mean(Area_on(i)./Perimeter_on(i));
	on_col_shape(i) = mean(squeeze(sum(sum(BW)))./Perimeter_on(i));
%     figure(3); subplot(7,7,i); imagesc(BW); axis off
%     figure(4); subplot(7,7,i); imagesc(I>0); axis off    
%     figure(6);subplot(7,7,i);imagesc(I);axis off
end

mix_col_shape = zeros(length(mix_fileCode),1);
for i = 1:length(mix_fileCode)
    i
    for j = 1:length(mix_fileNumber)
        if sum(mix_fileNumber(1:j)) >= i
            folderNo = j;
            break
        end        
    end        
    filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName{i}];
    if ~exist(filePath)
        filePath = ['Onco-chromo\' mix_folder{folderNo} '\Processed images\tumor\' mix_fileName1{i}];
    end
    I1 = tiffread2(filePath);
    k1 = 1;
    I = uint16([]);
    for k = 1:length(I1)
        if mean(I1(k).data(:)) > 100
            I(:,:,k1) = uint16(I1(k).data);
            k1 = k1+1;
        end
    end

    I = max(I,[],3);

    BW = uint16(zeros(size(I)));
    level = graythresh(I);
    BW = im2bw(I,level);
    
    I = I.*uint16(BW);
    H = fspecial('gaussian',[10,10],3);
    pars = struct([]);
    I = uint16(filter2(H,I));
    stats = regionprops(I>0,'Area','Perimeter');

    for k = 1:length(stats)
        Area(k) = stats(k).Area;
        Perimeter(k) = stats(k).Perimeter;
    end
    Area_mix(i) = sum(Area);
    Perimeter_mix(i) = sum(Perimeter);    
%     mix_col_shape(i) = mean(Area_mix(i)./Perimeter_mix(i));
    mix_col_shape(i) = mean(squeeze(sum(sum(BW)))./Perimeter_ch(i));
end
save totalImage_col_shape ch_col_shape on_col_shape mix_col_shape


% 
% boundaries = bwboundaries(Ibw_smooth(:,:,1)>0);
% hold on;
% for k=1:length(boundaries)
%    b = boundaries{k};
%    plot(b(:,2),b(:,1),'b','LineWidth',1);
% end

% --------------------------------- lorentz.m ---------------------------------------%
function y = lorentz(x,x0,gamma)
y = 1/pi*0.5*gamma./((x-x0).^2+(0.5*gamma)^2);
plot(x,y);
drawnow

% --------------------------------- fitlorentz.m ---------------------------------------%
function [a,x0,gamma] = fitlorentz(x,y)
err = @(pars,x,y) sqrt(sum((y - pars(1)*lorentz(x,pars(2),pars(3))-pars(4)).^2));
[MAX,ind] = max(y);
a = sum(y(:));
x0 = x(ind);
gamma = (x(end)-x(1))/10;
y0 = min(y);
pars = fminsearch(@(pars)err(pars,x,y),[a,x0,gamma,y0]);
a = pars(1)
x0 = pars(2);
gamma = pars(3);
y0 = pars(4);
yfit = lorentz(x,x0,gamma);
plot(x,y,'r.',x,a*yfit+y0)
drawnow

% --------------------------------- gaussian.m ---------------------------------------%
function y = gaussian(x,a,mu,sigma)
y = a*exp(-(x-mu).^2/2/sigma^2);

% --------------------------------- fitgaussian.m ---------------------------------------%
function [mu,sigma,yfit] = fitgaussian(x,y)
err = @(pars,x,y) sqrt(sum((y - gaussian(x,pars(1),pars(2),pars(3))).^2));
[MAX,ind] = max(y);
pars = fminsearch(@(pars)err(pars,x,y),[1,x(ind),x(2)-x(1)]);
a = pars(1)
mu = pars(2);
sigma = pars(3);
yfit = gaussian(x,a,mu,sigma);
bar(x,y)
hold on
plot(x,yfit)



2012 in review

The WordPress.com stats helper monkeys prepared a 2012 annual report for this blog.

Here’s an excerpt:

About 55,000 tourists visit Liechtenstein every year. This blog was viewed about 290,000 times in 2012. If it were Liechtenstein, it would take about 5 years for that many people to see it. Your blog had more visits than a small country in Europe!

Click here to see the complete report.

IEXPLORE.EXE redirected to 0749.com or 13721.net

My internet explorer was re-directed to http://www.0749.com. I removed a few links in registry like: “…iexplore.exe” http://www.0749.com. Then I started IE, it was redirected to 13721.net instead. I deleted everything I found in registry about this address. But it still did that.

I have Norton Antivirus, but it can’t detect anything.

Finally, I found a suspicous file in the IE folder: 1802120223.dat. I changed the name. Then the problem was gone. Obviously, this file is called by IE, which is marked in registry.

Then I found a key in registry:

[HKEY_CLASSES_ROOT\CLSID\{18021223-2011-0295-951B-9EA34E34E8CC}\InprocServer32]
@=”1802120223.dat”

If I remove this key and have the file there, there won’t be any problem either.

That’s why people can’t recognize. But the antivirus software is supposed to recognize it.

Gene Sequence Search for Very Large Data File

A research fellow at Harvard asked me to write a program to search for gene sequence, such as ‘TCC’, and record the next 4 codes. The data file is 14Gb. He tried some matlab codes, and the system freezes, or keeps running and never stops.

I first tested using a loop method (V1.0). It turns out it will take a month to finish 14Gb data on my 1.8GHz Core 2 Duo/3Gb RAM PC. Then I updated it to use matrix. It turns out it will only take 1.3 hours on my 1.8Gb PC or 40 minutes on my 2.33GHz Core 2 Duo/2Gb RAM PC. It beats any codes that he got using Python or other language.

Source codes are shown as follows, where v1.0 is using loop and slow for large data file, it records wanted codes and target sequence positions; v2.0 and v2.1 record wanted codes only without positions; v2.2 and v2.3 records both codes and positions.

The latest version of the program is also posted on mathworks: http://www.mathworks.com/matlabcentral/fileexchange/31966-fast-gene-sequence-search-for-very-large-data-file

%———————————– seqSearch.m V1.0 —————————————

function [p,s]=seqSearch(f,targSeq,lc,offset,M,print)
%[p,s] = seqSearch(f,targSeq,lc,offset,M,print)
%[p,s] = seqSearch(f,'','','','','') with all default parameters.
%
%  Gene Sequence Search V1.0 - search for a specific sequence targSeq and record
%  a neighboring code sequence with an offset position.
%
%Input:
% f: data file path, format: 'filepath', e.g. 'C:\sample.seq'
% targSeq: searching target sequence, by default 'TCC'
% lc: length of the code to record, by default 4
% offset: number of offset positons from the search sequence to record the
%   code. e.g., offset is 1 for the first one after the last code (to the
%   right), and negative value means before the first code (to the left).
%   by default 1
% M: length of the chunk of the codes to read into memory each time,
%   by default 10E6
% print: display first one in every "print" number of found codes,
%   by default 50
%
%Output:
% p: positions of the target sequence found (first code of the sequence)
% s: the recorded codes
%
%   Binlin Wu - CCNY
%   bwu@sci.ccny.cuny.edu
%   Jun 10th, 2011

% targetSeq = 'TCC';
% M = 1e6;

switch nargin
    case 0
        error('Parameters needed: seqSearch(f,targSeq,lc,offset,M,print)')
    case 1
        targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; print = 100;
    case 2
        lc = 4; offset = 1; M = 1e6; print = 100;
    case 3
        offset = 1; M = 1e6; print = 100;
    case 4
        M = 1e6; print = 100;
    case 5
        print = 100;
    case 6
    otherwise
        error('too many parameters: seqSearch(f,targSeq,lc,offset,M,print)')
end

if isempty(targSeq)
    targSeq = 'TCC';
end
if isempty(lc)
    lc = 4;
end
if isempty(offset)
    offset = 1;
end
if isempty(M)
    M = 1e6;
end
if isempty(print)
    print = 100;
end

% if nargin < 5
%     error('More parameters needed, seqSearch(f,targSeq,lc,offset,M,...)')
% elseif	nargin==5
%     print = 50;
% end

lt = length(targSeq); %length of the search target sequence
fclose('all');
% f = 'C:\Documents and Settings\Bin\Desktop\sample1.txt';
fid = fopen(f,'r');

% ----- initialization -------
k = 0;
n = 0;
p = [];
s = '';
C0 = '';

fprintf('Start searching ...\n')
if offset > 0
    lMin = lt+offset+lc-1; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end
    C = textscan(fid,'%c',M,'HeaderLines',1);
    C = C{1};
    if length(C) > 0 && length(C) < lMin  %too short, length < lMin
        for i = 1:length(C)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = char(zeros(1,lc));
        end
    elseif length(C) >= lMin && length(C) < M  %only read once, lMin <= length < M
        i_END = length(C) - (lMin - 1);
        for i=1:i_END
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = C(i+lt-1+offset:i+lt-1+offset+lc-1);
        end
        C0 = C(i_END+1:end);
        for i = 1:length(C0)-lt+1
            if ~isequal(C0(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i_END+i;
            s(k,:) = char(zeros(1,lc));
        end
    elseif length(C) == M %data is no less than M codes
%         M = M+offset+lc-1;
        n = 1;
        fprintf('\n  In the %dth %d codes: \n',n,M);

        i_END = length(C) - (lMin - 1);
        for i=1:i_END
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
    %             if i+lt-1+offset+lc-1 > M && length(C) < M+length(C0) %run to the end of the data
    %                 s{k} = [];
    %             end
            s(k,:) = C(i+lt-1+offset:i+lt-1+offset+lc-1);

        end
        C0 = C(i_END+1:end);
        i_END = length(C);
        while(1)
            C = textscan(fid,'%c',M);
            if isempty(C{1})
                break
            end

            n = n+1;
            i_END = length(C{1});
            fprintf('\n  In the %dth %d codes: \n',n,M);
            C = [C0;C{1}];    

%             i_END = length(C) - (lMin - 1);

            for i=1:i_END
                if ~isequal(C(i:i+lt-1)',targSeq)
                    continue
                end
                k = k+1;
                if print==1 || mod(k,print)==1
                    fprintf('  - %dth %s sequence found\n',k,targSeq);
                end
                p(k)=(n-1)*M - (lMin - 1) + i;
        %             if i+lt-1+offset+lc-1 > M && length(C) < M+length(C0) %run to the end of the data
        %                 s{k} = [];
        %             end
                s(k,:) = C(i+lt-1+offset:i+lt-1+offset+lc-1);

            end
            C0 = C(i_END+1:end);
        end

        for i = 1:length(C0)-lt+1   % rest lMin-1 codes at the end
            if ~isequal(C0(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            if i_END == M % exactly n * M codes & target sequence found in the ending lMin-1 codes
                p(k)=n*M - (lMin - 1) + i;
            else
                p(k)=(n-1)*M - (lMin - 1) + i_END + i;
            end
            s(k,:) = char(zeros(1,lc));
        end
    end
else    % when offset is negtive
    lMin = lt-offset; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end
    if -offset < lc
        error(' |offset| can not be smaller than lc, when offset < 0');
    end
    C = textscan(fid,'%c',M,'HeaderLines',1);
    C = C{1};
    if length(C) > 0 && length(C) < lMin  %too short, length < lMin
        for i = 1:length(C)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = char(zeros(1,lc));
        end
    elseif length(C) >= lMin && length(C) < M  %only read once, lMin <= length < M
        for i = 1:(lMin-1)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = char(zeros(1,lc));
        end
        i_BEGIN = i+1; %lMin-lt+1;
        for i=i_BEGIN:length(C)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = C(i+offset : i+offset+lc-1);
        end
    elseif length(C) == M %data is no less than M codes
%         M = M+offset+lc-1;
        n = 1;
        fprintf('\n  In the %dth %d codes: \n',n,M);

        i_END = lMin -  lt; %(lMin-1)-lt+1 ;
        for i = 1:i_END %(lMin-1)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = char(zeros(1,lc));
        end

        i_BEGIN = i_END + 1; %lMin-lt+1;
        i_END = M - lt + 1;

        for i=i_BEGIN:i_END %length(C)-lt+1
            if ~isequal(C(i:i+lt-1)',targSeq)
                continue
            end
            k = k+1;
            if print==1 || mod(k,print)==1
                fprintf('  - %dth %s sequence found\n',k,targSeq);
            end
            p(k) = i;
            s(k,:) = C(i+offset:i+offset+lc-1);
        end
        C0 = C(end-(lMin-1)+1:end);
        while(1)
            C = textscan(fid,'%c',M);
            if isempty(C{1})
                break
            end

            n = n+1;
            fprintf('\n  In the %dth %d codes: \n',n,M);
            C = [C0;C{1}];

            i_BEGIN = lMin-lt+1;
            i_END = length(C)-lt+1;
            for i=i_BEGIN:i_END
                if ~isequal(C(i:i+lt-1)',targSeq)
                    continue
                end
                k = k+1;
                if print==1 || mod(k,print)==1
                    fprintf('  - %dth %s sequence found\n',k,targSeq);
                end
                p(k)=(n-1)*M - (lMin - 1) + i;
                s(k,:) = C(i+offset:i+offset+lc-1);
            end
            C0 = C(end-(lMin-1)+1:end);
        end
    end
end
fprintf('\n  Searching finished! \n',n,M);
fclose(fid);
save seqSearch.mat p s

%———————————– seqSearch.m V2.0 —————————————

function seq1 = seqSearch(f,targSeq,lc,offset,M,sFlag,fs)
%seq1 = seqSearch2(f,targSeq,lc,offset,M,sFlag,fs)
%seq1 = seqSearch2(f) with all default parameters.
%seq1 = seqSearch2(f,'','','',1e7) with all default parameters except M
%** If the dataset is very large, NOT using output is highly recoomended.
%
%  Gene Sequence Search V2.0 - search for a specific sequence and record
%  a neighboring code sequence with an offset position using matrix for
%  large datasets.
%
%Input:
% f: data file path, format: 'filepath', e.g. 'C:\sample.seq'
% targSeq: search target sequence, by default 'TCC'
% lc: length of the codes to record, by default 4
% offset: number of offset positons from the search sequence to record the
%   code. e.g., offset is 1 for the first one after the last code (to the
%   right), and negative value means before the first code (to the left).
%   by default 1
% M: length of the chunk of the codes to read into memory each time,
%   by default 10E6
% sFlag: 1 for save, 0 for not save, by default 0
% fs: filename to save, only valid when sFlag = 1, e.g. 'seq.txt'
%
%Output:
% seq1: the recorded codes
%
%   Binlin Wu - CCNY
%   bwu@sci.ccny.cuny.edu
%   Jun 10th, 2011

% set default values for the parameters
switch nargin
    case 0
        error('Parameters needed: seqSearch(f,targSeq,lc,offset,M,print)')
    case 1
        targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 2
        lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 3
        offset = 1; M = 1e6; sFlag = 0;
    case 4
        M = 1e6; sFlag = 0;
    case 5
        sFlag = 0;
    case 6
        if sFlag == 1
            error('save destination file name missing');
        end
    case 7
        if isequal(sFlag,1) && isempty(fs)
            error('save destination file name missing');
        end
    otherwise
        error('too many parameters: seqSearch(f,targSeq,lc,offset,M,print)')
end

if isempty(targSeq)
    targSeq = 'TCC';
end
if isempty(lc)
    lc = 4;
end
if isempty(offset)
    offset = 1;
end
if isempty(M)
    M = 1e6;
end
if isempty(sFlag)
    sFlag = 0;
end

% ---- existing save file overwritten confirm ---
confirm = 0;
if sFlag == 1 && exist(fs,'file') == 2
    while(1)
        if confirm == 1
            break
        end
        if sFlag == 1 && exist(fs,'file') == 2
            fprintf('Filename %s already exists!\n',fs);
            reply = upper(input('Do you want to overwrite it? Y/N [N]: ', 's'));
            if isempty(reply)
                reply = 'N';
            end
            if ~isequal(reply,'Y') && ~isequal(reply,'N')
                confirm = 0;
            elseif isequal(reply,'Y')
                confirm = 1;
            else
                fs1 = input('Please enter a new file name: ', 's');
                fprintf('Codes will be recorded in file: %s\n', fs1);
                reply = upper(input('Confirm? Y/N [Y]: ', 's'));
                if isempty(reply)
                    reply = 'Y';
                end
                if isequal(reply,'Y')
                    confirm = 1;
                    fs = fs1;
                else
                    confirm = 0;
                end
                if exist(fs1,'file') == 2
                    confirm = 0;
                end
            end
        end
    end
end
% --------------------------------------

lt = length(targSeq); %length of the search target sequence
fclose('all');
% f = '.\sample.seq';
fid = fopen(f,'r');
if sFlag == 1
    sfid = fopen(fs,'w');
end

% ----- initialization -------
n = 0;
C0 = '';
seq1 = '';

fprintf('Start searching ...\n')
lMin = lt+offset+lc-1; %the least codes possible to have both a target sequence and the codes to record
if M < lMin
    errMsg = sprintf('M must be larger than %d',lMin);
    error(errMsg);
end
C = textscan(fid,'%c',0,'HeaderLines',1);

while(1)
    C = textscan(fid,'%c',M);
    if isempty(C{1})
        break
    end

    n = n+1;
    fprintf('\n  Checking the %dth %d codes ... \n',n,M);
    C = [C0;C{1}];    

    i_END = length(C) - lMin + lt;

    C1 = [C(1:i_END) C(2:i_END+1) C(3:i_END+2)];
    SEQ = repmat('TCC',i_END,1);
    seqPos = C1 == SEQ;
    seqPos = seqPos(:,1)&seqPos(:,2)&seqPos(:,3);
    seqPos1 = logical( ...
              [0; 0; 0; seqPos(1:end);  0] + ...
              [0; 0; 0; 0;             seqPos(1:end)] + ...
              [0; 0; 0; 0;             0;               seqPos(1:end-1)] + ...
              [0; 0; 0; 0;             0;               0;               seqPos(1:end-2)]);

    if sFlag == 1
        fprintf(sfid,'%s',C(seqPos1));
    end
    if nargout == 1
        seq1 = [seq1;C(seqPos1)];
    end
    fprintf('  The %dth %d codes done. \n',n,M);
    C0 = C(end - (lMin - 1) + 1:end);
end
fprintf('\n  Searching finished! \n',n,M);
fclose('all');

%———————————– seqSearch.m V2.1 —————————————

function seq1 = seqSearch(f,targSeq,lc,offset,M,sFlag,fs)
%seq1 = seqSearch2(f,targSeq,lc,offset,M,sFlag,fs)
%seq1 = seqSearch2(f) with all default parameters.
%seq1 = seqSearch2(f,'','','',1e7) with all default parameters except M
%** If the dataset is very large, NOT using output is highly recoomended.
%
%  Gene Sequence Search V2.1 - search for a specific sequence and record
%  a neighboring code sequence with an offset position using matrix for
%  large datasets.
%
%Input:
% f: data file path, format: 'filepath', e.g. 'C:\sample.seq'
% targSeq: search target sequence, by default 'TCC'
% lc: length of the codes to record, by default 4
% offset: number of offset positons from the search sequence to record the
%   code. e.g., offset is 1 for the first one after the last code (to the
%   right), and negative value means before the first code (to the left).
%   by default 1
% M: length of the chunk of the codes to read into memory each time,
%   by default 10E6
% sFlag: 1 for save, 0 for not save, by default 0
% fs: filename to save, only valid when sFlag = 1, e.g. 'seq.txt'
%
%Output:
% seq1: the recorded codes
%
%   Binlin Wu - CCNY
%   bwu@sci.ccny.cuny.edu
%   Jun 10th, 2011

% set default values for the parameters
switch nargin
    case 0
        error('Parameters needed: seqSearch(f,targSeq,lc,offset,M,print)')
    case 1
        targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 2
        lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 3
        offset = 1; M = 1e6; sFlag = 0;
    case 4
        M = 1e6; sFlag = 0;
    case 5
        sFlag = 0;
    case 6
        if sFlag == 1
            error('save destination file name missing');
        end
    case 7
        if isequal(sFlag,1) && isempty(fs)
            error('save destination file name missing');
        end
    otherwise
        error('too many parameters: seqSearch(f,targSeq,lc,offset,M,print)')
end

if isempty(targSeq)
    targSeq = 'TCC';
end
if isempty(lc)
    lc = 4;
end
if isempty(offset)
    offset = 1;
end
if isempty(M)
    M = 1e6;
end
if isempty(sFlag)
    sFlag = 0;
end

% ---- existing save file overwritten confirm ---
confirm = 0;
if sFlag == 1 && exist(fs,'file') == 2
    while(1)
        if confirm == 1
            break
        end
        if sFlag == 1 && exist(fs,'file') == 2
            fprintf('Filename %s already exists!\n',fs);
            reply = upper(input('Do you want to overwrite it? Y/N [N]: ', 's'));
            if isempty(reply)
                reply = 'N';
            end
            if ~isequal(reply,'Y') && ~isequal(reply,'N')
                confirm = 0;
            elseif isequal(reply,'Y')
                confirm = 1;
            else
                fs1 = input('Please enter a new file name: ', 's');
                fprintf('Codes will be recorded in file: %s\n', fs1);
                reply = upper(input('Confirm? Y/N [Y]: ', 's'));
                if isempty(reply)
                    reply = 'Y';
                end
                if isequal(reply,'Y')
                    confirm = 1;
                    fs = fs1;
                else
                    confirm = 0;
                end
                if exist(fs1,'file') == 2
                    confirm = 0;
                end
            end
        end
    end
end
% --------------------------------------

lt = length(targSeq); %length of the search target sequence
fclose('all');
% f = '.\sample.seq';
fid = fopen(f,'r');
if sFlag == 1
    sfid = fopen(fs,'w');
end

% ----- initialization -------
n = 0;
C0 = '';
seq1 = '';

fprintf('Start searching ...\n')
lMin = lt+offset+lc-1; %the least codes possible to have both a target sequence and the codes to record
if M < lMin
    errMsg = sprintf('M must be larger than %d',lMin);
    error(errMsg);
end
C = textscan(fid,'%c',0,'HeaderLines',1);

while(1)
    C = textscan(fid,'%c',M);
    if isempty(C{1})
        break
    end

    n = n+1;
    fprintf('\n  Checking the %dth %d codes ... \n',n,M);
    C = [C0;C{1}];    

%     i_END = length(C) - lMin + lt;
%
%     C1 = [C(1:i_END) C(2:i_END+1) C(3:i_END+2)];
%     SEQ = repmat('TCC',i_END,1);
%     seqPos = C1 == SEQ;
%     seqPos = seqPos(:,1)&seqPos(:,2)&seqPos(:,3);
%     seqPos1 = logical( ...
%               [0; 0; 0; seqPos(1:end);  0] + ...
%               [0; 0; 0; 0;             seqPos(1:end)] + ...
%               [0; 0; 0; 0;             0;               seqPos(1:end-1)] + ...
%               [0; 0; 0; 0;             0;               0;               seqPos(1:end-2)]);

%     seqPos = strfind(C(1:i_END)','TCC');
%     seqPos1 = [seqPos+lt+offset-1;seqPos+lt+offset;seqPos+lt+offset+1;seqPos+lt+offset+2];

    seqPos = strfind(C(1:end-lMin + lt)',targSeq);
    if ~isempty(seqPos)
        seqPos1 = repmat(seqPos+lt+offset-1,lc,1);
        seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos));

        seqPos1 = seqPos1(:);

        if sFlag == 1
            fprintf(sfid,'%s',C(seqPos1));
        end
        if nargout == 1
            seq1 = [seq1;C(seqPos1)];
        end
    end
    fprintf('  The %dth %d codes done. \n',n,M);
    C0 = C(end - (lMin - 1) + 1:end);
end
fprintf('\n  Searching finished! \n',n,M);
fclose('all');

%———————————– seqSearch.m V2.2 —————————————

function [seq,seqPos] = seqSearch(f,targSeq,lc,offset,M,sFlag,fs)
%[seq,seqPos] = seqSearch2(f,targSeq,lc,offset,M,sFlag,fs)
%[seq,seqPos] = seqSearch2(f) with all default parameters.
%[seq,seqPos] = seqSearch2(f,'','','',1e7) with all default parameters except M
%** If the dataset is very large, NOT using output is highly recoomended.
%
%  Gene Sequence Search V2.2 - search for a specific sequence and record
%  a neighboring code sequence with an offset position using matrix for
%  large datasets.
%
%Input:
% f: data file path, format: 'filepath', e.g. 'C:\sample.seq'
% targSeq: search target sequence, by default 'TCC'
% lc: length of the codes to record, by default 4
% offset: number of offset positons from the search sequence to record the
%   code. e.g., offset is 1 for the first one after the last code (to the
%   right), and negative value means before the first code (to the left).
%   by default 1
% M: length of the chunk of the codes to read into memory each time,
%   by default 10E6
% sFlag: 1 for save, 0 for not save, by default 0
% fs: filename to save, only valid when sFlag = 1, e.g. 'seq.txt'
%
%Output:
% seq: the recorded codes
% seqPos: positions of the target search sequence
%
%   Binlin Wu - CCNY
%   bwu@sci.ccny.cuny.edu
%   Jun 10th, 2011

% set default values for the parameters
switch nargin
    case 0
        error('Parameters needed: seqSearch(f,targSeq,lc,offset,M,print)')
    case 1
        targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 2
        lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 3
        offset = 1; M = 1e6; sFlag = 0;
    case 4
        M = 1e6; sFlag = 0;
    case 5
        sFlag = 0;
    case 6
        if sFlag == 1
            error('save destination file name missing');
        end
    case 7
        if sFlag == 1 && isempty(fs)
            error('save destination file name missing');
        end
    otherwise
        error('too many parameters: seqSearch(f,targSeq,lc,offset,M,print)')
end

if isempty(targSeq)
    targSeq = 'TCC';
end
if isempty(lc)
    lc = 4;
end
if isempty(offset)
    offset = 1;
end
if isempty(M)
    M = 1e6;
end
if isempty(sFlag)
    sFlag = 0;
end

% ---- existing save file overwritten confirm ---
confirm = 0;
if sFlag == 1 && exist(fs,'file') == 2
    while(1)
        if confirm == 1
            break
        end
        if sFlag == 1 && exist(fs,'file') == 2
            fprintf('Filename %s already exists!\n',fs);
            reply = upper(input('Do you want to overwrite it? Y/N [N]: ', 's'));
            if isempty(reply)
                reply = 'N';
            end
            if ~isequal(reply,'Y') && ~isequal(reply,'N')
                confirm = 0;
            elseif isequal(reply,'Y')
                confirm = 1;
            else
                fs1 = input('Please enter a new file name: ', 's');
                fprintf('Codes will be recorded in file: %s\n', fs1);
                reply = upper(input('Confirm? Y/N [Y]: ', 's'));
                if isempty(reply)
                    reply = 'Y';
                end
                if isequal(reply,'Y')
                    confirm = 1;
                    fs = fs1;
                else
                    confirm = 0;
                end
                if exist(fs1,'file') == 2
                    confirm = 0;
                end
            end
        end
    end
end

% ----- initialization -------

lt = length(targSeq); %length of the search target sequence
fclose('all');
% f = '.\sample.seq';
fid = fopen(f,'r');
if sFlag == 1
    sfid = fopen(fs,'w');
end

n = 0;
C0 = '';
seq = '';
seqPos = [];

% ----- start search -------

fprintf('Start searching ...\n')
if offset > 0
    lMin = lt+offset+lc-1; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end

    C = textscan(fid,'%c',M,'HeaderLines',1);
    C = C{1};

    if isempty(C)
        fprintf('\n  The data file is empty!\n')
        return
    end

    if length(C) > 0 && length(C) < lMin  %too short, length < lMin
        if nargout == 2
            seqPos0 = strfind(C',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = [seqPos seqPos0];
            end
        end

    elseif length(C) >= lMin && length(C) < M  %only read once, lMin <= length < M

        seqPos0 = strfind(C(1:end - (lMin - 1) + (lt-1))',targSeq);

        if ~isempty(seqPos0)
            seqPos1 = repmat(seqPos0+lt-1+offset,lc,1);
            seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
            seqPos1 = seqPos1(:);

            if sFlag == 1
                fprintf(sfid,'%s',C(seqPos1));
            end
            if nargout >= 1
                seq = [seq;C(seqPos1)];
            end
            if nargout >= 2
                seqPos = [seqPos seqPos0];
            end
        end             

        seqPos0 = strfind(C(end - (lMin - 1) + 1:end)',targSeq);
        if ~isempty(seqPos0)
            if nargout >= 1
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
            end
            if nargout >= 2
                seqPos = [seqPos seqPos0 + length(C) - (lMin - 1)];
            end
        end

    elseif length(C) == M %data is no less than M codes
        n=1;
        LastChunkLength = length(C);
        fprintf('\n  Checking the %dth %d codes ... \n',n,M);

        seqPos0 = strfind(C(1:end - (lMin - 1) + (lt-1))',targSeq);

        if ~isempty(seqPos0)
            seqPos1 = repmat(seqPos0+lt-1+offset,lc,1);
            seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
            seqPos1 = seqPos1(:);

            if sFlag == 1
                fprintf(sfid,'%s',C(seqPos1));
            end
            if nargout >= 1
                seq = [seq;C(seqPos1)];
            end
            if nargout >= 2
                seqPos = [seqPos seqPos0];
            end
        end
        fprintf('  The %dth %d codes done. \n',n,M);
        C0 = C(end - (lMin - 1) + 1:end);                 

        while(1)
            C = textscan(fid,'%c',M);
            if isempty(C{1})
                break
            end
            LastChunkLength = length(C{1});
            n = n+1;
            fprintf('\n  Checking the %dth %d codes ... \n',n,M);
            C = [C0;C{1}];    

        %     C1 = [C(1:i_END) C(2:i_END+1) C(3:i_END+2)];
        %     SEQ = repmat('TCC',i_END,1);
        %     seqPos = C1 == SEQ;
        %     seqPos = seqPos(:,1)&seqPos(:,2)&seqPos(:,3);
        %     seqPos1 = logical( ...
        %               [0; 0; 0; seqPos(1:end); 0;               0;               0            ] + ...
        %               [0; 0; 0; 0;             seqPos(1:end);   0;               0            ] + ...
        %               [0; 0; 0; 0;             0;               seqPos(1:end);   0            ] + ...
        %               [0; 0; 0; 0;             0;               0;
        %               seqPos(1:end)] );

    %         seqPos = strfind(C(1:i_END)','TCC');
    %         seqPos1 = [seqPos+lt+offset-1;seqPos+lt+offset;seqPos+lt+offset+1;seqPos+lt+offset+2];

            seqPos0 = strfind(C(1:end - (lMin - 1) + (lt-1))',targSeq);
            if ~isempty(seqPos0)
                seqPos1 = repmat(seqPos0+lt-1+offset,lc,1);
                seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
                seqPos1 = seqPos1(:);

                if sFlag == 1
                    fprintf(sfid,'%s',C(seqPos1));
                end
                if nargout >= 1
                    seq = [seq;C(seqPos1)];
                    if nargout >= 2
                        seqPos = [seqPos seqPos0+(n-1)*M-(lMin-1)];
                    end
                end
            end
            fprintf('  The %dth %d codes done. \n',n,M);
            C0 = C(end - (lMin - 1) + 1:end);
        end
        if nargout == 2
            seqPos0 = strfind(C0',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = [seqPos seqPos0+LastChunkLength+(n-1)*M-(lMin-1)];
            end
        end
    end 

else
    lMin = lt-offset; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end
    if -offset < lc
        error(' |offset| can not be smaller than lc, when offset < 0');
    end

    C = textscan(fid,'%c',M,'HeaderLines',1);
    C = C{1};

    if isempty(C)
        fprintf('\n  The data file is empty!\n')
        return
    end    

    if length(C) > 0 && length(C) < lMin  %too short, length < lMin

        seqPos0 = strfind(C',targSeq);
        if nargout == 2
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = seqPos0;
            end
        end

    elseif length(C) >= lMin && length(C) < M  %only read once, lMin <= length < M

        seqPos0 = strfind(C(1:lMin-1)',targSeq);
        if nargout == 2
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = seqPos0;
            end
        end     

        seqPos0 = strfind(C(-offset+1:end)',targSeq);

        if ~isempty(seqPos0)
            seqPos1 = repmat(seqPos0,lc,1);
            seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
            seqPos1 = seqPos1(:);

            if sFlag == 1
                fprintf(sfid,'%s',C(seqPos1));
            end
            if nargout >= 1
                seq = [seq;C(seqPos1)];
                if nargout >= 2
                    seqPos = [seqPos seqPos0-offset];
                end
            end
        end
    elseif length(C) == M %data is no less than M codes
        n = 1;
        fprintf('\n  Checking the %dth %d codes ... \n',n,M);

        seqPos0 = strfind(C(1:lMin-1)',targSeq);
        if nargout == 2
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = seqPos0;
            end
        end         

        seqPos0 = strfind(C(-offset+1:end)',targSeq);
        if ~isempty(seqPos0)
            seqPos1 = repmat(seqPos0,lc,1);
            seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
            seqPos1 = seqPos1(:);

            if sFlag == 1
                fprintf(sfid,'%s',C(seqPos1));
            end
            if nargout >= 1
                seq = [seq;C(seqPos1)];
                if nargout >= 2
                    seqPos = [seqPos seqPos0-offset];
                end
            end
        end
        fprintf('  The %dth %d codes done. \n',n,M);
        C0 = C(end-(lMin-1)+1:end);

        while(1)
            C = textscan(fid,'%c',M);
            if isempty(C{1})
                break
            end

            n = n+1;
            fprintf('\n  Checking the %dth %d codes ... \n',n,M);
            C = [C0;C{1}];    

            seqPos0 = strfind(C(-offset+1:end)',targSeq);

            if ~isempty(seqPos0)
                seqPos1 = repmat(seqPos0,lc,1);
                seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
                seqPos1 = seqPos1(:);
                if sFlag == 1
                    fprintf(sfid,'%s',C(seqPos1));
                end
                if nargout >= 1
                    seq = [seq;C(seqPos1)];
                    if nargout >= 2
                        seqPos = [seqPos seqPos0-offset+M*(n-1)-(lMin-1)];
                    end
                end
            end
            fprintf('  The %dth %d codes done. \n',n,M);
            C0 = C(end-(lMin-1)+1:end);
        end

    end
end

fprintf('\n  Searching finished! \n',n,M);
fclose('all');

%———————————– seqSearch.m V2.3 —————————————

function [seq,seqPos] = seqSearch(f,nHL,targSeq,lc,offset,M,sFlag,fs)
%[seq,seqPos] = seqSearch2(f,nHL,targSeq,lc,offset,M,sFlag,fs)
%[seq,seqPos] = seqSearch2(f) with all default parameters.
%[seq,seqPos] = seqSearch2(f,'','','','',1e7) with all default parameters except M
%** If the dataset is very large, NOT using output is highly recoomended.
%
%  Gene Sequence Search V2.3 - search for a specific sequence and record
%  a neighboring code sequence with an offset position using matrix for
%  large datasets.
%
%Input:
% f: data file path, format: 'filepath', e.g. 'C:\sample.seq'
% targSeq: search target sequence, by default 'TCC'
% lc: length of the codes to record, by default 4
% offset: number of offset positons from the search sequence to record the
%   code. e.g., offset is 1 for the first one after the last code (to the
%   right), and negative value means before the first code (to the left).
%   by default 1
% M: length of the chunk of the codes to read into memory each time,
%   by default 10E6
% sFlag: 1 for save, 0 for not save, by default 0
% fs: filename to save, only valid when sFlag = 1, e.g. 'seq.txt'
% nHL: number of headlines you want to remove from the data, by default 0
%
%Output:
% seq: the recorded codes
% seqPos: positions of the target search sequence
%
%   Binlin Wu - CCNY
%   bwu@sci.ccny.cuny.edu
%   Jun 10th, 2011

% set default values for the parameters
switch nargin
    case 0
        error('Parameters needed: seqSearch(f,nHL,targSeq,lc,offset,M,sFlag,fs)')
    case 1
        nHL = 0; targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 2
        targSeq = 'TCC'; lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 3
        lc = 4; offset = 1; M = 1e6; sFlag = 0;
    case 4
        offset = 1; M = 1e6; sFlag = 0;
    case 5
        M = 1e6; sFlag = 0;
    case 6
        sFlag = 0;
    case 7
        if sFlag == 1
            error('save destination file name missing');
        end
    case 8
        if isequal(sFlag,1) && isempty(fs)
            error('save destination file name missing');
        end
    otherwise
        error('too many parameters: seqSearch(f,nHL,targSeq,lc,offset,M,sFlag,fs)')
end

if isempty(nHL)
    nHL = 0;
end
if isempty(targSeq)
    targSeq = 'TCC';
end
if isempty(lc)
    lc = 4;
end
if isempty(offset)
    offset = 1;
end
if isempty(M)
    M = 1e6;
end
if isempty(sFlag)
    sFlag = 0;
end

% ---- existing save file overwritten confirm ---
confirm = 0;
if sFlag == 1 && exist(fs,'file') == 2
    while(1)
        if confirm == 1
            break
        end
        if sFlag == 1 && exist(fs,'file') == 2
            fprintf('Filename %s already exists!\n',fs);
            reply = upper(input('Do you want to overwrite it? Y/N [N]: ', 's'));
            if isempty(reply)
                reply = 'N';
            end
            if ~isequal(reply,'Y') && ~isequal(reply,'N')
                confirm = 0;
            elseif isequal(reply,'Y')
                confirm = 1;
            else
                fs1 = input('Please enter a new file name: ', 's');
                fprintf('Codes will be recorded in file: %s\n', fs1);
                reply = upper(input('Confirm? Y/N [Y]: ', 's'));
                if isempty(reply)
                    reply = 'Y';
                end
                if isequal(reply,'Y')
                    confirm = 1;
                    fs = fs1;
                else
                    confirm = 0;
                end
                if exist(fs1,'file') == 2
                    confirm = 0;
                end
            end
        end
    end
end

% ----- initialization -------

lt = length(targSeq); %length of the search target sequence
fclose('all');
% f = '.\sample.seq';
fid = fopen(f,'r');
if sFlag == 1
    sfid = fopen(fs,'w');
end

n = 0;
C0 = '';
seq = '';
seqPos = [];

% ----- start search -------

fprintf('Start searching ...\n')
if offset > 0
    lMin = lt+offset+lc-1; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end

    C = textscan(fid,'%c',M,'HeaderLines',nHL);
    C = C{1};

    if isempty(C) % && n == 1
        fprintf('\n  The data file is empty!\n')
        return

    elseif length(C) > 0 && length(C) < lMin  %too short, length < lMin
        if nargout == 2
            seqPos0 = strfind(C',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = [seqPos seqPos0];
            end
        end

    else
        while(1)
            if length(C) == 0
                break
            else
                n = n+1;
                fprintf('\n  Checking the %dth %d codes ... \n',n,M);
                LastChunkLength = length(C);
                C = [C0;C];
                seqPos0 = strfind(C(1:end - (lMin - 1) + (lt-1))',targSeq);
%                 seqPos0 = strfind(C(1:lengthC + lt-1)',targSeq); %not good for data length <=M
                if ~isempty(seqPos0)
                    seqPos1 = repmat(seqPos0+lt-1+offset,lc,1);
                    seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
                    seqPos1 = seqPos1(:);

                    if sFlag == 1
                        fprintf(sfid,'%s',C(seqPos1));
                    end
                    if nargout >= 1
                        seq = [seq;C(seqPos1)];
                        if nargout >= 2
                            if n==1 %data length <= M
                                seqPos = seqPos0;
                            else %data length > M
%                                 seqPos = [seqPos seqPos0+length(C)-(lMin-1)+(n-1)*M-(lMin-1)];  %not good for data length <=M
                                seqPos = [seqPos seqPos0+(n-1)*M-(lMin-1)];
                            end
                        end
                    end
                end
                fprintf('  The %dth %d codes done. \n',n,M);
                C0 = C(end - (lMin - 1) +1:end);
%                 C0 = C(lengthC + 1:end)  %not good for data length <=M
            end
            C = textscan(fid,'%c',M);
            C = C{1};
        end

        if nargout == 2
            seqPos0 = strfind(C0',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = [seqPos seqPos0+(n-1)*M+LastChunkLength-(lMin-1)];
            end
        end
    end
else
    lMin = lt-offset; %the least codes possible to have both a target sequence and the codes to record
    if M < lMin
        errMsg = sprintf('M must be larger than %d',lMin);
        error(errMsg);
    end
    if -offset < lc
        error(' |offset| can not be smaller than lc, when offset < 0');
    end

    C = textscan(fid,'%c',M,'HeaderLines',1);
    C = C{1};

    if isempty(C) % && n == 1
        fprintf('\n  The data file is empty!\n')
        return

    elseif length(C) > 0 && length(C) < lMin  %too short, length < lMin
        if nargout == 2
            seqPos0 = strfind(C',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = seqPos0;
            end
        end

    else
        if nargout == 2
            seqPos0 = strfind(C(1:lMin-1)',targSeq);
            if ~isempty(seqPos0)
                seq = [seq;repmat(' ',lc*length(seqPos0),1)];
                seqPos = seqPos0;
            end
        end
        while(1)
            if length(C) == 0
                break
            else
                n = n+1;
                fprintf('\n  Checking the %dth %d codes ... \n',n,M);
                C = [C0;C];
                seqPos0 = strfind(C(-offset+1:end)',targSeq);

                if ~isempty(seqPos0)
                    seqPos1 = repmat(seqPos0,lc,1);
                    seqPos1 = seqPos1 + repmat([0:lc-1]',1,length(seqPos0));
                    seqPos1 = seqPos1(:);

                    if sFlag == 1
                        fprintf(sfid,'%s',C(seqPos1));
                    end
                    if nargout >= 1
                        seq = [seq;C(seqPos1)];
                        if nargout >= 2
                            if n == 1
                                seqPos = [seqPos seqPos0-offset];
                            else
                                seqPos = [seqPos seqPos0-offset+M*(n-1)-(lMin-1)];
                            end
                        end
                    end
                end
                fprintf('  The %dth %d codes done. \n',n,M);
                C0 = C(end-(lMin-1)+1:end);
            end
            C = textscan(fid,'%c',M);
            C=C{1};
        end
    end
end

fprintf('\n  Searching finished! \n',n,M);
fclose('all');

Watch ANY Online Videos/TV (PPStream, PPLive etc) on TV via DLNA

Sometimes, we can’t get the streaming source for online videos, TV shows, and Live TV so as to send it to TV via DLNA using a DLNA server such as TVersity.

For online TV like PPStream, PPLive, these popular Chinese online TV software, there were some methods introduced using VLC: Link 1Link 2, and Link 3. The idea is this. VLC captures the online streaming, and generates a secondary streaming, which is taken by TVersity and shared on the local network. But it seems none of these works for me for uusee, pplive and ppstream, since I can’t find the streaming source for these software.

Streaming the whole screen will be an ultimate method that can work with any media source via DLNA streaming.

More details about VLC and TVersity can be found here: https://alenblog.wordpress.com/2011/04/21/stream-computer-desktop-to-tv-using-dlna-tversity-and-vlc/ and https://alenblog.wordpress.com/2011/04/07/tversity-dlna-for-samsung-tv/.

Here is how it’s done.

VLC can capture the screen using screen://. But it turns out it can’t capture audio simultaneously using input-slave: dshow://. The workaround would be to use DirecShow with a directshow filter, such as VH Screen Capture.

VH screen captures video or audio only in MP4. But both captured, video is not shown, even though MediaInfo shows video data is stored. For DIV3+MP3 (asf), VH can’t see video at all, even though video data was also stored. Without transcoding, output .asf file is fine. But the data file is so big, and VLC crashes soon. The picture captured in MP4 are really great. It’s the best picture I’ve got compared to other ways mentioned here.

UScreenCapture works fine with DIV3+mp3(asf) and MP4. But the picture quality is a lot worse, even if using larger BitRate and generate big data file. Anyway, for the time being, by using directshow (UScreenCapture), video and audio can be both captured.

Another problem is my SoundMax HD audio sound card doesn’t have Stereo Mix. So I use a stereo audio cable to connect my speaker and mic ports, sending sound from the speaker to mic directly. VLC captures the audio from the microphone.

If you can see your screen, but not the video, you may need disable the hardware accelaration, which can be found at the property of your graphics.

I make the streaming using the VLC GUI. I select the video and audio sources, transcode it to DIV3+MP3, 25fps, 4096 kbps, and then stream it to mms://127.0.0.1:1234 or http://127.0.0.1:1234, which is further shared by TVersity. Then I can watch on my Samsung HDTV anything on PC. It works great, particular for PPStream that I use it very often.

This way, I don’t need worry about that the media type for TVersity or Samsung TV any more. Whatever I can see on PC, I can see it on TV.

** When using VLC, try to transcode with some codecs that are natively supported by your TV. Then TVersity doesn’t need do another transcoding. That will work much better.

Run two Matlab functions simutaneously in parallel

My friend gave me a project: run two matlab loops (can be put in two functions) simultaneously. They have to be synchronized. They have to start at the same time, and they have to synchronize every step in loop. The problem was simplified because the two loops with run with a fairly stable step size. So what I need do is to start the two functions at the same time with error less than 0.1ms.

To run two matlab functions, we need parallel computing, or simply use two matlab sessions. For two matlab sessions, we need make a feedback in one session to trigger the other session. There are different ways to make the trigger.

1. Use hard drive to communite between two session is one way. Basically, we need first initialize a start.ini file with -1:

fid=fopen('start.ini','w');
fprintf(fid,'%d',-1);
fclose(fid);

Then one session start function 1 and write 1 to the ini file:

fid=fopen('start.ini','w');
fprintf(fid,'%d',1);
fclose(fid);

In the other session, it keeps checking the ini file, it starts once it reads 1.

while 1
    fid=fopen('start.ini','r');
    k=fscanf(fid,'%d',1);
    fclose(fid);
    if k==1
    %-------- reset start.ini to -1 start ---------------%
        fid=fopen('start.ini','w');
        fprintf(fid,'%d',-1);
        fclose(fid);
    %-------- reset start.ini to -1 end ---------------%
        break
    end
end

This works in a lot of scenarios. Here the problem with this is it takes a few milliseconds, and it fluctuates. So this is not a good solution.

2. Use a shared memory. Ram should be faster than Hard Drive. Basically, we can have one session write a variable to the memory, and have the other session read it, just like writing and reading from hard drive, but faster. The problem is two matlab sessions use two separate workspaces residing in different memory locations, not shared.

Actually, if we could, creating a shared variable in CPU cache (register) is the fastest way to read and write a variable for a computer. But I can’t find anything in matlab about that.

There is method called DDE (Dynamic Data Exchange) that makes different applications communicate.  But I didn’t find anything that can make DDE work between two matlab session.

Then I found a matlab program sharedmatrix on mathworks: http://www.mathworks.com/matlabcentral/fileexchange/28572-sharedmatrix. This program creates a memory block that will be shared by two matlab sessions. I managed to compile it in Matlab 2010a and Ubuntu. But I can’t make it work. This program seems very unstable. When the second session read the shared memory, matlab crashes. So I gave up this method.

There are other methods to make the feedback, but I can’t think of a way that can really solve my problem.

3. Use parallel computing

Another method to make the synchronization is to use parallel computing. A more detailed post can be found here about parallel computing in matlab: https://alenblog.wordpress.com/2011/03/31/parallel-computing-in-matlab/.

Basically, we can have two threads that can call two functions simultaneously. The traditional multithreading is actually through time division. So the time actually not synchronized for the two functions. But I’m not sure what the time difference is. I found a post here, where basically some codes were written in c++ for multithreading, it can be compiled in matlab to MEX file, and then be called. But like in the comment, I got same errors when compiling:

mex mythreadprog.cpp

Error mythreadprog.cpp: 11 syntax error; found `MyThread’ expecting `;’
Error mythreadprog.cpp: 19 redeclaration of `mexFunction’ previously declared at .\mex.h 135
Warning mythreadprog.cpp: 23 assignment of pointer to void function(pointer to void) to pointer to int function(pointer to void)
Error mythreadprog.cpp: 33 syntax error; found `MyThread’ expecting `;’
Warning mythreadprog.cpp: 58 missing return value
Error mythreadprog.cpp: 33 undefined size for `void cdecl’
4 errors, 2 warnings

C:\PROGRA~1\MATLAB\R2007B\BIN\MEX.PL: Error: Compile of ‘mythreadprog.cpp’ failed.

The new multithreading parallel computing using multi-core computer is built in recent matlab versions and can really do the job.

I found different ways to do parallel computing in matlab. The way posted here is using parallel computing, but it doesn’t make the synchronization. The code is shown below:

 
funList = {@fun1,@fun2,@fun3};
dataList = {data1,data2,data3}; %# or pass file names 

parfor i=1:length(funList)
    %# call the function
    funList{i}(dataList{i});
end

Finally, I got a way using matlabpool open two labs, and use spmd to run two functions in two labs separately. labBarrier is used to synchronize the two labs. The code is shown below.

function data = main

struct data1 data2;
data1.x = 1;
data2.x = 2;
funList = {@fun1,@fun2};
dataList = {data1,data2}; % or pass file names 

matlabpool open 2
spmd
    labBarrier
	funList{labindex}(dataList{labindex})
end
matlabpool close

function y = fun1(data1)
time1=clock;
fprintf('%02d : %02d : %02f\n',time1(4),time1(5),time1(6))
labindex
pause(5)
disp('run f1');

function y = fun2(data2)
time2=clock;
fprintf('%02d : %02d : %02f\n',time2(4),time2(5),time2(6))
labindex
pause(5);
disp('run f2')

We can see the two functions are starts at the same time. But the clock command only shows to the accuracy of 1ms.

Follow

Get every new post delivered to your Inbox.

Join 69 other followers