using System;
using System.IO;
namespace Dev102
{
/// <summary>
/// External sort exercise: sorts the records of <c>RandomFile</c> into <c>targetFile</c>
/// while holding at most <c>MaxInitRecords</c> records in memory at a time.
/// The input is first split into sorted chunks that are distributed alternately over two
/// temporary files; those files are then merged repeatedly (balanced two-way natural merge)
/// until a single sorted run remains.
/// </summary>
internal class Interview5
{
    /// <summary>
    /// Maximum number of records that may be held in memory at once.
    /// Simulates the memory constraint.
    /// </summary>
    private const int MaxInitRecords = 100;

    /// <summary>
    /// Number of test records written by <see cref="CreateDataFile"/>.
    /// </summary>
    private const int totalRecords = 100000;

    /// <summary>
    /// Path of the unsorted input file.
    /// </summary>
    private const string RandomFile = @"C:\Temp\Mess.txt";

    /// <summary>
    /// Path of the sorted result file.
    /// </summary>
    const string targetFile = @"C:\Temp\Mess_Cleaned.txt";

    /// <summary>
    /// Creates the input file and fills it with <c>totalRecords</c> random records,
    /// one ten-digit, zero-padded number per line. An existing file is replaced.
    /// </summary>
    public static void CreateDataFile()
    {
        if (File.Exists(RandomFile))
        {
            File.Delete(RandomFile);
        }

        using (StreamWriter output = File.CreateText(RandomFile))
        {
            Random rnd = new Random();
            for (int i = 0; i < totalRecords; i++)
            {
                // Truncate (and clamp) instead of letting the format string round the double:
                // rounding could turn a value just below 10^10 into the eleven-character
                // record "10000000000", which would break the fixed-width string ordering.
                long value = Math.Min((long)(rnd.NextDouble() * 10000000000), 9999999999L);
                output.WriteLine(value.ToString("0000000000"));
            }
        }
    }

    /// <summary>
    /// Sorts the records of the input file into the target file.
    /// Reads the input in chunks of at most <c>MaxInitRecords</c> records, sorts each chunk
    /// in memory and writes the chunks alternately to two temporary files, then hands those
    /// files to <see cref="MergeSort"/>.
    /// </summary>
    public static void SortRandomData()
    {
        string tmpFileA = Path.GetTempFileName();
        string tmpFileB = Path.GetTempFileName();

        using (StreamReader source = File.OpenText(RandomFile))
        using (StreamWriter outputA = File.CreateText(tmpFileA))
        using (StreamWriter outputB = File.CreateText(tmpFileB))
        {
            StreamWriter current = outputA;
            string[] chunk = new string[MaxInitRecords];
            int count;

            // Records are read line by line, so the chunking does not depend on the record
            // width or on the newline convention of the input file.
            while ((count = ReadChunk(source, chunk)) > 0)
            {
                // Ordinal comparison, matching the comparisons used while merging.
                Array.Sort(chunk, 0, count, StringComparer.Ordinal);
                for (int i = 0; i < count; i++)
                {
                    current.WriteLine(chunk[i]);
                }

                // Toggle the destination file so the sorted runs are spread over both files.
                current = (current == outputA) ? outputB : outputA;
            }
        }

        Console.Out.WriteLine("tmpFileA = {0}", tmpFileA);
        Console.Out.WriteLine("tmpFileB = {0}", tmpFileB);
        MergeSort(tmpFileA, tmpFileB);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> with the next non-empty lines of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The reader to take records from.</param>
    /// <param name="buffer">Receives the records; its length limits the chunk size.</param>
    /// <returns>The number of records stored; 0 only when the input is exhausted.</returns>
    private static int ReadChunk(StreamReader source, string[] buffer)
    {
        int count = 0;
        string line;
        while (count < buffer.Length && (line = source.ReadLine()) != null)
        {
            // Blank lines are not records and are skipped.
            if (line.Length > 0)
            {
                buffer[count++] = line;
            }
        }
        return count;
    }

    /// <summary>
    /// Repeatedly merges the sorted runs of two files into two new temporary files until the
    /// second output file stays empty, i.e. the first one holds a single sorted run. That file
    /// is then moved to <c>targetFile</c> and the remaining temporary files are deleted.
    /// </summary>
    /// <param name="firstFile">First file containing sorted runs.</param>
    /// <param name="secondFile">Second file containing sorted runs (may be empty).</param>
    private static void MergeSort(string firstFile, string secondFile)
    {
        // Slots 0/1 hold the inputs of the current pass, slots 2/3 its outputs.
        string[] fileNames = new string[] { "", "", firstFile, secondFile };
        int pass = 0;

        while (new FileInfo(fileNames[3]).Length > 0)
        {
            pass++;
            Console.Out.WriteLine("Run Nr. = {0}", pass);

            // The inputs of the previous pass are no longer needed.
            RemoveOldTmpFiles(fileNames);
            SwapFilenamesAndGetTmpFilenames(fileNames);
            MergePass(fileNames[0], fileNames[1], fileNames[2], fileNames[3]);
        }

        File.Delete(targetFile);
        File.Move(fileNames[2], targetFile);

        // Remove the inputs of the last pass and the (empty) second output.
        RemoveOldTmpFiles(fileNames);
        if (File.Exists(fileNames[3]))
        {
            File.Delete(fileNames[3]);
        }
        Console.Out.WriteLine("Done...");
    }

    /// <summary>
    /// Performs one merge pass: merges the runs of the two input files and writes the
    /// resulting runs alternately to the two output files.
    /// </summary>
    /// <param name="inputA">First input file.</param>
    /// <param name="inputB">Second input file.</param>
    /// <param name="outputA">First output file; receives the first merged run.</param>
    /// <param name="outputB">Second output file; stays empty if only one run is produced.</param>
    private static void MergePass(string inputA, string inputB, string outputA, string outputB)
    {
        using (StreamReader readerA = File.OpenText(inputA))
        using (StreamReader readerB = File.OpenText(inputB))
        using (StreamWriter writerA = File.CreateText(outputA))
        using (StreamWriter writerB = File.CreateText(outputB))
        {
            StreamWriter current = writerA;
            string recordA = ReadRecord(readerA);
            string recordB = ReadRecord(readerB);
            string lastRecord = null; // null = the current output run is still empty

            // Loop until every pending record has been written. Stopping as soon as both
            // readers hit end-of-stream would leave a held record unchecked, and appending it
            // blindly could hide a run break inside the first output file.
            while (recordA != null || recordB != null)
            {
                bool canTakeA = ExtendsRun(recordA, lastRecord);
                bool canTakeB = ExtendsRun(recordB, lastRecord);

                if (!canTakeA && !canTakeB)
                {
                    // Neither pending record can continue the run: start a new run in the
                    // other output file. The next iteration always writes a record there.
                    current = (current == writerA) ? writerB : writerA;
                    lastRecord = null;
                    continue;
                }

                // Take the smaller of the eligible records; A wins ties.
                bool takeA = canTakeA
                             && (!canTakeB || string.CompareOrdinal(recordA, recordB) <= 0);
                if (takeA)
                {
                    current.WriteLine(recordA);
                    lastRecord = recordA;
                    recordA = ReadRecord(readerA);
                }
                else
                {
                    current.WriteLine(recordB);
                    lastRecord = recordB;
                    recordB = ReadRecord(readerB);
                }
            }
        }
    }

    /// <summary>
    /// Reads the next non-empty line from <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">The reader to take the record from.</param>
    /// <returns>The next record, or <c>null</c> when the reader is exhausted.</returns>
    private static string ReadRecord(StreamReader reader)
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (line.Length > 0)
            {
                return line;
            }
        }
        return null;
    }

    /// <summary>
    /// Checks whether a pending record may be appended to the current output run.
    /// </summary>
    /// <param name="record">The pending record, or <c>null</c> if there is none.</param>
    /// <param name="lastRecord">The last record written to the run, or <c>null</c> if the run is empty.</param>
    /// <returns><c>true</c> if the record exists and is not smaller than <paramref name="lastRecord"/>.</returns>
    private static bool ExtendsRun(string record, string lastRecord)
    {
        if (record == null)
        {
            return false;
        }
        return lastRecord == null || string.CompareOrdinal(record, lastRecord) >= 0;
    }

    /// <summary>
    /// Makes the outputs of the previous pass the inputs of the next one and
    /// allocates two fresh temporary files as the new outputs.
    /// </summary>
    /// <param name="fileNames">The file names: slots 0/1 are inputs, slots 2/3 are outputs.</param>
    private static void SwapFilenamesAndGetTmpFilenames(string[] fileNames)
    {
        fileNames[0] = fileNames[2];
        fileNames[1] = fileNames[3];
        fileNames[2] = Path.GetTempFileName();
        fileNames[3] = Path.GetTempFileName();
    }

    /// <summary>
    /// Deletes the two input files (slots 0 and 1) if they exist.
    /// </summary>
    /// <param name="fileNames">The file names: slots 0/1 are inputs, slots 2/3 are outputs.</param>
    private static void RemoveOldTmpFiles(string[] fileNames)
    {
        if (File.Exists(fileNames[0]))
        {
            File.Delete(fileNames[0]);
        }
        if (File.Exists(fileNames[1]))
        {
            File.Delete(fileNames[1]);
        }
    }
}
}