1. using System;
  2. using System.IO;
  3.  
  4. namespace Dev102
  5. {
  6. internal class Interview5
  7. {
  8. /// <summary>
  9. /// Maximum Number of Records that could be held in Memory
  10. /// Simulates the memory constrain
  11. /// </summary>
  12. private const int MaxInitRecords = 100;
  13.  
  14. /// <summary>
  15. /// Number of test Records
  16. /// </summary>
  17. private const int totalRecords = 100000;
  18. private const string RandomFile = @"C:\Temp\Mess.txt";
  19. const string targetFile = @"C:\Temp\Mess_Cleaned.txt";
  20. private const int SizeWithNewline = 12;
  21.  
  22. public static void CreateDataFile()
  23. {
  24. if (File.Exists(RandomFile))
  25. File.Delete(RandomFile);
  26. using (StreamWriter output = File.CreateText(RandomFile))
  27. {
  28. Random rnd = new Random();
  29. for (int i = 0; i < totalRecords; i++)
  30. {
  31. output.WriteLine((rnd.NextDouble()*10000000000).ToString("0000000000"));
  32. }
  33. }
  34. }
  35.  
  36. public static void SortRandomData()
  37. {
  38. //Initialize
  39. string tmpFileA = Path.GetTempFileName();
  40. string tmpFileB = Path.GetTempFileName();
  41.  
  42. using (StreamReader source = File.OpenText(RandomFile))
  43. {
  44. StreamWriter outputA = File.CreateText(tmpFileA);
  45. StreamWriter outputB = File.CreateText(tmpFileB);
  46. StreamWriter current = outputA;
  47.  
  48. while (! source.EndOfStream)
  49. {
  50. int MaximumMemoryLimit = SizeWithNewline*MaxInitRecords;
  51. char[] initialBlock = new char[MaximumMemoryLimit];
  52. //Read the First Block of Data to sort.
  53. //This uses more Memory then allowed (Stores the input twice),
  54. //but I am using the simple version so I can process the whole chunk at once
  55. //If you are picky, then imagine I loop over the input, and read one line at a time ;)
  56. int dataRead;
  57. dataRead = source.ReadBlock(initialBlock, 0, MaximumMemoryLimit);
  58.  
  59. //Handle "short" Input... Trim the Array down so it only holds the elements that are actually read
  60. if (dataRead < MaximumMemoryLimit)
  61. Array.Resize(ref initialBlock, dataRead);
  62. String[] array =
  63. new String(initialBlock).Split(new string[] {Environment.NewLine},
  64. StringSplitOptions.RemoveEmptyEntries);
  65. //Quicksort the Array - Array.Sort uses a Quicksort so dont reinvent the wheel
  66. Array.Sort(array);
  67. current.WriteLine(String.Join(Environment.NewLine, array));
  68. //Toggle the Destination File
  69. current = (current == outputA) ? outputB : outputA;
  70. }
  71. outputA.Dispose();
  72. outputB.Dispose();
  73. Console.Out.WriteLine("tmpFileA = {0}", tmpFileA);
  74. Console.Out.WriteLine("tmpFileB = {0}", tmpFileB);
  75. }
  76. MergeSort(tmpFileA, tmpFileB);
  77. }
  78.  
  79. private static void MergeSort(string firstFile, string secondFile)
  80. {
  81. String[] fileNames = new string[] {"", "", firstFile, secondFile};
  82. int Runs = 0;
  83. while ((new FileInfo(fileNames[3])).Length > 0)
  84. {
  85. Runs++;
  86. Console.Out.WriteLine("Run Nr. = {0}", Runs);
  87. //Remove Old tmp Files
  88. RemoveOldTmpFiles(fileNames);
  89. SwapFilenamesAndGetTmpFilenames(fileNames);
  90. //Open Files for Reading and Writing
  91. StreamReader readerA = File.OpenText(fileNames[0]);
  92. StreamReader readerB = File.OpenText(fileNames[1]);
  93. StreamWriter writerA = File.CreateText(fileNames[2]);
  94. StreamWriter writerB = File.CreateText(fileNames[3]);
  95. StreamWriter current = writerA;
  96. string currentRecordA = String.Empty, currentRecordB = String.Empty,lastRecord = String.Empty;
  97. while ((!(readerA.EndOfStream && readerB.EndOfStream)))
  98. {
  99. //Read new Record
  100. if (currentRecordA == string.Empty && !readerA.EndOfStream)
  101. currentRecordA = readerA.ReadLine();
  102. if (currentRecordB == string.Empty && !readerB.EndOfStream)
  103. currentRecordB = readerB.ReadLine();
  104. if (CompareRecords(currentRecordA, currentRecordB, lastRecord))
  105. {
  106. current.WriteLine(currentRecordA);
  107. lastRecord = currentRecordA;
  108. currentRecordA = string.Empty;
  109. }
  110. else if (CompareRecords(currentRecordB, currentRecordA, lastRecord))
  111. {
  112. current.WriteLine(currentRecordB);
  113. lastRecord = currentRecordB;
  114. currentRecordB = string.Empty;
  115. }
  116. else
  117. {
  118. current = (current == writerA) ? writerB : writerA;
  119. lastRecord = string.Empty;
  120. }
  121. }
  122. //ONLY One record might be left.... Either A or B, but not both
  123. if (currentRecordB != String.Empty)
  124. current.WriteLine(currentRecordB);
  125. if (currentRecordA != String.Empty)
  126. current.WriteLine(currentRecordA);
  127. DisposeStreams(writerA, readerA, readerB, writerB);
  128. }
  129.  
  130. File.Delete(targetFile);
  131. File.Move(fileNames[2], targetFile);
  132. //Remove Source Files
  133. RemoveOldTmpFiles(fileNames);
  134. //Remove Empty File
  135. if (File.Exists(fileNames[3]))
  136. File.Delete(fileNames[3]);
  137. Console.Out.WriteLine("Done...");
  138. }
  139.  
  140. /// <summary>
  141. /// Disposes the streams.
  142. /// </summary>
  143. /// <param name="writerA">The writer A.</param>
  144. /// <param name="readerA">The reader A.</param>
  145. /// <param name="readerB">The reader B.</param>
  146. /// <param name="writerB">The writer B.</param>
  147. private static void DisposeStreams(StreamWriter writerA, StreamReader readerA, StreamReader readerB, StreamWriter writerB)
  148. {
  149. readerA.Dispose();
  150. readerB.Dispose();
  151. writerA.Dispose();
  152. writerB.Dispose();
  153. }
  154.  
  155. /// <summary>
  156. /// Record comparer. Checks if Records A is smaller then B, and detects when its time for a "switch"
  157. /// </summary>
  158. /// <param name="currentRecordA">The current record A.</param>
  159. /// <param name="currentRecordB">The current record B.</param>
  160. /// <param name="lastRecord">The last record.</param>
  161. /// <returns></returns>
  162. private static bool CompareRecords(string currentRecordA, string currentRecordB, string lastRecord)
  163. {
  164.  
  165. //Dont write Empty Records
  166. if (currentRecordA == String.Empty)
  167. return false;
  168. //We have to switch files... Both are smaller then last record
  169. if (currentRecordA.CompareTo(lastRecord) < 0 && currentRecordB.CompareTo(lastRecord) < 0)
  170. return false;
  171. // Plain Compare here
  172. if (currentRecordB == String.Empty)
  173. return currentRecordA.CompareTo(lastRecord) >= 0;
  174. else
  175. if (currentRecordA.CompareTo(currentRecordB) <= 0)
  176. return currentRecordA.CompareTo(lastRecord) >= 0 || lastRecord == String.Empty;
  177. else
  178. return currentRecordB.CompareTo(lastRecord) <0;
  179. }
  180.  
  181. /// <summary>
  182. /// Swaps the filenames and get new TMP filenames.
  183. /// </summary>
  184. /// <param name="fileNames">The file names.</param>
  185. private static void SwapFilenamesAndGetTmpFilenames(string[] fileNames)
  186. {
  187. fileNames[0] = fileNames[2];
  188. fileNames[1] = fileNames[3];
  189. fileNames[2] = Path.GetTempFileName();
  190. fileNames[3] = Path.GetTempFileName();
  191. }
  192.  
  193. /// <summary>
  194. /// Removes the old TMP files.
  195. /// </summary>
  196. /// <param name="fileNames">The file names.</param>
  197. private static void RemoveOldTmpFiles(string[] fileNames)
  198. {
  199. if (File.Exists(fileNames[0]))
  200. File.Delete(fileNames[0]);
  201. if (File.Exists(fileNames[1]))
  202. File.Delete(fileNames[1]);
  203. }
  204. }
  205. }