C-Sharp | Java | Python | Swift | GO | WPF | Ruby | Scala | F# | JavaScript | SQL | PHP | Angular | HTML
Step 1: A List with 7 int elements is created. The List contains duplicate elements for the values 3 and 4.
Step 2: We use the Distinct() extension method on the List. Duplicates are removed. With ToList, we convert back to the original List type.
C# program that removes duplicates in a List
using System;
using System.Collections.Generic;
using System.Linq;
class Program
{
    static void Main()
    {
        // Step 1: build a List that contains repeated values (3 and 4 appear more than once).
        var list = new List<int> { 1, 2, 3, 3, 4, 4, 4 };
        foreach (int value in list)
        {
            Console.WriteLine("Before: {0}", value);
        }
        // Step 2: Distinct removes the repeats; ToList materializes the result as a List again.
        var distinct = list.Distinct().ToList();
        foreach (int value in distinct)
        {
            Console.WriteLine("After: {0}", value);
        }
    }
}
Output
Before: 1
Before: 2
Before: 3
Before: 3
Before: 4
Before: 4
Before: 4
After: 1
After: 2
After: 3
After: 4
Tip: We can use Distinct() with a special IEqualityComparer to treat all objects with an equal field as duplicates of one another.
Distinct: This System.Linq extension method can be invoked on arrays, Lists, or any IEnumerable type.
Thus: We can make the result have each object with a different key. We remove duplicates considering only a part of each object.
C# program that finds objects with unique fields
using System;
using System.Collections.Generic;
using System.Linq;
// A simple record-like type: Key is used for equality grouping by
// ItemEqualityComparer; Value is the display payload.
// Auto-properties replace the original public mutable fields (framework
// design guideline: expose properties, not fields); object-initializer
// syntax and member reads used elsewhere remain source-compatible.
class Item
{
    public int Key { get; set; }
    public string Value { get; set; }
}
// Compares Item objects by Key alone, so two items with equal keys are
// treated as duplicates by Distinct regardless of their Value.
// Fix: the original dereferenced x, y and obj unconditionally and would
// throw NullReferenceException if the sequence contained a null element;
// both members are now null-safe, as the IEqualityComparer contract expects.
class ItemEqualityComparer : IEqualityComparer<Item>
{
    public bool Equals(Item x, Item y)
    {
        // Same reference (or both null) is always equal.
        if (ReferenceEquals(x, y))
        {
            return true;
        }
        // Exactly one side null: not equal.
        if (x == null || y == null)
        {
            return false;
        }
        // Two items are equal if their keys are equal.
        return x.Key == y.Key;
    }
    public int GetHashCode(Item obj)
    {
        // Null-safe hash; must be consistent with Equals above.
        return obj == null ? 0 : obj.Key.GetHashCode();
    }
}
class Program
{
    static void Main()
    {
        // Each Key (5 and 10) appears twice with different Values.
        var list = new List<Item>
        {
            new Item() { Key = 5, Value = "Bird" },
            new Item() { Key = 5, Value = "Frog" },
            new Item() { Key = 10, Value = "Bird" },
            new Item() { Key = 10, Value = "Frog" }
        };
        // Distinct with the custom comparer keeps only the first Item seen per Key.
        var result = list.Distinct(new ItemEqualityComparer());
        foreach (var item in result)
        {
            Console.WriteLine($"DISTINCT ITEM = {item.Key} {item.Value}");
        }
    }
}
Output
DISTINCT ITEM = 5 Bird
DISTINCT ITEM = 10 Bird
And: If an element that comes before the current one is the same as the current one, we have a duplicate.
Warning: This method would become extremely slow on large collections of elements.
C# program that uses iterative removal
using System;
using System.Collections.Generic;
class Program
{
    // Returns a new list with each value from items kept once, in first-seen
    // order. Quadratic: every element is checked against all earlier distinct
    // values via a linear scan, so this suits only small inputs.
    public static List<string> RemoveDuplicatesIterative(List<string> items)
    {
        List<string> result = new List<string>();
        foreach (string item in items)
        {
            // result holds exactly the distinct values among the earlier
            // elements, so a linear search of it detects any duplicate.
            if (!result.Contains(item))
            {
                result.Add(item);
            }
        }
        return result;
    }
    static void Main()
    {
        // Call method with this input.
        List<string> input = new List<string>() { "x", "x", "y", "y", "y", "z" };
        List<string> output = RemoveDuplicatesIterative(input);
        Console.WriteLine("Input: " + string.Join(",", input));
        Console.WriteLine("Output: " + string.Join(",", output));
    }
}
Output
Input: x,x,y,y,y,z
Output: x,y,z
Note: This is similar to the Distinct method in its implementation. It may be slower for tiny lists than the iterative method.
C# program that uses HashSet for duplicates
using System;
using System.Collections.Generic;
class Program
{
    // Returns items with duplicates removed, preserving first-seen order.
    // A HashSet gives O(1) average membership checks, so the pass is O(n).
    public static List<string> RemoveDuplicatesSet(List<string> items)
    {
        var result = new List<string>();
        var seen = new HashSet<string>();
        foreach (string item in items)
        {
            // HashSet.Add returns false when the value was already present,
            // so one call both tests for and records the duplicate.
            if (seen.Add(item))
            {
                result.Add(item);
            }
        }
        return result;
    }
    static void Main()
    {
        var input = new List<string>() { "a", "b", "a", "b", "b", "b", "c" };
        var output = RemoveDuplicatesSet(input);
        Console.WriteLine("Input: " + string.Join(",", input));
        Console.WriteLine("Output: " + string.Join(",", output));
    }
}
Output
Input: a,b,a,b,b,b,c
Output: a,b,c
Note: The list has the values placed in an alternating order, and then all the remaining duplicates are placed at the end.
C# program that generates duplicates
using System;
using System.Collections.Generic;
class Program
{
    // Builds a list of length unique "cat0".."catN" strings with up to
    // duplicatesLength copies of a fixed duplicate string interleaved after
    // the unique entries; any duplicates left over are appended at the end.
    public static List<string> GetListWithDuplicates(int length, int duplicatesLength)
    {
        const string duplicateString = "bird";
        var result = new List<string>();
        // Interleave one duplicate after each unique string while any remain.
        for (int i = 0; i < length; i++)
        {
            result.Add("cat" + i);
            if (duplicatesLength > 0)
            {
                result.Add(duplicateString);
                duplicatesLength--;
            }
        }
        // Append the duplicates that were not interleaved above.
        while (duplicatesLength-- > 0)
        {
            result.Add(duplicateString);
        }
        return result;
    }
    static void Main()
    {
        Console.WriteLine(string.Join(",", GetListWithDuplicates(5, 2)));
        Console.WriteLine(string.Join(",", GetListWithDuplicates(1, 0)));
        Console.WriteLine(string.Join(",", GetListWithDuplicates(4, 4)));
    }
}
Output
cat0,bird,cat1,bird,cat2,cat3,cat4
cat0
cat0,bird,cat1,bird,cat2,bird,cat3,bird
Type T: We can use T to mean any type. So we can remove duplicates in a list of strings, ints or anything that has a hash code.
C# program that uses generic method
using System;
using System.Collections.Generic;
class Program
{
    // Generic duplicate removal: works for any T with hash-based equality
    // (strings, ints, or any type with sensible Equals/GetHashCode).
    // Keeps the first occurrence of each value, preserving input order.
    public static List<T> RemoveDuplicatesSet<T>(List<T> items)
    {
        var result = new List<T>();
        var seen = new HashSet<T>();
        foreach (T item in items)
        {
            // Add returns false for values already recorded in the set.
            if (seen.Add(item))
            {
                result.Add(item);
            }
        }
        return result;
    }
    static void Main()
    {
        var input = new List<string>() { "j", "x", "j", "x", "y" };
        var output = RemoveDuplicatesSet(input);
        Console.WriteLine("Input: " + string.Join(",", input));
        Console.WriteLine("Output: " + string.Join(",", output));
    }
}
Output
Input: j,x,j,x,y
Output: j,x,y
Version 1: This version of the code uses the Distinct extension method to remove duplicate elements.
Version 2: In this code, we use the HashSet to remove duplicate elements—we build up a new List by checking the HashSet.
Version 3: We use a nested for-loop to build up a List containing all the non-duplicated elements.
Result: For the 3-element lists, the nested for-loop (version 3) is fastest. For larger lists, the HashSet method (version 2) is fastest.
Thus: For short lists, prefer a for-loop. For long lists, prefer a HashSet method. A branch could test for the optimal dedupe strategy.
C# program that benchmarks duplicate removal methods
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
class Program
{
    static void Main()
    {
        for (int test = 0; test < 3; test++)
        {
            // Tuple: description, input list, expected unique count, iterations.
            var testData = GetTestData(test);
            var iterations = testData.Item4;
            // Time each version with the same data and iteration count.
            // Version 1: use Distinct.
            var msDistinct = TimeVersion(list => list.Distinct().ToList(),
                testData.Item2, testData.Item3, iterations);
            // Version 2: use HashSet.
            var msSet = TimeVersion(RemoveDuplicatesSet,
                testData.Item2, testData.Item3, iterations);
            // Version 3: use nested for-loop.
            var msIterative = TimeVersion(RemoveDuplicatesIterative,
                testData.Item2, testData.Item3, iterations);
            // Write description.
            Console.WriteLine(testData.Item1);
            // Write timings.
            Console.WriteLine(msDistinct + " ms");
            Console.WriteLine(msSet + " ms");
            Console.WriteLine(msIterative + " ms");
        }
    }
    // Runs dedupe over data for the given number of iterations and returns
    // the elapsed time in milliseconds. Validates each result against
    // expectedCount; the original code silently returned from Main on a
    // mismatch, hiding broken results — an exception is thrown instead.
    static double TimeVersion(Func<List<string>, List<string>> dedupe,
        List<string> data, int expectedCount, int iterations)
    {
        var stopwatch = Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
        {
            var unique = dedupe(data);
            if (unique.Count != expectedCount)
            {
                throw new InvalidOperationException(
                    "Unexpected unique count: " + unique.Count);
            }
        }
        stopwatch.Stop();
        return stopwatch.Elapsed.TotalMilliseconds;
    }
    // Returns test data for the given test index.
    // Tuple contains description string, list, the unique element count, and iterations for test.
    static Tuple<string, List<string>, int, int> GetTestData(int test)
    {
        switch (test)
        {
            default:
            case 0:
                return new Tuple<string, List<string>, int, int>("3 ELEMENT LIST, 0 DUPLICATES",
                    GetListWithDuplicates(3, 0),
                    3,
                    100000);
            case 1:
                return new Tuple<string, List<string>, int, int>("300 ELEMENT LIST, 100 DUPLICATES",
                    GetListWithDuplicates(200, 100),
                    201,
                    1000);
            case 2:
                return new Tuple<string, List<string>, int, int>("3000 ELEMENT LIST, 1000 DUPLICATES",
                    GetListWithDuplicates(2000, 1000),
                    2001,
                    100);
        }
    }
    // Builds a list of unique "cat" strings with duplicatesLength copies of
    // a fixed duplicate string interleaved; leftovers are appended at the end.
    public static List<string> GetListWithDuplicates(int length, int duplicatesLength)
    {
        const string duplicateString = "bird";
        List<string> result = new List<string>();
        for (int i = 0; i < length; i++)
        {
            result.Add("cat" + i);
            if (duplicatesLength > 0)
            {
                result.Add(duplicateString);
                duplicatesLength--;
            }
        }
        for (int i = 0; i < duplicatesLength; i++)
        {
            result.Add(duplicateString);
        }
        return result;
    }
    // Removes duplicates in O(n) using a HashSet, preserving first-seen order.
    public static List<string> RemoveDuplicatesSet(List<string> items)
    {
        var result = new List<string>();
        var set = new HashSet<string>();
        for (int i = 0; i < items.Count; i++)
        {
            if (!set.Contains(items[i]))
            {
                result.Add(items[i]);
                set.Add(items[i]);
            }
        }
        return result;
    }
    // Removes duplicates with a nested O(n^2) scan, preserving first-seen order.
    public static List<string> RemoveDuplicatesIterative(List<string> items)
    {
        List<string> result = new List<string>();
        for (int i = 0; i < items.Count; i++)
        {
            bool duplicate = false;
            for (int z = 0; z < i; z++)
            {
                if (items[z] == items[i])
                {
                    duplicate = true;
                    break;
                }
            }
            if (!duplicate)
            {
                result.Add(items[i]);
            }
        }
        return result;
    }
}
Output
3 ELEMENT LIST, 0 DUPLICATES
37.9524 ms
19.9176 ms
8.0359 ms
300 ELEMENT LIST, 100 DUPLICATES
23.1117 ms
20.499 ms
188.2431 ms
3000 ELEMENT LIST, 1000 DUPLICATES
22.7601 ms
21.4638 ms
1765.6447 ms