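What Are Percentiles?

A percentile is the value at or below which a given percentage of the observations in a dataset fall; for example, the 50th percentile is the median. The example below computes the 25th, 50th, and 75th percentiles with NumPy: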

import numpy as np

# Example observations (20 values, unsorted)
data = [14, 23, 7, 45, 18, 32, 9, 27, 12, 5, 38, 16, 21, 8, 34, 29, 11, 4, 37, 25]

# Ask for all three quartile cut points in a single call; np.percentile
# returns one value per requested percentile, in the order requested.
percentile_25, percentile_50, percentile_75 = np.percentile(data, [25, 50, 75])

# Report the quartiles (the 50th percentile is the median).
print(f"25th percentile: {percentile_25}")
print(f"50th percentile (median): {percentile_50}")
print(f"75th percentile: {percentile_75}")

Here is a more detailed example that explains the steps involved:

import numpy as np

# Sample data
data = [14, 23, 7, 45, 18, 32, 9, 27, 12, 5, 38, 16, 21, 8, 34, 29, 11, 4, 37, 25]

# Sort the data to understand the distribution
data_sorted = sorted(data)
print(f"Sorted data: {data_sorted}")

# Calculate and print the 10th, 25th, 50th, 75th, and 90th percentiles.
# np.percentile's default method interpolates linearly between the two
# closest ranks, so its results need not be values that occur in the data.
percentiles = [10, 25, 50, 75, 90]
for p in percentiles:
    percentile_value = np.percentile(data, p)
    print(f"{p}th percentile: {percentile_value}")


def nearest_rank_percentile(sorted_values, percent):
    """Return the *percent*-th percentile using the nearest-rank method.

    The nearest-rank percentile is the smallest element of the data such
    that at least ``percent`` % of the observations are less than or equal
    to it, i.e. the element at 1-based rank ``ceil(percent / 100 * n)``.
    The result is always a member of the data, which is why it can differ
    from the interpolated value returned by ``np.percentile``.

    Args:
        sorted_values: Non-empty sequence already sorted in ascending order.
        percent: Percentile to look up, a number in the range 0..100.

    Returns:
        The element of ``sorted_values`` at the nearest rank.

    Raises:
        ValueError: If ``sorted_values`` is empty or ``percent`` is outside
            the range 0..100.
    """
    if not sorted_values:
        raise ValueError("cannot take a percentile of empty data")
    if not 0 <= percent <= 100:
        raise ValueError(f"percent must be between 0 and 100, got {percent}")

    count = len(sorted_values)
    # Ceiling division written as -(-a // b): truncating with int() instead
    # would select one element too low whenever percent * count / 100 is not
    # a whole number.
    rank = int(-(-percent * count // 100))
    # Rank 0 (percent == 0) would become index -1 and wrap around to the
    # largest element, so clamp to the first element instead.
    rank = max(rank, 1)
    return sorted_values[rank - 1]


# Let's calculate the percentiles manually for a better understanding.
# These use the nearest-rank definition, so each result is an actual data
# point and may differ from the interpolated NumPy values printed above.

# 10th percentile: smallest value with at least 10% of the data at or below it
percentile_10_manual = nearest_rank_percentile(data_sorted, 10)
print(f"10th percentile (manual calculation): {percentile_10_manual}")

# 25th percentile: smallest value with at least 25% of the data at or below it
percentile_25_manual = nearest_rank_percentile(data_sorted, 25)
print(f"25th percentile (manual calculation): {percentile_25_manual}")

# 50th percentile: smallest value with at least 50% of the data at or below it
percentile_50_manual = nearest_rank_percentile(data_sorted, 50)
print(f"50th percentile (manual calculation): {percentile_50_manual}")

# 75th percentile: smallest value with at least 75% of the data at or below it
percentile_75_manual = nearest_rank_percentile(data_sorted, 75)
print(f"75th percentile (manual calculation): {percentile_75_manual}")

# 90th percentile: smallest value with at least 90% of the data at or below it
percentile_90_manual = nearest_rank_percentile(data_sorted, 90)
print(f"90th percentile (manual calculation): {percentile_90_manual}")