SMLlab

Posts

4

September 06, 2025

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Number of samples drawn and salaries per sample; the plot title is built
# from these so it always matches the simulation.
N_SAMPLES = 10
SAMPLE_SIZE = 50

# Fixed seed so the simulated population and the drawn samples are reproducible.
np.random.seed(42)

# Simulate a skewed salary distribution (exponential, scale=70000).
population = np.random.exponential(scale=70000, size=10000)

# Draw each sample with replacement and keep only its mean. The draws happen
# in the same order as a plain for-loop, so the seeded results are unchanged.
sample_means = [
    np.mean(np.random.choice(population, size=SAMPLE_SIZE, replace=True))
    for _ in range(N_SAMPLES)
]

# Plot the distribution of the sample means (histogram with a KDE overlay).
plt.figure(figsize=(8, 5))
sns.histplot(sample_means, bins=10, kde=True, color='skyblue', edgecolor='black')
plt.title(
    f"Sampling Distribution of Mean Salaries ({N_SAMPLES} samples of size {SAMPLE_SIZE})"
)
plt.xlabel("Sample Mean Salary")
plt.ylabel("Frequency")
plt.grid(True)
plt.tight_layout()
plt.show()

3

September 06, 2025

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Sample dataset: ten cars described by three numeric features.
data = {
    'EngineSize_L': [1.2, 1.6, 2.0, 2.5, 3.0, 1.8, 2.2, 3.5, 4.0, 2.8],
    'FuelEfficiency_MPG': [40, 35, 30, 28, 24, 33, 29, 20, 18, 26],
    'Price_USD': [18000, 20000, 24000, 28000, 35000, 22000, 26000, 40000, 45000, 33000],
}
df = pd.DataFrame(data)

# Pair plot: scatter plots for every pair of columns.
sns.pairplot(df)
# y=1.02 places the figure title slightly above the top of the grid.
plt.suptitle("Pair Plot: Engine Size, Fuel Efficiency, Price", y=1.02)
plt.tight_layout()
plt.show()

# Correlation matrix over the numeric columns.
corr_matrix = df.corr(numeric_only=True)
print("Correlation Matrix:\n", corr_matrix)

# Heatmap of the correlations, annotated with values to two decimals.
plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix: Car Features')
plt.tight_layout()
plt.show()

2

September 06, 2025

import pandas as pd import matplotlib.pyplot as plt import seaborn as sns # Sample dataset data = { 'Satisfaction': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'High', 'Medium', 'Low', 'High', 'Medium', 'Low', 'High', 'Medium', 'Low', 'High', 'High', 'Medium', 'Low', 'High'], 'RepeatPurchase': ['No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes'] } df = pd.DataFrame(data) # Count plot to show satisfaction vs repeat purchase plt.figure(figsize=(8, 5)) sns.countplot(data=df, x='Satisfaction', hue='RepeatPurchase') plt.title('Customer Satisfaction vs Rep...

1

September 06, 2025

import numpy as np

# Sample dataset of house prices (in lakhs).
house_prices = [45, 55, 60, 62, 68, 70, 75, 80, 85, 90, 100, 110, 125]

# 25th and 75th percentiles, computed in a single call.
q1, q3 = np.percentile(house_prices, [25, 75])

# Interquartile range: the spread of the middle half of the data.
iqr = q3 - q1

# Print the results.
print(f"25th Percentile (Q1): {q1}")
print(f"75th Percentile (Q3): {q3}")
print(f"Interquartile Range (IQR): {iqr}")