| DataAnalysis (version 0.0.1) | index DataAnalysis.html | 
This package implements tools to build Python packages and tools.
 
>>> from DataAnalysis import DataAnalysis
>>> from pprint import pprint
>>> data = [
...     {
...             "age": 45,
...             "pay": 80000,
...             "level": 12,
...     },
...     {
...             "age": 18,
...             "pay": 25000,
...             "level": 3,
...     },
...     {
...             "age": 45,
...             "pay": 40000,
...             "level": 3,
...     }
... ]
>>> analyse = DataAnalysis(data)
>>> pprint([x for x in analyse.get_medians()])
[statistictype(key='age', value=45),
 statistictype(key='pay', value=40000),
 statistictype(key='level', value=3)]
>>> pprint([x for x in analyse.get_deviations()])
[statistictype(key='age', value=12.727922061357855),
 statistictype(key='pay', value=23213.98046197353),
 statistictype(key='level', value=4.242640687119285)]
>>> pprint([x for x in analyse.get_variances()])
[statistictype(key='age', value=243),
 statistictype(key='pay', value=808333333.3333334),
 statistictype(key='level', value=27)]
>>> pprint([x for x in analyse.get_averages()])
[statistictype(key='age', value=36.0),
 statistictype(key='pay', value=48333.333333333336),
 statistictype(key='level', value=6.0)]
>>> pprint([x for x in analyse.get_maximums()])
[statistictype(key='age', value=valuetype(key='age', value=45, counter=2)),
 statistictype(key='pay', value=valuetype(key='pay', value=80000, counter=1)),
 statistictype(key='level', value=valuetype(key='level', value=12, counter=1))]
>>> pprint([x for x in analyse.get_minimums()])
[statistictype(key='age', value=valuetype(key='age', value=18, counter=1)),
 statistictype(key='pay', value=valuetype(key='pay', value=25000, counter=1)),
 statistictype(key='level', value=valuetype(key='level', value=3, counter=2))]
>>> pprint([x for x in analyse.count_values_by_keys()])
[statistictype(key='age', value=2),
 statistictype(key='pay', value=3),
 statistictype(key='level', value=2)]
>>> analyse.count_values_by_key('level')
statistictype(key='level', value=2)
>>> pprint([x for x in analyse.sort_by_value()])
[valuetype(key='level', value=3, counter=2),
 valuetype(key='level', value=12, counter=1),
 valuetype(key='age', value=18, counter=1),
 valuetype(key='age', value=45, counter=2),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1),
 valuetype(key='pay', value=80000, counter=1)]
>>> pprint([x for x in analyse.sort_values_by_sum()])
[valuetype(key='level', value=3, counter=2),
 valuetype(key='level', value=12, counter=1),
 valuetype(key='age', value=18, counter=1),
 valuetype(key='age', value=45, counter=2),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1),
 valuetype(key='pay', value=80000, counter=1)]
>>> pprint([x for x in analyse.sort_keys_by_sum()])
[('pay', 145000), ('age', 108), ('level', 18)]
>>> pprint([x for x in analyse.sort_by_counter()])
[valuetype(key='age', value=18, counter=1),
 valuetype(key='pay', value=80000, counter=1),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1),
 valuetype(key='level', value=12, counter=1),
 valuetype(key='age', value=45, counter=2),
 valuetype(key='level', value=3, counter=2)]
>>> pprint([x for x in analyse.sort_by_key()])
[valuetype(key='age', value=45, counter=2),
 valuetype(key='age', value=18, counter=1),
 valuetype(key='level', value=12, counter=1),
 valuetype(key='level', value=3, counter=2),
 valuetype(key='pay', value=80000, counter=1),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1)]
>>> pprint([x for x in analyse.keys_frequences()])
[statistictype(key='age', value=0.07441809186500006),
 statistictype(key='pay', value=99.91317889282416),
 statistictype(key='level', value=0.012403015310833345)]
>>> pprint([x for x in analyse.keys_values_frequences()])
[statistictype(key=valuetype(key='age', value=45, counter=2), value=0.06201507655416673),
 statistictype(key=valuetype(key='age', value=18, counter=1), value=0.012403015310833345),
 statistictype(key=valuetype(key='pay', value=80000, counter=1), value=55.12451249259265),
 statistictype(key=valuetype(key='pay', value=25000, counter=1), value=17.226410153935202),
 statistictype(key=valuetype(key='pay', value=40000, counter=1), value=27.562256246296325),
 statistictype(key=valuetype(key='level', value=12, counter=1), value=0.008268676873888896),
 statistictype(key=valuetype(key='level', value=3, counter=2), value=0.004134338436944448)]
>>> pprint([x for x in analyse.keys_values_count_frequences()])
[statistictype(key=valuetype(key='age', value=45, counter=2), value=22.22222222222222),
 statistictype(key=valuetype(key='age', value=18, counter=1), value=11.11111111111111),
 statistictype(key=valuetype(key='pay', value=80000, counter=1), value=11.11111111111111),
 statistictype(key=valuetype(key='pay', value=25000, counter=1), value=11.11111111111111),
 statistictype(key=valuetype(key='pay', value=40000, counter=1), value=11.11111111111111),
 statistictype(key=valuetype(key='level', value=12, counter=1), value=11.11111111111111),
 statistictype(key=valuetype(key='level', value=3, counter=2), value=22.22222222222222)]
>>> pprint([x for x in analyse.values_frequences()])
[statistictype(key=45, value=22.22222222222222),
 statistictype(key=18, value=11.11111111111111),
 statistictype(key=80000, value=11.11111111111111),
 statistictype(key=25000, value=11.11111111111111),
 statistictype(key=40000, value=11.11111111111111),
 statistictype(key=12, value=11.11111111111111),
 statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in analyse.value_frequence(45)])
[45, 22.22222222222222]
>>> pprint([x for x in analyse.key_value_count_frequence(analyse.valuetype(key='pay', value=80000, counter=1))])
[valuetype(key='pay', value=80000, counter=1), 11.11111111111111]
>>> pprint([x for x in analyse.key_value_frequence(analyse.valuetype(key='pay', value=80000, counter=1))])
[valuetype(key='pay', value=80000, counter=1), 55.12451249259265]
>>> pprint([x for x in analyse.key_frequence('pay')])
['pay', 99.91317889282416]
>>> statistictypes = [DataAnalysis.statistictype(key=45, value=22.22222222222222),
...  DataAnalysis.statistictype(key=18, value=11.11111111111111),
...  DataAnalysis.statistictype(key=80000, value=11.11111111111111),
...  DataAnalysis.statistictype(key=25000, value=11.11111111111111),
...  DataAnalysis.statistictype(key=40000, value=11.11111111111111),
...  DataAnalysis.statistictype(key=12, value=11.11111111111111),
...  DataAnalysis.statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in DataAnalysis.sort_statistictype_by_key(statistictypes)])
[statistictype(key=3, value=22.22222222222222),
 statistictype(key=12, value=11.11111111111111),
 statistictype(key=18, value=11.11111111111111),
 statistictype(key=45, value=22.22222222222222),
 statistictype(key=25000, value=11.11111111111111),
 statistictype(key=40000, value=11.11111111111111),
 statistictype(key=80000, value=11.11111111111111)]
>>> pprint([x for x in DataAnalysis.sort_statistictype_by_value(statistictypes)])
[statistictype(key=18, value=11.11111111111111),
 statistictype(key=80000, value=11.11111111111111),
 statistictype(key=25000, value=11.11111111111111),
 statistictype(key=40000, value=11.11111111111111),
 statistictype(key=12, value=11.11111111111111),
 statistictype(key=45, value=22.22222222222222),
 statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in DataAnalysis.sort_dict_by_value({"a": 2, "b": 1, "c": 3})])
['b', 'a', 'c']
>>> pprint([x for x in analyse.get_values_by_key('pay')])
[valuetype(key='pay', value=80000, counter=1),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1)]
>>> pprint([x for x in analyse.get_gt(DataAnalysis.valuetype(key='level', value=3, counter=2))])
[valuetype(key='age', value=45, counter=2),
 valuetype(key='age', value=18, counter=1),
 valuetype(key='pay', value=80000, counter=1),
 valuetype(key='pay', value=25000, counter=1),
 valuetype(key='pay', value=40000, counter=1),
 valuetype(key='level', value=12, counter=1),
 None]
>>> pprint([x for x in analyse.get_lt(DataAnalysis.valuetype(key='level', value=3, counter=2))])
[None, None, None, None, None, None, None]
>>> pprint([x for x in analyse.get_lt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))])
[valuetype(key='age', value=45, counter=2),
 valuetype(key='age', value=18, counter=1),
 None,
 None,
 None,
 valuetype(key='level', value=12, counter=1),
 valuetype(key='level', value=3, counter=2)]
>>> analyse.count_lt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))
6
>>> analyse.count_gt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))
2
>>> analyse.count_value('level', 3)
valuetype(key='level', value=3, counter=2)
>>> analyse.get_minimum('level')
statistictype(key='level', value=valuetype(key='level', value=3, counter=2))
>>> analyse.get_maximum('level')
statistictype(key='level', value=valuetype(key='level', value=12, counter=1))
>>> analyse.get_sum('level')
statistictype(key='level', value=18)
>>> analyse.get_average('level')
statistictype(key='level', value=6.0)
>>> analyse.get_variance('level')
statistictype(key='level', value=27)
>>> analyse.get_deviation('level')
statistictype(key='level', value=4.242640687119285)
>>> analyse.get_median('level')
statistictype(key='level', value=3)
>>> data = [
...     {
...             "filename": "__init__.py",
...             "size": 255,
...             "lines": 5,
...             "modification": datetime.now(),
...     },
...     {
...             "filename": "WebScripts.py",
...             "size": 256520,
...             "lines": 3214,
...             "modification": datetime(2016, 6, 22, 12, 25, 48),
...     },
...     {
...             "filename": "future_python_file.py",
...     },
... ]
>>> analyse = DataAnalysis(data)
>>> pprint([x for x in analyse.get_sums()])
[statistictype(key='filename', value=None),
 statistictype(key='size', value=256775),
 statistictype(key='lines', value=3219),
 statistictype(key='modification', value=None)]
>>> analyse = DataAnalysis(data, fields=['size', 'lines', 'modification'])
>>> pprint([x for x in analyse.get_sums()])
[statistictype(key='size', value=256775),
 statistictype(key='lines', value=3219),
 statistictype(key='modification', value=None)]
>>> analyse = DataAnalysis(data, filter_=lambda x: x.get("size"))
>>> len(analyse.keys['filename'])
2
>>> analyse = DataAnalysis(data)
>>> len(analyse.keys['filename'])
3
>>> data = (
...     (1,2,3),
...     (500, 412, 561),
...     (721, 216, 683),
...     (10,25,56),
... )
>>> analyse = DataAnalysis(data)
>>> len(analyse.keys[0])
4
>>> analyse = DataAnalysis(data, filters={0: lambda x: x < 100})
>>> len(analyse.keys[0])
2
>>> analyse = DataAnalysis(data, fields=[1,3])
>>> len(analyse.keys[0])
0
>>> pprint([x for x in analyse.get_all_values()])
[valuetype(key=1, value=2, counter=1),
 valuetype(key=1, value=412, counter=1),
 valuetype(key=1, value=216, counter=1),
 valuetype(key=1, value=25, counter=1)]
>>> data = [{"key1": 1, "key2": 2}] * 3 + [{"key1": 2, "key2": 1}] * 2
>>> DataAnalysis.print_data(data, {"key1": "Column name", "key2": 5}, True)
|Column name|key2 |
|-----------|-----|
|1          |2    |
|1          |2    |
|1          |2    |
|1          |2    |
|2          |1    |
|2          |1    |
>>> DataAnalysis.print_data(data, {"key1": "Column name", "key2": 2}, True)
|Column name|ke|
|-----------|--|
|1          |2 |
|1          |2 |
|1          |2 |
|1          |2 |
|2          |1 |
|2          |1 |
>>> DataAnalysis.print_data(data)
|key1|key2|
|----|----|
|1   |2   |
|1   |2   |
|1   |2   |
|1   |2   |
|2   |1   |
|2   |1   |
>>> analysis = DataAnalysis(data)
>>> analysis.statistictypes_printer(analysis.get_deviations())
|key                    |value              |
|-----------------------|-------------------|
|key1                   |0.4898979485566356 |
|key2                   |0.4898979485566356 |
>>> analysis.statistictypes_printer(analysis.get_averages())
|key                    |value              |
|-----------------------|-------------------|
|key1                   |1.4                |
|key2                   |1.6                |
>>> analysis.valuetypes_printer(analysis.get_gt(analysis.valuetype(key="key1", value=0.5, counter=0)))
|key                    |value              |counter     |
|-----------------------|-------------------|------------|
|key1                   |1                  |3           |
|key1                   |2                  |2           |
|key2                   |2                  |3           |
|key2                   |1                  |2           |
>>> from DataAnalysis import PYPLOT
>>> if PYPLOT: analysis.statistictypes_chart(analysis.get_averages())
...
>>> for x in DataAnalysis.get_grouped_DataAnalysis(data, ("key1", "key2")): DataAnalysis.valuetypes_printer(x.get_values()); print()
...
|key                    |value              |counter     |
|-----------------------|-------------------|------------|
|key1                   |1                  |3           |
|key2                   |2                  |3           |
<BLANKLINE>
|key                    |value              |counter     |
|-----------------------|-------------------|------------|
|key1                   |2                  |2           |
|key2                   |1                  |2           |
<BLANKLINE>
>>> if PYPLOT: analysis.valuetypes_values_chart(analysis.get_all_values())
...
>>> if PYPLOT: analysis.valuetypes_counters_chart(analysis.get_values())
...
>>> import sys
>>> sys.modules["matplotlib"] = sys
>>> sys.modules["matplotlib.pyplot"] = sys
>>> from importlib import reload
>>> DataAnalysis = reload(sys.modules["DataAnalysis"]).DataAnalysis
>>> DataAnalysis.show_chart
Traceback (most recent call last):
  ...
AttributeError: type object 'DataAnalysis' has no attribute 'show_chart'
>>>
 
Run tests:
 ~# python -m doctest DataAnalysis.py
 ~# python DataAnalysis.py            # Verbose mode
 
1 items passed all tests:
  79 tests in __main__
79 tests in 65 items.
79 passed and 0 failed.
Test passed.
 
~# coverage run DataAnalysis.py
~# coverage report
Name              Stmts   Miss  Cover
-------------------------------------
DataAnalysis.py     290      0   100%
-------------------------------------
TOTAL               290      0   100%
~#
| Classes | ||||||||||
| 
 
 | ||||||||||
| Data | ||
| __all__ = ['DataAnalysis'] __author_email__ = 'mauricelambert434@gmail.com' __copyright__ = '\nPythonToolsKit Copyright (C) 2022 Maurice Lam...ome to redistribute it\nunder certain conditions.\n' __description__ = '\nThis package implements tools to build python package and tools.\n' __license__ = 'GPL-3.0 License' __maintainer__ = 'Maurice Lambert' __maintainer_email__ = 'mauricelambert434@gmail.com' __url__ = 'https://github.com/mauricelambert/PythonToolsKit' | ||
| Author | ||
| Maurice Lambert | ||