DataAnalysis (version 0.0.1) | index DataAnalysis.html |
This package implements tools to build Python packages and tools.
>>> from DataAnalysis import DataAnalysis
>>> from pprint import pprint
>>> data = [
... {
... "age": 45,
... "pay": 80000,
... "level": 12,
... },
... {
... "age": 18,
... "pay": 25000,
... "level": 3,
... },
... {
... "age": 45,
... "pay": 40000,
... "level": 3,
... }
... ]
>>> analyse = DataAnalysis(data)
>>> pprint([x for x in analyse.get_medians()])
[statistictype(key='age', value=45),
statistictype(key='pay', value=40000),
statistictype(key='level', value=3)]
>>> pprint([x for x in analyse.get_deviations()])
[statistictype(key='age', value=12.727922061357855),
statistictype(key='pay', value=23213.98046197353),
statistictype(key='level', value=4.242640687119285)]
>>> pprint([x for x in analyse.get_variances()])
[statistictype(key='age', value=243),
statistictype(key='pay', value=808333333.3333334),
statistictype(key='level', value=27)]
>>> pprint([x for x in analyse.get_averages()])
[statistictype(key='age', value=36.0),
statistictype(key='pay', value=48333.333333333336),
statistictype(key='level', value=6.0)]
>>> pprint([x for x in analyse.get_maximums()])
[statistictype(key='age', value=valuetype(key='age', value=45, counter=2)),
statistictype(key='pay', value=valuetype(key='pay', value=80000, counter=1)),
statistictype(key='level', value=valuetype(key='level', value=12, counter=1))]
>>> pprint([x for x in analyse.get_minimums()])
[statistictype(key='age', value=valuetype(key='age', value=18, counter=1)),
statistictype(key='pay', value=valuetype(key='pay', value=25000, counter=1)),
statistictype(key='level', value=valuetype(key='level', value=3, counter=2))]
>>> pprint([x for x in analyse.count_values_by_keys()])
[statistictype(key='age', value=2),
statistictype(key='pay', value=3),
statistictype(key='level', value=2)]
>>> analyse.count_values_by_key('level')
statistictype(key='level', value=2)
>>> pprint([x for x in analyse.sort_by_value()])
[valuetype(key='level', value=3, counter=2),
valuetype(key='level', value=12, counter=1),
valuetype(key='age', value=18, counter=1),
valuetype(key='age', value=45, counter=2),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1),
valuetype(key='pay', value=80000, counter=1)]
>>> pprint([x for x in analyse.sort_values_by_sum()])
[valuetype(key='level', value=3, counter=2),
valuetype(key='level', value=12, counter=1),
valuetype(key='age', value=18, counter=1),
valuetype(key='age', value=45, counter=2),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1),
valuetype(key='pay', value=80000, counter=1)]
>>> pprint([x for x in analyse.sort_keys_by_sum()])
[('pay', 145000), ('age', 108), ('level', 18)]
>>> pprint([x for x in analyse.sort_by_counter()])
[valuetype(key='age', value=18, counter=1),
valuetype(key='pay', value=80000, counter=1),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1),
valuetype(key='level', value=12, counter=1),
valuetype(key='age', value=45, counter=2),
valuetype(key='level', value=3, counter=2)]
>>> pprint([x for x in analyse.sort_by_key()])
[valuetype(key='age', value=45, counter=2),
valuetype(key='age', value=18, counter=1),
valuetype(key='level', value=12, counter=1),
valuetype(key='level', value=3, counter=2),
valuetype(key='pay', value=80000, counter=1),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1)]
>>> pprint([x for x in analyse.keys_frequences()])
[statistictype(key='age', value=0.07441809186500006),
statistictype(key='pay', value=99.91317889282416),
statistictype(key='level', value=0.012403015310833345)]
>>> pprint([x for x in analyse.keys_values_frequences()])
[statistictype(key=valuetype(key='age', value=45, counter=2), value=0.06201507655416673),
statistictype(key=valuetype(key='age', value=18, counter=1), value=0.012403015310833345),
statistictype(key=valuetype(key='pay', value=80000, counter=1), value=55.12451249259265),
statistictype(key=valuetype(key='pay', value=25000, counter=1), value=17.226410153935202),
statistictype(key=valuetype(key='pay', value=40000, counter=1), value=27.562256246296325),
statistictype(key=valuetype(key='level', value=12, counter=1), value=0.008268676873888896),
statistictype(key=valuetype(key='level', value=3, counter=2), value=0.004134338436944448)]
>>> pprint([x for x in analyse.keys_values_count_frequences()])
[statistictype(key=valuetype(key='age', value=45, counter=2), value=22.22222222222222),
statistictype(key=valuetype(key='age', value=18, counter=1), value=11.11111111111111),
statistictype(key=valuetype(key='pay', value=80000, counter=1), value=11.11111111111111),
statistictype(key=valuetype(key='pay', value=25000, counter=1), value=11.11111111111111),
statistictype(key=valuetype(key='pay', value=40000, counter=1), value=11.11111111111111),
statistictype(key=valuetype(key='level', value=12, counter=1), value=11.11111111111111),
statistictype(key=valuetype(key='level', value=3, counter=2), value=22.22222222222222)]
>>> pprint([x for x in analyse.values_frequences()])
[statistictype(key=45, value=22.22222222222222),
statistictype(key=18, value=11.11111111111111),
statistictype(key=80000, value=11.11111111111111),
statistictype(key=25000, value=11.11111111111111),
statistictype(key=40000, value=11.11111111111111),
statistictype(key=12, value=11.11111111111111),
statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in analyse.value_frequence(45)])
[45, 22.22222222222222]
>>> pprint([x for x in analyse.key_value_count_frequence(analyse.valuetype(key='pay', value=80000, counter=1))])
[valuetype(key='pay', value=80000, counter=1), 11.11111111111111]
>>> pprint([x for x in analyse.key_value_frequence(analyse.valuetype(key='pay', value=80000, counter=1))])
[valuetype(key='pay', value=80000, counter=1), 55.12451249259265]
>>> pprint([x for x in analyse.key_frequence('pay')])
['pay', 99.91317889282416]
>>> statistictypes = [DataAnalysis.statistictype(key=45, value=22.22222222222222),
... DataAnalysis.statistictype(key=18, value=11.11111111111111),
... DataAnalysis.statistictype(key=80000, value=11.11111111111111),
... DataAnalysis.statistictype(key=25000, value=11.11111111111111),
... DataAnalysis.statistictype(key=40000, value=11.11111111111111),
... DataAnalysis.statistictype(key=12, value=11.11111111111111),
... DataAnalysis.statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in DataAnalysis.sort_statistictype_by_key(statistictypes)])
[statistictype(key=3, value=22.22222222222222),
statistictype(key=12, value=11.11111111111111),
statistictype(key=18, value=11.11111111111111),
statistictype(key=45, value=22.22222222222222),
statistictype(key=25000, value=11.11111111111111),
statistictype(key=40000, value=11.11111111111111),
statistictype(key=80000, value=11.11111111111111)]
>>> pprint([x for x in DataAnalysis.sort_statistictype_by_value(statistictypes)])
[statistictype(key=18, value=11.11111111111111),
statistictype(key=80000, value=11.11111111111111),
statistictype(key=25000, value=11.11111111111111),
statistictype(key=40000, value=11.11111111111111),
statistictype(key=12, value=11.11111111111111),
statistictype(key=45, value=22.22222222222222),
statistictype(key=3, value=22.22222222222222)]
>>> pprint([x for x in DataAnalysis.sort_dict_by_value({"a": 2, "b": 1, "c": 3})])
['b', 'a', 'c']
>>> pprint([x for x in analyse.get_values_by_key('pay')])
[valuetype(key='pay', value=80000, counter=1),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1)]
>>> pprint([x for x in analyse.get_gt(DataAnalysis.valuetype(key='level', value=3, counter=2))])
[valuetype(key='age', value=45, counter=2),
valuetype(key='age', value=18, counter=1),
valuetype(key='pay', value=80000, counter=1),
valuetype(key='pay', value=25000, counter=1),
valuetype(key='pay', value=40000, counter=1),
valuetype(key='level', value=12, counter=1),
None]
>>> pprint([x for x in analyse.get_lt(DataAnalysis.valuetype(key='level', value=3, counter=2))])
[None, None, None, None, None, None, None]
>>> pprint([x for x in analyse.get_lt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))])
[valuetype(key='age', value=45, counter=2),
valuetype(key='age', value=18, counter=1),
None,
None,
None,
valuetype(key='level', value=12, counter=1),
valuetype(key='level', value=3, counter=2)]
>>> analyse.count_lt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))
6
>>> analyse.count_gt(DataAnalysis.valuetype(key='pay', value=25000, counter=1))
2
>>> analyse.count_value('level', 3)
valuetype(key='level', value=3, counter=2)
>>> analyse.get_minimum('level')
statistictype(key='level', value=valuetype(key='level', value=3, counter=2))
>>> analyse.get_maximum('level')
statistictype(key='level', value=valuetype(key='level', value=12, counter=1))
>>> analyse.get_sum('level')
statistictype(key='level', value=18)
>>> analyse.get_average('level')
statistictype(key='level', value=6.0)
>>> analyse.get_variance('level')
statistictype(key='level', value=27)
>>> analyse.get_deviation('level')
statistictype(key='level', value=4.242640687119285)
>>> analyse.get_median('level')
statistictype(key='level', value=3)
>>> data = [
... {
... "filename": "__init__.py",
... "size": 255,
... "lines": 5,
... "modification": datetime.now(),
... },
... {
... "filename": "WebScripts.py",
... "size": 256520,
... "lines": 3214,
... "modification": datetime(2016, 6, 22, 12, 25, 48),
... },
... {
... "filename": "future_python_file.py",
... },
... ]
>>> analyse = DataAnalysis(data)
>>> pprint([x for x in analyse.get_sums()])
[statistictype(key='filename', value=None),
statistictype(key='size', value=256775),
statistictype(key='lines', value=3219),
statistictype(key='modification', value=None)]
>>> analyse = DataAnalysis(data, fields=['size', 'lines', 'modification'])
>>> pprint([x for x in analyse.get_sums()])
[statistictype(key='size', value=256775),
statistictype(key='lines', value=3219),
statistictype(key='modification', value=None)]
>>> analyse = DataAnalysis(data, filter_=lambda x: x.get("size"))
>>> len(analyse.keys['filename'])
2
>>> analyse = DataAnalysis(data)
>>> len(analyse.keys['filename'])
3
>>> data = (
... (1,2,3),
... (500, 412, 561),
... (721, 216, 683),
... (10,25,56),
... )
>>> analyse = DataAnalysis(data)
>>> len(analyse.keys[0])
4
>>> analyse = DataAnalysis(data, filters={0: lambda x: x < 100})
>>> len(analyse.keys[0])
2
>>> analyse = DataAnalysis(data, fields=[1,3])
>>> len(analyse.keys[0])
0
>>> pprint([x for x in analyse.get_all_values()])
[valuetype(key=1, value=2, counter=1),
valuetype(key=1, value=412, counter=1),
valuetype(key=1, value=216, counter=1),
valuetype(key=1, value=25, counter=1)]
>>> data = [{"key1": 1, "key2": 2}] * 3 + [{"key1": 2, "key2": 1}] * 2
>>> DataAnalysis.print_data(data, {"key1": "Column name", "key2": 5}, True)
|Column name|key2 |
|-----------|-----|
|1 |2 |
|1 |2 |
|1 |2 |
|1 |2 |
|2 |1 |
|2 |1 |
>>> DataAnalysis.print_data(data, {"key1": "Column name", "key2": 2}, True)
|Column name|ke|
|-----------|--|
|1 |2 |
|1 |2 |
|1 |2 |
|1 |2 |
|2 |1 |
|2 |1 |
>>> DataAnalysis.print_data(data)
|key1|key2|
|----|----|
|1 |2 |
|1 |2 |
|1 |2 |
|1 |2 |
|2 |1 |
|2 |1 |
>>> analysis = DataAnalysis(data)
>>> analysis.statistictypes_printer(analysis.get_deviations())
|key |value |
|-----------------------|-------------------|
|key1 |0.4898979485566356 |
|key2 |0.4898979485566356 |
>>> analysis.statistictypes_printer(analysis.get_averages())
|key |value |
|-----------------------|-------------------|
|key1 |1.4 |
|key2 |1.6 |
>>> analysis.valuetypes_printer(analysis.get_gt(analysis.valuetype(key="key1", value=0.5, counter=0)))
|key |value |counter |
|-----------------------|-------------------|------------|
|key1 |1 |3 |
|key1 |2 |2 |
|key2 |2 |3 |
|key2 |1 |2 |
>>> from DataAnalysis import PYPLOT
>>> if PYPLOT: analysis.statistictypes_chart(analysis.get_averages())
...
>>> for x in DataAnalysis.get_grouped_DataAnalysis(data, ("key1", "key2")): DataAnalysis.valuetypes_printer(x.get_values()); print()
...
|key |value |counter |
|-----------------------|-------------------|------------|
|key1 |1 |3 |
|key2 |2 |3 |
<BLANKLINE>
|key |value |counter |
|-----------------------|-------------------|------------|
|key1 |2 |2 |
|key2 |1 |2 |
<BLANKLINE>
>>> if PYPLOT: analysis.valuetypes_values_chart(analysis.get_all_values())
...
>>> if PYPLOT: analysis.valuetypes_counters_chart(analysis.get_values())
...
>>> import sys
>>> sys.modules["matplotlib"] = sys
>>> sys.modules["matplotlib.pyplot"] = sys
>>> from importlib import reload
>>> DataAnalysis = reload(sys.modules["DataAnalysis"]).DataAnalysis
>>> DataAnalysis.show_chart
Traceback (most recent call last):
...
AttributeError: type object 'DataAnalysis' has no attribute 'show_chart'
>>>
Run tests:
~# python -m doctest DataAnalysis.py
~# python DataAnalysis.py # Verbose mode
1 items passed all tests:
79 tests in __main__
79 tests in 65 items.
79 passed and 0 failed.
Test passed.
~# coverage run DataAnalysis.py
~# coverage report
Name Stmts Miss Cover
-------------------------------------
DataAnalysis.py 290 0 100%
-------------------------------------
TOTAL 290 0 100%
~#
Classes
Data
__all__ = ['DataAnalysis']
__author_email__ = 'mauricelambert434@gmail.com'
__copyright__ = '\nPythonToolsKit Copyright (C) 2022 Maurice Lam...ome to redistribute it\nunder certain conditions.\n'
__description__ = '\nThis package implements tools to build python package and tools.\n'
__license__ = 'GPL-3.0 License'
__maintainer__ = 'Maurice Lambert'
__maintainer_email__ = 'mauricelambert434@gmail.com'
__url__ = 'https://github.com/mauricelambert/PythonToolsKit'
Author
Maurice Lambert