-
大小: 431KB文件類型: .zip金幣: 2下載: 1 次發布日期: 2021-06-17
- 語言: Python
- 標簽:
資源簡介
This is an open repo of the best practices for writing PySpark that I have learnt from working with the framework.

代碼片段和文件信息
"""Convert the repository's Jupyter notebooks into markdown for GitBook.

Run as a script, this performs four steps in order:

1. delete the chapter sub-folders of ``<this dir>/src`` (top-level files in
   that directory are kept);
2. convert every ``*.ipynb`` under ``<parent dir>/src`` to markdown with
   ``jupyter nbconvert``, mirroring the folder layout into ``<this dir>/src``;
3. copy every ``*.md`` under ``<parent dir>/src`` to the mirrored location;
4. strip the scoped ``<style>`` block held in ``style`` from every generated
   markdown file.
"""
import glob
import os
import os.path as op
import shutil
import subprocess
import sys
from shutil import copyfile

# Explicit check rather than ``assert``: assertions are stripped under ``-O``,
# which would silently disable the version gate.
if sys.version_info < (3, 6):
    raise RuntimeError('converter requires Python 3.6+!')

basedir = op.abspath(op.dirname(__file__))
markdown_dir = op.join(basedir, 'src')
ipynb_dir = op.abspath(op.join(basedir, os.pardir, 'src'))

# Scoped CSS block that is removed from the generated markdown.
# NOTE(review): the blank lines between the CSS rules are restored on the
# assumption that they were lost together with every other blank line of this
# script -- confirm against the block that actually appears in the nbconvert
# output, since the clean-up is an exact string match.
style = """\
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
"""

# Characters rewritten in generated markdown paths: '(' -> '<', ')' -> '>',
# and '?' dropped.
# NOTE(review): the dropped character is taken to be a literal '?', not a
# space, because the generated file names still contain spaces -- confirm.
_PATH_FIXUPS = str.maketrans({'(': '<', ')': '>', '?': None})


def _mirrored_path(source_file):
    """Return where ``source_file`` (under ``ipynb_dir``) lands in ``markdown_dir``.

    Only the part of the path below ``ipynb_dir`` is carried over, so a
    ``src`` substring elsewhere in the absolute path is never rewritten.
    """
    return op.join(markdown_dir, op.relpath(source_file, ipynb_dir))


def _notebook_target(file_ipynb):
    """Return the markdown output path for the notebook ``file_ipynb``."""
    stem, _ext = op.splitext(op.relpath(file_ipynb, ipynb_dir))
    return op.join(markdown_dir, (stem + '.md').translate(_PATH_FIXUPS))


def _remove_chapter_dirs():
    """Delete the sub-directories of ``markdown_dir``; files directly in it stay."""
    # A trailing separator makes glob match directories only.
    for chapter in glob.glob(op.join(glob.escape(markdown_dir), '*', '')):
        shutil.rmtree(chapter, ignore_errors=True)


def _convert_notebooks():
    """Run ``jupyter nbconvert --to markdown`` on every notebook below ``ipynb_dir``."""
    pattern = op.join(glob.escape(ipynb_dir), '**', '*.ipynb')
    for file_ipynb in glob.glob(pattern, recursive=True):
        file_ipynb = op.abspath(file_ipynb)
        if 'Random' in file_ipynb:
            continue
        file_md = _notebook_target(file_ipynb)
        os.makedirs(op.dirname(file_md), exist_ok=True)
        # Argument list instead of a shell string: the paths contain spaces
        # and other characters a shell would interpret.
        result = subprocess.run(
            ['jupyter', 'nbconvert', '--to', 'markdown',
             file_ipynb, '--output', file_md],
            check=False,
        )
        if result.returncode != 0:
            # A failed conversion is reported but does not stop the run.
            print(f'nbconvert failed ({result.returncode}): {file_ipynb}',
                  file=sys.stderr)


def _copy_markdown():
    """Copy every ``*.md`` below ``ipynb_dir`` to its mirrored location."""
    pattern = op.join(glob.escape(ipynb_dir), '**', '*.md')
    for file_md in glob.glob(pattern, recursive=True):
        file_md = op.abspath(file_md)
        cp_file_md = _mirrored_path(file_md)
        os.makedirs(op.dirname(cp_file_md), exist_ok=True)
        copyfile(file_md, cp_file_md)


def _strip_style_blocks():
    """Remove the ``style`` block from every markdown file below ``markdown_dir``."""
    pattern = op.join(glob.escape(markdown_dir), '**', '*.md')
    for file in glob.glob(pattern, recursive=True):
        with open(file, 'r', encoding='utf-8') as f:
            content = f.read()
        content_new = content.replace(style, '')
        if content_new != content:  # leave untouched files alone
            with open(file, 'w', encoding='utf-8') as f:
                f.write(content_new)


def main():
    """Rebuild the GitBook markdown tree from the notebooks.

    Raises:
        FileNotFoundError: if the ``jupyter`` executable is not on ``PATH``.
    """
    _remove_chapter_dirs()  # delete chapter folders only
    _convert_notebooks()    # convert ipynb to md
    _copy_markdown()        # copy md to md
    _strip_style_blocks()   # cleanup


if __name__ == '__main__':
    main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-06-02 20:48  spark-syntax-master\
     目錄           0  2019-06-02 20:48  spark-syntax-master\.github\
     目錄           0  2019-06-02 20:48  spark-syntax-master\.github\ISSUE_TEMPLATE\
     文件         539  2019-06-02 20:48  spark-syntax-master\.github\ISSUE_TEMPLATE\feature-topic-request.md
     文件        1241  2019-06-02 20:48  spark-syntax-master\.gitignore
     文件        9605  2019-06-02 20:48  spark-syntax-master\README.md
     目錄           0  2019-06-02 20:48  spark-syntax-master\gitbook\
     文件         103  2019-06-02 20:48  spark-syntax-master\gitbook\.bookignore
     文件         896  2019-06-02 20:48  spark-syntax-master\gitbook\Dockerfile
     文件        1388  2019-06-02 20:48  spark-syntax-master\gitbook\Makefile
     文件         879  2019-06-02 20:48  spark-syntax-master\gitbook\Vagrantfile
     文件         679  2019-06-02 20:48  spark-syntax-master\gitbook\book.json
     文件        1759  2019-06-02 20:48  spark-syntax-master\gitbook\convert-ipynb2markdown.py
     文件        4761  2019-06-02 20:48  spark-syntax-master\gitbook\gitbook-auto-summary.py
     文件      184686  2019-06-02 20:48  spark-syntax-master\gitbook\package-lock.json
     文件        1034  2019-06-02 20:48  spark-syntax-master\gitbook\package.json
     目錄           0  2019-06-02 20:48  spark-syntax-master\gitbook\src\
     目錄           0  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 1 - Basics\
     文件        2725  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 1 - Basics\Section 1 - Useful Material.md
     文件        2440  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 1 - Basics\Section 2 - Creating your First Data ob
     文件        3894  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 1 - Basics\Section 3 - Reading your First Dataset.md
     文件        2712  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 1 - Basics\Section 4 - More Comfortable with SQL.md
     目錄           0  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\
     文件        2696  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 1.1 - Struct Types.md
     文件        3256  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 1.2 - Arrays and Lists.md
     文件        4492  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 1.3 - Maps and Dictionaries.md
     文件       13512  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 1.4 - Decimals and Why did my Decimals Overflow.md
     文件        2848  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 2 - Performing your First Transformations.md
     文件        5542  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 2.1 - Looking at Your Data.md
     文件        8493  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 2.10 - Spark Functions aren't Enough I Need my Own!.md
     文件       10720  2019-06-02 20:48  spark-syntax-master\gitbook\src\Chapter 2 - Exploring the Spark APIs\Section 2.11  - Unionizing Multiple Datafr
............此處省略80個文件信息
評論
共有 條評論