資源簡介
高斯消去法(LU分解)并行算法:設計實現SSE算法,加速計算過程。包括代碼以及說明文檔。

代碼片段和文件信息
// SSE_LU.cpp : Defines the entry point for the console application.
//
#include?“stdafx.h“
#include?
#include?
#include?
#include?
#include?
#include?
#include?
#include?
#define?N?4096
using?namespace?std;
float**?serial(float?**A)?//************************************串行的高斯*******************************************
{
for?(int?k?=?0;?k? {
float?tmp?=A[k][k];
for?(int?j?=?k;?j? {
A[k][j]?=?A[k][j]?/?tmp;
}
for?(int?i?=?k?+?1;?i? {
float?tmp2?=?A[i][k];
for?(int?j?=?k?+?1;?j? {
A[i][j]?=?A[i][j]?-?tmp2?*?A[k][j];
}
A[i][k]?=?0;
}
}
return?A;
}
void?parallel(float?**A)?//************************************并行的高斯*****************************************
{
__m128?t1?t2?t3?t4;
for?(int?k?=?0;?k? {
float?tmp[4]?=?{?A[k][k]?A[k][k]?A[k][k]?A[k][k]?};
t1?=?_mm_loadu_ps(tmp);
for?(int?j?=?N?-?4;?j?>=?k;?j?-=?4)?//從后向前每次取四個
{
t2?=?_mm_loadu_ps(A[k]?+?j);
t3?=?_mm_div_ps(t2?t1);//除法
_mm_storeu_ps(A[k]?+?j?t3);
}
if?(k?%?4?!=?(N?%?4))?//處理不能被4整除的元素
{
for?(int?j?=?k;?j?%?4?!=?(?N%?4);?j++)
{
A[k][j]?=?A[k][j]?/?tmp[0];
}
}
for?(int?j?=?(N?%?4)?-?1;?j?>=?0;?j--)?
{????
A[k][j]?=?A[k][j]?/?tmp[0];
}
for?(int?i?=?k?+?1;?i? {
float?tmp[4]?=?{?A[i][k]?A[i][k]?A[i][k]?A[i][k]?};
t1?=?_mm_loadu_ps(tmp);
for?(int?j?=?N?-?4;?j?>k;?j?-=?4)
{
t2?=?_mm_loadu_ps(A[i]?+?j);
t3?=?_mm_loadu_ps(A[k]?+?j);
t4?=?_mm_sub_ps(t2?_mm_mul_ps(t1?t3));?//減法
_mm_storeu_ps(A[i]?+?j?t4);
}
for?(int?j?=?k?+?1;?j?%?4?!=?(N?%?4);?j++)
{
A[i][j]?=?A[i][j]?-?A[i][k]?*?A[k][j];
}
A[i][k]?=?0;
}
}
}
// Prints the top-left num x num corner of the matrix to standard output,
// one matrix row per output line.
//
// Parameters:
//   A   - array of row pointers; at least num rows of at least num floats each.
//   num - number of rows and columns to print.
//
// NOTE(review): the stream operands were lost in the extracted listing. A
// single space after every element and a newline after every row are assumed
// here - confirm against the original file.
void print(float **A, int num)
{
    for (int i = 0; i < num; i++) {
        for (int j = 0; j < num; j++) {
            std::cout << A[i][j] << " ";
        }
        std::cout << '\n';
    }
}
int?_tmain()?//*************************************主函數入口********************************************
{
srand((unsigned)time(NULL));?//生成種子?
float?**A?=?new?float*[N];
float?**A2?=?new?float*[N];
for?(int?i?=?0;?i {
A[i]?=?new?float[N];
A2[i]?=?A[i];
}
cout?<“*************生成初始隨機矩陣************“?< for?(int?i?=?0;?i?1;?i++)
{
for?(int?j?=?0;?j? {
A[i][j]?=?rand()?%?90;?//取值為1-90之間
}
}
print(A10);
cout?<“***************串行的高斯**************“?< clock_t??clockBegin?clockEnd;
clockBegin?=?clock();?//開始計時
float?**B?=?serial(A);
clockEnd?=?clock();
print(A10);
cout?<“總共耗時:?“?<
cout?<“***************并行的高斯**************“?< clockBegin?=?clock();?//開始計時
parallel(A2);
clockEnd?=?clock();
print(A210);
cout?<“總共耗時:?“?< return?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件     156783  2016-07-19 14:34  高斯消去法SSE并行化.docx
     文件      74752  2016-03-29 17:25  SSE_LU\Debug\SSE_LU.exe
     文件     708952  2016-03-29 17:25  SSE_LU\Debug\SSE_LU.ilk
     文件    1298432  2016-03-29 17:25  SSE_LU\Debug\SSE_LU.pdb
     文件    3080192  2016-03-29 17:25  SSE_LU\ipch\sse_lu-5967a1ae\sse_lu-b3fb2d97.ipch
     文件       1735  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.log
     文件     165993  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.obj
     文件    1703936  2016-03-29 15:11  SSE_LU\SSE_LU\Debug\SSE_LU.pch
     文件       1374  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\cl.command.1.tlog
     文件      30640  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.read.1.tlog
     文件        910  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.write.1.tlog
     文件       1208  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
     文件       2738  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
     文件        480  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
     文件        188  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog\SSE_LU.lastbuildstate
     文件      11920  2016-03-29 15:11  SSE_LU\SSE_LU\Debug\stdafx.obj
     文件     764928  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\vc120.idb
     文件     487424  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\vc120.pdb
     文件       1503  2016-03-29 14:05  SSE_LU\SSE_LU\ReadMe.txt
     文件       3110  2016-03-29 17:25  SSE_LU\SSE_LU\SSE_LU.cpp
     文件       4531  2016-03-29 14:05  SSE_LU\SSE_LU\SSE_LU.vcxproj
     文件       1313  2016-03-29 14:05  SSE_LU\SSE_LU\SSE_LU.vcxproj.filters
     文件        212  2016-03-29 14:05  SSE_LU\SSE_LU\stdafx.cpp
     文件        234  2016-03-29 14:05  SSE_LU\SSE_LU\stdafx.h
     文件        236  2016-03-29 14:05  SSE_LU\SSE_LU\targetver.h
     文件   33619968  2016-03-29 17:34  SSE_LU\SSE_LU.sdf
     文件        964  2016-03-29 14:05  SSE_LU\SSE_LU.sln
    ..A..H.     13824  2016-03-29 17:34  SSE_LU\SSE_LU.v12.suo
     目錄          0  2016-03-29 17:25  SSE_LU\SSE_LU\Debug\SSE_LU.tlog
     目錄          0  2016-03-29 14:05  SSE_LU\ipch\sse_lu-5967a1ae
............此處省略8個文件信息
評論
共有 條評論